LCOV - code coverage report
Current view: top level - uvm - uvm_fault.c
                                              Hit    Total    Coverage
Test: 6.4                      Lines:          23      503       4.6 %
Date: 2018-10-19 03:25:38      Functions:       0       13       0.0 %
Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*      $OpenBSD: uvm_fault.c,v 1.93 2018/04/12 17:13:44 deraadt Exp $  */
       2             : /*      $NetBSD: uvm_fault.c,v 1.51 2000/08/06 00:22:53 thorpej Exp $   */
       3             : 
       4             : /*
       5             :  * Copyright (c) 1997 Charles D. Cranor and Washington University.
       6             :  * All rights reserved.
       7             :  *
       8             :  * Redistribution and use in source and binary forms, with or without
       9             :  * modification, are permitted provided that the following conditions
      10             :  * are met:
      11             :  * 1. Redistributions of source code must retain the above copyright
      12             :  *    notice, this list of conditions and the following disclaimer.
      13             :  * 2. Redistributions in binary form must reproduce the above copyright
      14             :  *    notice, this list of conditions and the following disclaimer in the
      15             :  *    documentation and/or other materials provided with the distribution.
      16             :  *
      17             :  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
      18             :  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
      19             :  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
      20             :  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
      21             :  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
      22             :  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      23             :  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      24             :  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      25             :  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
      26             :  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      27             :  *
      28             :  * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp
      29             :  */
      30             : 
      31             : /*
      32             :  * uvm_fault.c: fault handler
      33             :  */
      34             : 
      35             : #include <sys/param.h>
      36             : #include <sys/systm.h>
      37             : #include <sys/kernel.h>
      38             : #include <sys/proc.h>
      39             : #include <sys/malloc.h>
      40             : #include <sys/mman.h>
      41             : 
      42             : #include <uvm/uvm.h>
      43             : 
      44             : /*
      45             :  *
      46             :  * a word on page faults:
      47             :  *
      48             :  * types of page faults we handle:
      49             :  *
      50             :  * CASE 1: upper layer faults                   CASE 2: lower layer faults
      51             :  *
      52             :  *    CASE 1A         CASE 1B                  CASE 2A        CASE 2B
      53             :  *    read/write1     write>1                  read/write   +-cow_write/zero
      54             :  *         |             |                         |        |        
      55             :  *      +--|--+       +--|--+     +-----+       +  |  +     | +-----+
      56             :  * amap |  V  |       |  ----------->new|          |        | |  ^  |
      57             :  *      +-----+       +-----+     +-----+       +  |  +     | +--|--+
      58             :  *                                                 |        |    |
      59             :  *      +-----+       +-----+                   +--|--+     | +--|--+
      60             :  * uobj | d/c |       | d/c |                   |  V  |     +----|  |
      61             :  *      +-----+       +-----+                   +-----+       +-----+
      62             :  *
      63             :  * d/c = don't care
      64             :  * 
      65             :  *   case [0]: layerless fault
      66             :  *      no amap or uobj is present.   this is an error.
      67             :  *
      68             :  *   case [1]: upper layer fault [anon active]
      69             :  *     1A: [read] or [write with anon->an_ref == 1]
      70             :  *              I/O takes place in top level anon and uobj is not touched.
      71             :  *     1B: [write with anon->an_ref > 1]
      72             :  *              new anon is alloc'd and data is copied off ["COW"]
      73             :  *
      74             :  *   case [2]: lower layer fault [uobj]
      75             :  *     2A: [read on non-NULL uobj] or [write to non-copy_on_write area]
      76             :  *              I/O takes place directly in object.
      77             :  *     2B: [write to copy_on_write] or [read on NULL uobj]
      78             :  *              data is "promoted" from uobj to a new anon.   
      79             :  *              if uobj is null, then we zero fill.
      80             :  *
      81             :  * we follow the standard UVM locking protocol ordering:
      82             :  *
      83             :  * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ) 
      84             :  * we hold a PG_BUSY page if we unlock for I/O
      85             :  *
      86             :  *
      87             :  * the code is structured as follows:
      88             :  *  
      89             :  *     - init the "IN" params in the ufi structure
      90             :  *   ReFault:
      91             :  *     - do lookups [locks maps], check protection, handle needs_copy
      92             :  *     - check for case 0 fault (error)
      93             :  *     - establish "range" of fault
      94             :  *     - if we have an amap lock it and extract the anons
      95             :  *     - if sequential advice deactivate pages behind us
      96             :  *     - at the same time check pmap for unmapped areas and anon for pages
       97             :  *       that we could map in (and map them in if found)
      98             :  *     - check object for resident pages that we could map in
      99             :  *     - if (case 2) goto Case2
     100             :  *     - >>> handle case 1
     101             :  *           - ensure source anon is resident in RAM
     102             :  *           - if case 1B alloc new anon and copy from source
     103             :  *           - map the correct page in
     104             :  *   Case2:
     105             :  *     - >>> handle case 2
     106             :  *           - ensure source page is resident (if uobj)
     107             :  *           - if case 2B alloc new anon and copy from source (could be zero
     108             :  *              fill if uobj == NULL)
     109             :  *           - map the correct page in
     110             :  *     - done!
     111             :  *
     112             :  * note on paging:
     113             :  *   if we have to do I/O we place a PG_BUSY page in the correct object,
     114             :  * unlock everything, and do the I/O.   when I/O is done we must reverify
     115             :  * the state of the world before assuming that our data structures are
     116             :  * valid.   [because mappings could change while the map is unlocked]
     117             :  *
     118             :  *  alternative 1: unbusy the page in question and restart the page fault
     119             :  *    from the top (ReFault).   this is easy but does not take advantage
     120             :  *    of the information that we already have from our previous lookup, 
     121             :  *    although it is possible that the "hints" in the vm_map will help here.
     122             :  *
     123             :  * alternative 2: the system already keeps track of a "version" number of
     124             :  *    a map.   [i.e. every time you write-lock a map (e.g. to change a
     125             :  *    mapping) you bump the version number up by one...]   so, we can save
     126             :  *    the version number of the map before we release the lock and start I/O.
     127             :  *    then when I/O is done we can relock and check the version numbers
      128             :  *    to see if anything changed.    this might save us something over
      129             :  *    alternative 1 since we don't have to unbusy the page and may need fewer compares(?).
     130             :  *
     131             :  * alternative 3: put in backpointers or a way to "hold" part of a map
     132             :  *    in place while I/O is in progress.   this could be complex to
     133             :  *    implement (especially with structures like amap that can be referenced
     134             :  *    by multiple map entries, and figuring out what should wait could be
     135             :  *    complex as well...).
     136             :  *
     137             :  * given that we are not currently multiprocessor or multithreaded we might
     138             :  * as well choose alternative 2 now.   maybe alternative 3 would be useful
      139             :  * in the future.    XXX keep in mind for future consideration/rechecking.
     140             :  */
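
[editor's note] to make "alternative 2" concrete, here is a minimal sketch of
the version-number check, assuming the counter is the timestamp field of
struct vm_map (bumped on every write lock, per the comment above) and that
ReFault is available as a restart point; in this file the actual recheck is
hidden inside uvmfault_relock():

        unsigned int saved;

        saved = map->timestamp;         /* snapshot before unlocking */
        vm_map_unlock_read(map);
        /* ... the page is PG_BUSY; do the I/O with no locks held ... */
        vm_map_lock_read(map);
        if (map->timestamp != saved) {
                /* someone write-locked the map while we slept */
                vm_map_unlock_read(map);
                goto ReFault;
        }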
     141             : 
     142             : /*
     143             :  * local data structures
     144             :  */
     145             : struct uvm_advice {
     146             :         int nback;
     147             :         int nforw;
     148             : };
     149             : 
     150             : /*
     151             :  * page range array: set up in uvmfault_init().
     152             :  */
     153             : static struct uvm_advice uvmadvice[MADV_MASK + 1];
     154             : 
     155             : #define UVM_MAXRANGE 16 /* must be max() of nback+nforw+1 */
     156             : 
     157             : /*
     158             :  * private prototypes
     159             :  */
     160             : static void uvmfault_amapcopy(struct uvm_faultinfo *);
     161             : static __inline void uvmfault_anonflush(struct vm_anon **, int);
     162             : void    uvmfault_unlockmaps(struct uvm_faultinfo *, boolean_t);
     163             : void    uvmfault_update_stats(struct uvm_faultinfo *);
     164             : 
     165             : /*
     166             :  * inline functions
     167             :  */
     168             : /*
     169             :  * uvmfault_anonflush: try and deactivate pages in specified anons
     170             :  *
     171             :  * => does not have to deactivate page if it is busy
     172             :  */
     173             : static __inline void
     174           0 : uvmfault_anonflush(struct vm_anon **anons, int n)
     175             : {
     176             :         int lcv;
     177             :         struct vm_page *pg;
     178             :         
     179           0 :         for (lcv = 0 ; lcv < n ; lcv++) {
     180           0 :                 if (anons[lcv] == NULL)
     181             :                         continue;
     182           0 :                 pg = anons[lcv]->an_page;
     183           0 :                 if (pg && (pg->pg_flags & PG_BUSY) == 0) {
     184           0 :                         uvm_lock_pageq();
     185           0 :                         if (pg->wire_count == 0) {
     186           0 :                                 pmap_page_protect(pg, PROT_NONE);
     187           0 :                                 uvm_pagedeactivate(pg);
     188           0 :                         }
     189           0 :                         uvm_unlock_pageq();
     190           0 :                 }
     191             :         }
     192           0 : }
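
[editor's note] this helper is only used from uvm_fault() below: for a
MADV_SEQUENTIAL mapping it is handed the nback anons behind the faulting
address, on the theory that a sequential reader is finished with them and
they are good pageout candidates.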
     193             : 
     194             : /*
     195             :  * normal functions
     196             :  */
     197             : /*
     198             :  * uvmfault_init: compute proper values for the uvmadvice[] array.
     199             :  */
     200             : void
     201           0 : uvmfault_init(void)
     202             : {
     203             :         int npages;
     204             : 
     205             :         npages = atop(16384);
     206           0 :         if (npages > 0) {
     207           0 :                 KASSERT(npages <= UVM_MAXRANGE / 2);
     208           0 :                 uvmadvice[MADV_NORMAL].nforw = npages;
     209           0 :                 uvmadvice[MADV_NORMAL].nback = npages - 1;
     210           0 :         }
     211             : 
     212             :         npages = atop(32768);
     213           0 :         if (npages > 0) {
     214           0 :                 KASSERT(npages <= UVM_MAXRANGE / 2);
     215           0 :                 uvmadvice[MADV_SEQUENTIAL].nforw = npages - 1;
     216           0 :                 uvmadvice[MADV_SEQUENTIAL].nback = npages;
     217           0 :         }
     218           0 : }
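
[editor's note] a worked example, assuming 4KB pages (PAGE_SHIFT == 12):
atop(16384) == 4, so MADV_NORMAL gets nback = 3 and nforw = 4, i.e. a fault
considers a window of nback + nforw + 1 = 8 pages around the faulting
address.  atop(32768) == 8 gives MADV_SEQUENTIAL nback = 8 and nforw = 7, a
16-page window, which is exactly UVM_MAXRANGE; the KASSERTs
(npages <= UVM_MAXRANGE / 2) keep both windows inside the pages[] and
anons_store[] arrays that uvm_fault() sizes with UVM_MAXRANGE.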
     219             : 
     220             : /*
     221             :  * uvmfault_amapcopy: clear "needs_copy" in a map.
     222             :  *
     223             :  * => if we are out of RAM we sleep (waiting for more)
     224             :  */
     225             : static void
     226           0 : uvmfault_amapcopy(struct uvm_faultinfo *ufi)
     227             : {
     228             : 
     229             :         /* while we haven't done the job */
     230           0 :         while (1) {
     231             :                 /* no mapping?  give up. */
     232           0 :                 if (uvmfault_lookup(ufi, TRUE) == FALSE)
     233             :                         return;
     234             : 
     235             :                 /* copy if needed. */
     236           0 :                 if (UVM_ET_ISNEEDSCOPY(ufi->entry))
     237           0 :                         amap_copy(ufi->map, ufi->entry, M_NOWAIT,
     238           0 :                                 UVM_ET_ISSTACK(ufi->entry) ? FALSE : TRUE,
     239           0 :                                 ufi->orig_rvaddr, ufi->orig_rvaddr + 1);
     240             : 
     241             :                 /* didn't work?  must be out of RAM.  sleep. */
     242           0 :                 if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
     243             :                         uvmfault_unlockmaps(ufi, TRUE);
     244           0 :                         uvm_wait("fltamapcopy");
     245           0 :                         continue;
     246             :                 }
     247             : 
     248             :                 /* got it! */
     249             :                 uvmfault_unlockmaps(ufi, TRUE);
     250             :                 return;
     251             :         }
     252             :         /*NOTREACHED*/
     253           0 : }
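
[editor's note] the caller's side of this contract is in uvm_fault() below:
on a write fault against a needs_copy entry it unlocks the maps, calls
uvmfault_amapcopy(&ufi), bumps uvmexp.fltamcopy and jumps back to ReFault,
so by the time the fault is retried the entry has its own amap.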
     254             : 
     255             : /*
     256             :  * uvmfault_anonget: get data in an anon into a non-busy, non-released
     257             :  * page in that anon.
     258             :  *
     259             :  * => we don't move the page on the queues [gets moved later]
     260             :  * => if we allocate a new page [we_own], it gets put on the queues.
     261             :  *    either way, the result is that the page is on the queues at return time
     262             :  */
     263             : int
     264           0 : uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap,
     265             :     struct vm_anon *anon)
     266             : {
     267             :         boolean_t we_own;       /* we own anon's page? */
     268             :         boolean_t locked;       /* did we relock? */
     269             :         struct vm_page *pg;
     270             :         int result;
     271             : 
     272             :         result = 0;             /* XXX shut up gcc */
     273          60 :         uvmexp.fltanget++;
     274             :         /* bump rusage counters */
     275           0 :         if (anon->an_page)
     276           0 :                 curproc->p_ru.ru_minflt++;
     277             :         else
     278           0 :                 curproc->p_ru.ru_majflt++;
     279             : 
     280             :         /* loop until we get it, or fail. */
     281           0 :         while (1) {
     282             :                 we_own = FALSE;         /* TRUE if we set PG_BUSY on a page */
     283           0 :                 pg = anon->an_page;
     284             : 
     285             :                 /* page there?   make sure it is not busy/released. */
     286           0 :                 if (pg) {
     287           0 :                         KASSERT(pg->pg_flags & PQ_ANON);
     288           0 :                         KASSERT(pg->uanon == anon);
     289             :                         
     290             :                         /*
     291             :                          * if the page is busy, we drop all the locks and
     292             :                          * try again.
     293             :                          */
     294          60 :                         if ((pg->pg_flags & (PG_BUSY|PG_RELEASED)) == 0)
     295           0 :                                 return (VM_PAGER_OK);
     296           0 :                         atomic_setbits_int(&pg->pg_flags, PG_WANTED);
     297           0 :                         uvmexp.fltpgwait++;
     298             : 
     299             :                         /*
     300             :                          * the last unlock must be an atomic unlock+wait on
     301             :                          * the owner of page
     302             :                          */
     303           0 :                         uvmfault_unlockall(ufi, amap, NULL, NULL);
     304           0 :                         UVM_WAIT(pg, 0, "anonget2", 0);
     305             :                         /* ready to relock and try again */
     306           0 :                 } else {
     307             :                         /* no page, we must try and bring it in. */
     308           0 :                         pg = uvm_pagealloc(NULL, 0, anon, 0);
     309             : 
     310           0 :                         if (pg == NULL) {               /* out of RAM.  */
     311           0 :                                 uvmfault_unlockall(ufi, amap, NULL, anon);
     312           0 :                                 uvmexp.fltnoram++;
     313           0 :                                 uvm_wait("flt_noram1");
     314             :                                 /* ready to relock and try again */
     315           0 :                         } else {
     316             :                                 /* we set the PG_BUSY bit */
     317             :                                 we_own = TRUE;  
     318           0 :                                 uvmfault_unlockall(ufi, amap, NULL, anon);
     319             : 
     320             :                                 /*
     321             :                                  * we are passing a PG_BUSY+PG_FAKE+PG_CLEAN
     322             :                                  * page into the uvm_swap_get function with
     323             :                                  * all data structures unlocked.  note that
     324             :                                  * it is ok to read an_swslot here because
     325             :                                  * we hold PG_BUSY on the page.
     326             :                                  */
     327           0 :                                 uvmexp.pageins++;
     328           0 :                                 result = uvm_swap_get(pg, anon->an_swslot,
     329             :                                     PGO_SYNCIO);
     330             : 
     331             :                                 /*
     332             :                                  * we clean up after the i/o below in the
     333             :                                  * "we_own" case
     334             :                                  */
     335             :                                 /* ready to relock and try again */
     336             :                         }
     337             :                 }
     338             : 
     339             :                 /* now relock and try again */
     340           0 :                 locked = uvmfault_relock(ufi);
     341             : 
     342             :                 /*
     343             :                  * if we own the page (i.e. we set PG_BUSY), then we need
     344             :                  * to clean up after the I/O. there are three cases to
     345             :                  * consider:
     346             :                  *   [1] page released during I/O: free anon and ReFault.
     347             :                  *   [2] I/O not OK.   free the page and cause the fault 
     348             :                  *       to fail.
     349             :                  *   [3] I/O OK!   activate the page and sync with the
     350             :                  *       non-we_own case (i.e. drop anon lock if not locked).
     351             :                  */
     352           0 :                 if (we_own) {
     353           0 :                         if (pg->pg_flags & PG_WANTED) {
     354           0 :                                 wakeup(pg);     
     355           0 :                         }
     356             :                         /* un-busy! */
     357           0 :                         atomic_clearbits_int(&pg->pg_flags,
     358             :                             PG_WANTED|PG_BUSY|PG_FAKE);
     359             :                         UVM_PAGE_OWN(pg, NULL);
     360             : 
     361             :                         /* 
     362             :                          * if we were RELEASED during I/O, then our anon is
     363             :                          * no longer part of an amap.   we need to free the
     364             :                          * anon and try again.
     365             :                          */
     366           0 :                         if (pg->pg_flags & PG_RELEASED) {
     367           0 :                                 pmap_page_protect(pg, PROT_NONE);
     368           0 :                                 uvm_anfree(anon);       /* frees page for us */
     369           0 :                                 if (locked)
     370           0 :                                         uvmfault_unlockall(ufi, amap, NULL,
     371             :                                                            NULL);
     372           0 :                                 uvmexp.fltpgrele++;
     373           0 :                                 return (VM_PAGER_REFAULT);      /* refault! */
     374             :                         }
     375             : 
     376           0 :                         if (result != VM_PAGER_OK) {
     377           0 :                                 KASSERT(result != VM_PAGER_PEND);
     378             : 
     379             :                                 /* remove page from anon */
     380           0 :                                 anon->an_page = NULL;
     381             : 
     382             :                                 /*
     383             :                                  * remove the swap slot from the anon
     384             :                                  * and mark the anon as having no real slot.
     385             :                                  * don't free the swap slot, thus preventing
     386             :                                  * it from being used again.
     387             :                                  */
     388           0 :                                 uvm_swap_markbad(anon->an_swslot, 1);
     389           0 :                                 anon->an_swslot = SWSLOT_BAD;
     390             : 
     391             :                                 /*
     392             :                                  * note: page was never !PG_BUSY, so it
     393             :                                  * can't be mapped and thus no need to
     394             :                                  * pmap_page_protect it...
     395             :                                  */
     396           0 :                                 uvm_lock_pageq();
     397           0 :                                 uvm_pagefree(pg);
     398           0 :                                 uvm_unlock_pageq();
     399             : 
     400           0 :                                 if (locked)
     401           0 :                                         uvmfault_unlockall(ufi, amap, NULL,
     402             :                                             anon);
     403           0 :                                 return (VM_PAGER_ERROR);
     404             :                         }
     405             :                         
     406             :                         /*
     407             :                          * must be OK, clear modify (already PG_CLEAN)
     408             :                          * and activate
     409             :                          */
     410           0 :                         pmap_clear_modify(pg);
     411           0 :                         uvm_lock_pageq();
     412           0 :                         uvm_pageactivate(pg);
     413           0 :                         uvm_unlock_pageq();
     414           0 :                 }
     415             : 
     416             :                 /* we were not able to relock.   restart fault. */
     417           0 :                 if (!locked)
     418           0 :                         return (VM_PAGER_REFAULT);
     419             : 
     420             :                 /* verify no one touched the amap and moved the anon on us. */
     421           0 :                 if (ufi != NULL &&
     422           0 :                     amap_lookup(&ufi->entry->aref, 
     423           0 :                                 ufi->orig_rvaddr - ufi->entry->start) != anon) {
     424             :                         
     425           0 :                         uvmfault_unlockall(ufi, amap, NULL, anon);
     426           0 :                         return (VM_PAGER_REFAULT);
     427             :                 }
     428             : 
     429             :                 /* try it again! */
     430           0 :                 uvmexp.fltanretry++;
     431           0 :                 continue;
     432             : 
     433             :         } /* while (1) */
     434             :         /*NOTREACHED*/
     435           0 : }
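
[editor's note] stripped of the error handling, the function follows the
busy-page protocol promised in the file header: claim the page, drop every
lock before sleeping, then relock and re-verify.  a condensed sketch (not a
drop-in replacement; see the we_own cleanup cases above):

        atomic_setbits_int(&pg->pg_flags, PG_BUSY);   /* claim the page */
        uvmfault_unlockall(ufi, amap, NULL, anon);    /* last unlock */
        result = uvm_swap_get(pg, anon->an_swslot, PGO_SYNCIO); /* sleeps */
        locked = uvmfault_relock(ufi);                /* world may have moved */
        if (!locked || amap_lookup(&ufi->entry->aref,
            ufi->orig_rvaddr - ufi->entry->start) != anon)
                return (VM_PAGER_REFAULT);            /* caller restarts */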
     436             : 
     437             : /*
     438             :  * Update statistics after fault resolution.
     439             :  * - maxrss
     440             :  */
     441             : void
     442           0 : uvmfault_update_stats(struct uvm_faultinfo *ufi)
     443             : {
     444             :         struct vm_map           *map;
     445             :         struct proc             *p;
     446             :         vsize_t                  res;
     447             : 
     448           0 :         map = ufi->orig_map;
     449             : 
     450             :         /*
     451             :          * If this is a nested pmap (eg, a virtual machine pmap managed
     452             :          * by vmm(4) on amd64/i386), don't do any updating, just return.
     453             :          *
     454             :          * pmap_nested() on other archs is #defined to 0, so this is a
     455             :          * no-op.
     456             :          */
     457           0 :         if (pmap_nested(map->pmap))
     458           0 :                 return;
     459             : 
     460             :         /* Update the maxrss for the process. */
     461          60 :         if (map->flags & VM_MAP_ISVMSPACE) {
     462           0 :                 p = curproc;
     463           0 :                 KASSERT(p != NULL && &p->p_vmspace->vm_map == map);
     464             : 
     465           0 :                 res = pmap_resident_count(map->pmap);
     466             :                 /* Convert res from pages to kilobytes. */
     467           0 :                 res <<= (PAGE_SHIFT - 10);
     468             : 
     469           0 :                 if (p->p_ru.ru_maxrss < res)
     470           0 :                         p->p_ru.ru_maxrss = res;
     471             :         }
     472           0 : }
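
[editor's note] on the unit conversion above: with 4KB pages (PAGE_SHIFT ==
12) the shift is by 2, so each resident page contributes 4 to res; a process
with 1000 resident pages is recorded as ru_maxrss = 4000 KB.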
     473             : 
     474             : /*
     475             :  *   F A U L T   -   m a i n   e n t r y   p o i n t
     476             :  */
     477             : 
     478             : /*
     479             :  * uvm_fault: page fault handler
     480             :  *
     481             :  * => called from MD code to resolve a page fault
     482             :  * => VM data structures usually should be unlocked.   however, it is 
     483             :  *      possible to call here with the main map locked if the caller
     484             :  *      gets a write lock, sets it recursive, and then calls us (c.f.
     485             :  *      uvm_map_pageable).   this should be avoided because it keeps
      486             :  *      the map locked throughout the I/O.
     487             :  */
     488             : #define MASK(entry)     (UVM_ET_ISCOPYONWRITE(entry) ? \
     489             :                          ~PROT_WRITE : PROT_MASK)
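
[editor's note] MASK() is what keeps lower-layer pages copy-on-write: for a
COW entry it evaluates to ~PROT_WRITE, so the "enter_prot & MASK(ufi.entry)"
used when entering uobj pages below maps them read-only even in a writable
mapping.  the first real write then faults again and is promoted as case 2B
into a fresh anon.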
     490             : int
     491           0 : uvm_fault(vm_map_t orig_map, vaddr_t vaddr, vm_fault_t fault_type,
     492             :     vm_prot_t access_type)
     493             : {
     494           0 :         struct uvm_faultinfo ufi;
     495             :         vm_prot_t enter_prot;
     496             :         boolean_t wired, narrow, promote, locked, shadowed;
     497           0 :         int npages, nback, nforw, centeridx, result, lcv, gotpages, ret;
     498             :         vaddr_t startva, currva;
     499             :         voff_t uoff;
     500           0 :         paddr_t pa; 
     501             :         struct vm_amap *amap;
     502             :         struct uvm_object *uobj;
     503           0 :         struct vm_anon *anons_store[UVM_MAXRANGE], **anons, *anon, *oanon;
     504           0 :         struct vm_page *pages[UVM_MAXRANGE], *pg, *uobjpage;
     505             : 
     506             :         anon = NULL;
     507             :         pg = NULL;
     508             : 
     509          76 :         uvmexp.faults++;        /* XXX: locking? */
     510             : 
     511             :         /* init the IN parameters in the ufi */
     512           0 :         ufi.orig_map = orig_map;
     513           0 :         ufi.orig_rvaddr = trunc_page(vaddr);
     514           0 :         ufi.orig_size = PAGE_SIZE;      /* can't get any smaller than this */
     515           0 :         if (fault_type == VM_FAULT_WIRE)
     516           0 :                 narrow = TRUE;          /* don't look for neighborhood
     517             :                                          * pages on wire */
     518             :         else
     519             :                 narrow = FALSE;         /* normal fault */
     520             : 
     521             :         /* "goto ReFault" means restart the page fault from ground zero. */
     522             : ReFault:
     523             :         /* lookup and lock the maps */
     524           0 :         if (uvmfault_lookup(&ufi, FALSE) == FALSE) {
     525           0 :                 return (EFAULT);
     526             :         }
     527             : 
     528             : #ifdef DIAGNOSTIC
     529           0 :         if ((ufi.map->flags & VM_MAP_PAGEABLE) == 0)
     530           0 :                 panic("uvm_fault: fault on non-pageable map (%p, 0x%lx)",
     531             :                     ufi.map, vaddr);
     532             : #endif
     533             : 
     534             :         /* check protection */
     535           0 :         if ((ufi.entry->protection & access_type) != access_type) {
     536           0 :                 uvmfault_unlockmaps(&ufi, FALSE);
     537           0 :                 return (EACCES);
     538             :         }
     539             : 
     540             :         /*
     541             :          * "enter_prot" is the protection we want to enter the page in at.
     542             :          * for certain pages (e.g. copy-on-write pages) this protection can
     543             :          * be more strict than ufi.entry->protection.  "wired" means either
     544             :          * the entry is wired or we are fault-wiring the pg.
     545             :          */
     546             : 
     547             :         enter_prot = ufi.entry->protection;
     548           0 :         wired = VM_MAPENT_ISWIRED(ufi.entry) || (fault_type == VM_FAULT_WIRE);
     549           0 :         if (wired)
     550           0 :                 access_type = enter_prot; /* full access for wired */
     551             : 
     552             :         /* handle "needs_copy" case. */
     553           0 :         if (UVM_ET_ISNEEDSCOPY(ufi.entry)) {
     554           0 :                 if ((access_type & PROT_WRITE) ||
     555           0 :                     (ufi.entry->object.uvm_obj == NULL)) {
     556             :                         /* need to clear */
     557           0 :                         uvmfault_unlockmaps(&ufi, FALSE);
     558           0 :                         uvmfault_amapcopy(&ufi);
     559           0 :                         uvmexp.fltamcopy++;
     560           0 :                         goto ReFault;
     561             :                 } else {
     562             :                         /*
     563             :                          * ensure that we pmap_enter page R/O since
     564             :                          * needs_copy is still true
     565             :                          */
     566           2 :                         enter_prot &= ~PROT_WRITE; 
     567             :                 }
     568           0 :         }
     569             : 
     570             :         /* identify the players */
     571           0 :         amap = ufi.entry->aref.ar_amap;              /* top layer */
     572           0 :         uobj = ufi.entry->object.uvm_obj;    /* bottom layer */
     573             : 
     574             :         /*
     575             :          * check for a case 0 fault.  if nothing backing the entry then
     576             :          * error now.
     577             :          */
     578           0 :         if (amap == NULL && uobj == NULL) {
     579           0 :                 uvmfault_unlockmaps(&ufi, FALSE);
     580           0 :                 return (EFAULT);
     581             :         }
     582             : 
     583             :         /*
     584             :          * establish range of interest based on advice from mapper
     585             :          * and then clip to fit map entry.   note that we only want
     586             :          * to do this the first time through the fault.   if we 
     587             :          * ReFault we will disable this by setting "narrow" to true.
     588             :          */
     589          60 :         if (narrow == FALSE) {
     590             : 
     591             :                 /* wide fault (!narrow) */
     592          62 :                 nback = min(uvmadvice[ufi.entry->advice].nback,
     593           0 :                             (ufi.orig_rvaddr - ufi.entry->start) >> PAGE_SHIFT);
     594           0 :                 startva = ufi.orig_rvaddr - ((vsize_t)nback << PAGE_SHIFT);
     595           0 :                 nforw = min(uvmadvice[ufi.entry->advice].nforw,
     596           0 :                             ((ufi.entry->end - ufi.orig_rvaddr) >>
     597           0 :                              PAGE_SHIFT) - 1);
     598             :                 /*
     599             :                  * note: "-1" because we don't want to count the
     600             :                  * faulting page as forw
     601             :                  */
     602           0 :                 npages = nback + nforw + 1;
     603             :                 centeridx = nback;
     604             : 
     605             :                 narrow = TRUE;  /* ensure only once per-fault */
     606           0 :         } else {
     607             :                 /* narrow fault! */
     608             :                 nback = nforw = 0;
     609           0 :                 startva = ufi.orig_rvaddr;
     610             :                 npages = 1;
     611             :                 centeridx = 0;
     612             :         }
     613             : 
     614             :         /* if we've got an amap, extract current anons. */
     615           2 :         if (amap) {
     616           0 :                 anons = anons_store;
     617          60 :                 amap_lookups(&ufi.entry->aref, startva - ufi.entry->start,
     618             :                     anons, npages);
     619           0 :         } else {
     620             :                 anons = NULL;   /* to be safe */
     621             :         }
     622             : 
     623             :         /*
     624             :          * for MADV_SEQUENTIAL mappings we want to deactivate the back pages
     625             :          * now and then forget about them (for the rest of the fault).
     626             :          */
     627          62 :         if (ufi.entry->advice == MADV_SEQUENTIAL && nback != 0) {
     628             :                 /* flush back-page anons? */
     629           0 :                 if (amap) 
     630           0 :                         uvmfault_anonflush(anons, nback);
     631             : 
     632             :                 /* flush object? */
     633           0 :                 if (uobj) {
     634           0 :                         uoff = (startva - ufi.entry->start) + ufi.entry->offset;
     635           0 :                         (void) uobj->pgops->pgo_flush(uobj, uoff, uoff + 
     636           0 :                             ((vsize_t)nback << PAGE_SHIFT), PGO_DEACTIVATE);
     637           0 :                 }
     638             : 
     639             :                 /* now forget about the backpages */
     640           0 :                 if (amap)
     641           0 :                         anons += nback;
     642           0 :                 startva += ((vsize_t)nback << PAGE_SHIFT);
     643           0 :                 npages -= nback;
     644             :                 centeridx = 0;
     645           0 :         }
     646             : 
     647             :         /*
     648             :          * map in the backpages and frontpages we found in the amap in hopes
     649             :          * of preventing future faults.    we also init the pages[] array as
     650             :          * we go.
     651             :          */
     652             :         currva = startva;
     653             :         shadowed = FALSE;
     654          60 :         for (lcv = 0 ; lcv < npages ; lcv++, currva += PAGE_SIZE) {
     655             :                 /*
     656             :                  * dont play with VAs that are already mapped
     657             :                  * except for center)
     658             :                  */
     659           3 :                 if (lcv != centeridx &&
     660           0 :                     pmap_extract(ufi.orig_map->pmap, currva, &pa)) {
     661           2 :                         pages[lcv] = PGO_DONTCARE;
     662           0 :                         continue;
     663             :                 }
     664             : 
     665             :                 /* unmapped or center page.   check if any anon at this level. */
     666          61 :                 if (amap == NULL || anons[lcv] == NULL) {
     667           1 :                         pages[lcv] = NULL;
     668           0 :                         continue;
     669             :                 }
     670             : 
     671             :                 /* check for present page and map if possible.   re-activate it. */
     672           0 :                 pages[lcv] = PGO_DONTCARE;
     673          60 :                 if (lcv == centeridx) {         /* save center for later! */
     674             :                         shadowed = TRUE;
     675           0 :                         continue;
     676             :                 }
     677           0 :                 anon = anons[lcv];
     678           0 :                 if (anon->an_page &&
     679           0 :                     (anon->an_page->pg_flags & (PG_RELEASED|PG_BUSY)) == 0) {
     680           0 :                         uvm_lock_pageq();
     681           0 :                         uvm_pageactivate(anon->an_page);     /* reactivate */
     682           0 :                         uvm_unlock_pageq();
     683           0 :                         uvmexp.fltnamap++;
     684             : 
     685             :                         /*
     686             :                          * Since this isn't the page that's actually faulting,
     687             :                          * ignore pmap_enter() failures; it's not critical
     688             :                          * that we enter these right now.
     689             :                          */
     690           0 :                         (void) pmap_enter(ufi.orig_map->pmap, currva,
     691           0 :                             VM_PAGE_TO_PHYS(anon->an_page),
     692           0 :                             (anon->an_ref > 1) ? (enter_prot & ~PROT_WRITE) :
     693             :                             enter_prot,
     694           0 :                             PMAP_CANFAIL |
     695           0 :                              (VM_MAPENT_ISWIRED(ufi.entry) ? PMAP_WIRED : 0));
     696           0 :                 }
     697             :         }
     698             :         if (npages > 1)
     699             :                 pmap_update(ufi.orig_map->pmap);
     700             : 
     701             :         /* (shadowed == TRUE) if there is an anon at the faulting address */
     702             :         /*
     703             :          * note that if we are really short of RAM we could sleep in the above
     704             :          * call to pmap_enter.   bad?
     705             :          *
     706             :          * XXX Actually, that is bad; pmap_enter() should just fail in that
     707             :          * XXX case.  --thorpej
     708             :          */
     709             :         /*
     710             :          * if the desired page is not shadowed by the amap and we have a
     711             :          * backing object, then we check to see if the backing object would
     712             :          * prefer to handle the fault itself (rather than letting us do it
     713             :          * with the usual pgo_get hook).  the backing object signals this by
     714             :          * providing a pgo_fault routine.
     715             :          */
     716           0 :         if (uobj && shadowed == FALSE && uobj->pgops->pgo_fault != NULL) {
     717           0 :                 result = uobj->pgops->pgo_fault(&ufi, startva, pages, npages,
     718             :                                     centeridx, fault_type, access_type,
     719             :                                     PGO_LOCKED);
     720             : 
     721           0 :                 if (result == VM_PAGER_OK)
     722           0 :                         return (0);             /* pgo_fault did pmap enter */
     723           0 :                 else if (result == VM_PAGER_REFAULT)
     724           0 :                         goto ReFault;           /* try again! */
     725             :                 else
     726           0 :                         return (EACCES);
     727             :         }
     728             : 
     729             :         /*
     730             :          * now, if the desired page is not shadowed by the amap and we have
     731             :          * a backing object that does not have a special fault routine, then
     732             :          * we ask (with pgo_get) the object for resident pages that we care
     733             :          * about and attempt to map them in.  we do not let pgo_get block
     734             :          * (PGO_LOCKED).
     735             :          *
     736             :          * ("get" has the option of doing a pmap_enter for us)
     737             :          */
     738           0 :         if (uobj && shadowed == FALSE) {
     739           0 :                 uvmexp.fltlget++;
     740           0 :                 gotpages = npages;
     741           0 :                 (void) uobj->pgops->pgo_get(uobj, ufi.entry->offset +
     742           0 :                                 (startva - ufi.entry->start),
     743           0 :                                 pages, &gotpages, centeridx,
     744           0 :                                 access_type & MASK(ufi.entry),
     745           0 :                                 ufi.entry->advice, PGO_LOCKED);
     746             : 
     747             :                 /* check for pages to map, if we got any */
     748           0 :                 uobjpage = NULL;
     749           0 :                 if (gotpages) {
     750             :                         currva = startva;
     751           0 :                         for (lcv = 0 ; lcv < npages ;
     752           0 :                             lcv++, currva += PAGE_SIZE) {
     753           0 :                                 if (pages[lcv] == NULL ||
     754           0 :                                     pages[lcv] == PGO_DONTCARE)
     755             :                                         continue;
     756             : 
     757           0 :                                 KASSERT((pages[lcv]->pg_flags & PG_RELEASED) == 0);
     758             : 
     759             :                                 /*
     760             :                                  * if center page is resident and not
     761             :                                  * PG_BUSY, then pgo_get made it PG_BUSY
     762             :                                  * for us and gave us a handle to it.
     763             :                                  * remember this page as "uobjpage."
     764             :                                  * (for later use).
     765             :                                  */
     766           0 :                                 if (lcv == centeridx) {
     767           0 :                                         uobjpage = pages[lcv];
     768           0 :                                         continue;
     769             :                                 }
     770             :         
     771             :                                 /* 
     772             :                                  * note: calling pgo_get with locked data
     773             :                                  * structures returns us pages which are
     774             :                                  * neither busy nor released, so we don't
     775             :                                  * need to check for this.   we can just
     776             :                                  * directly enter the page (after moving it
     777             :                                  * to the head of the active queue [useful?]).
     778             :                                  */
     779             : 
     780           0 :                                 uvm_lock_pageq();
     781           0 :                                 uvm_pageactivate(pages[lcv]);   /* reactivate */
     782           0 :                                 uvm_unlock_pageq();
     783           0 :                                 uvmexp.fltnomap++;
     784             : 
     785             :                                 /*
     786             :                                  * Since this page isn't the page that's
     787             :                                  * actually faulting, ignore pmap_enter()
     788             :                                  * failures; it's not critical that we
     789             :                                  * enter these right now.
     790             :                                  */
     791           0 :                                 (void) pmap_enter(ufi.orig_map->pmap, currva,
     792           0 :                                     VM_PAGE_TO_PHYS(pages[lcv]),
     793           0 :                                     enter_prot & MASK(ufi.entry),
     794           0 :                                     PMAP_CANFAIL |
     795           0 :                                      (wired ? PMAP_WIRED : 0));
     796             : 
     797             :                                 /* 
     798             :                                  * NOTE: page can't be PG_WANTED because
     799             :                                  * we've held the lock the whole time
     800             :                                  * we've had the handle.
     801             :                                  */
     802           0 :                                 atomic_clearbits_int(&pages[lcv]->pg_flags,
     803             :                                     PG_BUSY);
     804             :                                 UVM_PAGE_OWN(pages[lcv], NULL);
     805           0 :                         }       /* for "lcv" loop */
     806             :                         pmap_update(ufi.orig_map->pmap);
     807             :                 }   /* "gotpages" != 0 */
     808             :                 /* note: object still _locked_ */
     809             :         } else {
     810          60 :                 uobjpage = NULL;
     811             :         }
     812             : 
     813             :         /*
     814             :          * note that at this point we are done with any front or back pages.
     815             :          * we are now going to focus on the center page (i.e. the one we've
     816             :          * faulted on).  if we have faulted on the top (anon) layer
     817             :          * [i.e. case 1], then the anon we want is anons[centeridx] (we have
     818             :          * not touched it yet).  if we have faulted on the bottom (uobj)
     819             :          * layer [i.e. case 2] and the page was both present and available,
     820             :          * then we've got a pointer to it as "uobjpage" and we've already
     821             :          * made it BUSY.
     822             :          */
     823             :         /*
     824             :          * there are four possible cases we must address: 1A, 1B, 2A, and 2B
     825             :          */
     826             :         /* redirect case 2: if we are not shadowed, go to case 2. */
     827           0 :         if (shadowed == FALSE) 
     828             :                 goto Case2;
     829             : 
     830             :         /* handle case 1: fault on an anon in our amap */
     831           0 :         anon = anons[centeridx];
     832             : 
     833             :         /*
     834             :          * no matter if we have case 1A or case 1B we are going to need to
     835             :          * have the anon's memory resident.   ensure that now.
     836             :          */
     837             :         /*
     838             :          * let uvmfault_anonget do the dirty work.
     839             :          * also, if it is OK, then the anon's page is on the queues.
     840             :          */
     841           0 :         result = uvmfault_anonget(&ufi, amap, anon);
     842           0 :         switch (result) {
     843             :         case VM_PAGER_OK:
     844             :                 break; 
     845             : 
     846             :         case VM_PAGER_REFAULT:
     847           0 :                 goto ReFault;
     848             : 
     849             :         case VM_PAGER_ERROR:
     850             :                 /*
      851             :                  * An error occurred while trying to bring in the
     852             :                  * page -- this is the only error we return right
     853             :                  * now.
     854             :                  */
     855           0 :                 return (EACCES);        /* XXX */
     856             :         default:
     857             : #ifdef DIAGNOSTIC
     858           0 :                 panic("uvm_fault: uvmfault_anonget -> %d", result);
     859             : #else
     860             :                 return (EACCES);
     861             : #endif
     862             :         }
     863             : 
     864             :         /*
     865             :          * if we are case 1B then we will need to allocate a new blank
     866             :          * anon to transfer the data into.   note that we have a lock
     867             :          * on anon, so no one can busy or release the page until we are done.
     868             :          * also note that the ref count can't drop to zero here because
     869             :          * it is > 1 and we are only dropping one ref.
     870             :          *
     871             :          * in the (hopefully very rare) case that we are out of RAM we 
     872             :          * will wait for more RAM, and refault.    
     873             :          *
     874             :          * if we are out of anon VM we wait for RAM to become available.
     875             :          */
     876             : 
     877           0 :         if ((access_type & PROT_WRITE) != 0 && anon->an_ref > 1) {
     878           0 :                 uvmexp.flt_acow++;
     879             :                 oanon = anon;           /* oanon = old */
     880           0 :                 anon = uvm_analloc();
     881           0 :                 if (anon) {
     882           0 :                         pg = uvm_pagealloc(NULL, 0, anon, 0);
     883           0 :                 }
     884             : 
     885             :                 /* check for out of RAM */
     886           0 :                 if (anon == NULL || pg == NULL) {
     887           0 :                         uvmfault_unlockall(&ufi, amap, NULL, oanon);
     888           0 :                         KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
     889           0 :                         if (anon == NULL)
     890           0 :                                 uvmexp.fltnoanon++;
     891             :                         else {
     892           0 :                                 uvm_anfree(anon);
     893           0 :                                 uvmexp.fltnoram++;
     894             :                         }
     895             : 
     896           0 :                         if (uvmexp.swpgonly == uvmexp.swpages)
     897           0 :                                 return (ENOMEM);
     898             : 
     899             :                         /* out of RAM, wait for more */
     900           0 :                         if (anon == NULL)
     901           0 :                                 uvm_anwait();
     902             :                         else
     903           0 :                                 uvm_wait("flt_noram3");
     904           0 :                         goto ReFault;
     905             :                 }
     906             : 
      907             :                 /* got all resources, copy the old anon's page into the new one */
     908           0 :                 uvm_pagecopy(oanon->an_page, pg);    /* pg now !PG_CLEAN */
     909             :                 /* un-busy! new page */
     910           0 :                 atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE);
     911             :                 UVM_PAGE_OWN(pg, NULL);
     912           0 :                 ret = amap_add(&ufi.entry->aref,
     913           0 :                     ufi.orig_rvaddr - ufi.entry->start, anon, 1);
     914           0 :                 KASSERT(ret == 0);
     915             : 
     916             :                 /* deref: can not drop to zero here by defn! */
     917           0 :                 oanon->an_ref--;
     918             : 
     919             :                 /*
      920             :                  * note: anon is _not_ locked, but we have the sole
      921             :                  * reference to it from the amap.
     922             :                  * thus, no one can get at it until we are done with it.
     923             :                  */
     924           0 :         } else {
     925          60 :                 uvmexp.flt_anon++;
     926             :                 oanon = anon;
     927           0 :                 pg = anon->an_page;
     928           0 :                 if (anon->an_ref > 1)     /* disallow writes to ref > 1 anons */
     929           0 :                         enter_prot = enter_prot & ~PROT_WRITE;
     930             :         }
     931             : 
     932             :         /*
     933             :          * now map the page in ...
     934             :          * XXX: old fault unlocks object before pmap_enter.  this seems
     935             :          * suspect since some other thread could blast the page out from
     936             :          * under us between the unlock and the pmap_enter.
     937             :          */
     938           0 :         if (pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg),
     939           0 :             enter_prot, access_type | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0))
     940           0 :             != 0) {
     941             :                 /*
     942             :                  * No need to undo what we did; we can simply think of
     943             :                  * this as the pmap throwing away the mapping information.
     944             :                  *
     945             :                  * We do, however, have to go through the ReFault path,
     946             :                  * as the map may change while we're asleep.
     947             :                  */
     948           0 :                 uvmfault_unlockall(&ufi, amap, NULL, oanon);
     949           0 :                 KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
     950           0 :                 if (uvmexp.swpgonly == uvmexp.swpages) {
     951             :                         /* XXX instrumentation */
     952           0 :                         return (ENOMEM);
     953             :                 }
     954             :                 /* XXX instrumentation */
     955           0 :                 uvm_wait("flt_pmfail1");
     956           0 :                 goto ReFault;
     957             :         }
     958             : 
     959             :         /* ... update the page queues. */
     960           0 :         uvm_lock_pageq();
     961             : 
     962           0 :         if (fault_type == VM_FAULT_WIRE) {
     963           0 :                 uvm_pagewire(pg);
     964             :                 /*
     965             :                  * since the now-wired page cannot be paged out,
     966             :                  * release its swap resources for others to use.
     967             :                  * since an anon with no swap cannot be PG_CLEAN,
     968             :                  * clear its clean flag now.
     969             :                  */
     970           0 :                 atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
     971           0 :                 uvm_anon_dropswap(anon);
     972           0 :         } else {
     973             :                 /* activate it */
     974          60 :                 uvm_pageactivate(pg);
     975             :         }
     976             : 
     977           0 :         uvm_unlock_pageq();
     978             : 
     979             :         /* done case 1!  finish up by unlocking everything and returning success */
     980           0 :         uvmfault_unlockall(&ufi, amap, NULL, oanon);
     981             :         pmap_update(ufi.orig_map->pmap);
     982           0 :         return (0);
     983             : 
     984             : 
     985             : Case2:
     986             :         /* handle case 2: faulting on backing object or zero fill */
     987             :         /*
     988             :          * note that uobjpage can not be PGO_DONTCARE at this point.  we now
     989             :          * set uobjpage to PGO_DONTCARE if we are doing a zero fill.  if we
     990             :          * have a backing object, check and see if we are going to promote
     991             :          * the data up to an anon during the fault.
     992             :          */
     993           0 :         if (uobj == NULL) {
     994           0 :                 uobjpage = PGO_DONTCARE;        
     995             :                 promote = TRUE;         /* always need anon here */
     996           0 :         } else {
     997           0 :                 KASSERT(uobjpage != PGO_DONTCARE);
     998           0 :                 promote = (access_type & PROT_WRITE) &&
     999           0 :                      UVM_ET_ISCOPYONWRITE(ufi.entry);
    1000             :         }
    1001             : 
    1002             :         /*
    1003             :          * if uobjpage is not null then we do not need to do I/O to get the
    1004             :          * uobjpage.
    1005             :          *
    1006             :          * if uobjpage is null, then we need to ask the pager to 
     1007             :                  * get the data for us.   once we have the data, we need to re-verify
     1008             :                  * the state of the world.   we are currently not holding any resources.
    1009             :          */
    1010           0 :         if (uobjpage) {
    1011             :                 /* update rusage counters */
    1012           0 :                 curproc->p_ru.ru_minflt++;
    1013           0 :         } else {
    1014             :                 /* update rusage counters */
    1015           0 :                 curproc->p_ru.ru_majflt++;
    1016             :                 
    1017           0 :                 uvmfault_unlockall(&ufi, amap, NULL, NULL);
    1018             : 
    1019           0 :                 uvmexp.fltget++;
    1020           0 :                 gotpages = 1;
    1021           0 :                 uoff = (ufi.orig_rvaddr - ufi.entry->start) + ufi.entry->offset;
    1022           0 :                 result = uobj->pgops->pgo_get(uobj, uoff, &uobjpage, &gotpages,
    1023           0 :                     0, access_type & MASK(ufi.entry), ufi.entry->advice,
    1024             :                     PGO_SYNCIO);
    1025             : 
    1026             :                 /* recover from I/O */
    1027           0 :                 if (result != VM_PAGER_OK) {
    1028           0 :                         KASSERT(result != VM_PAGER_PEND);
    1029             : 
    1030           0 :                         if (result == VM_PAGER_AGAIN) {
    1031           0 :                                 tsleep(&lbolt, PVM, "fltagain2", 0);
    1032           0 :                                 goto ReFault;
    1033             :                         }
    1034             : 
    1035           0 :                         if (!UVM_ET_ISNOFAULT(ufi.entry))
    1036           0 :                                 return (EIO);
    1037             : 
    1038           0 :                         uobjpage = PGO_DONTCARE;        
    1039             :                         promote = TRUE;
    1040           0 :                 }
    1041             : 
    1042             :                 /* re-verify the state of the world.  */
    1043           0 :                 locked = uvmfault_relock(&ufi);
    1044             :                 
    1045             :                 /*
     1046             :                  * Re-verify that the amap slot is still free.  If it has
     1047             :                  * been filled in the meantime, we clean up.
    1048             :                  */
    1049           0 :                 if (locked && amap && amap_lookup(&ufi.entry->aref,
    1050           0 :                       ufi.orig_rvaddr - ufi.entry->start)) {
    1051           0 :                         if (locked) 
    1052           0 :                                 uvmfault_unlockall(&ufi, amap, NULL, NULL);
    1053             :                         locked = FALSE;
    1054           0 :                 }
    1055             : 
    1056             :                 /* didn't get the lock?   release the page and retry. */
    1057           0 :                 if (locked == FALSE && uobjpage != PGO_DONTCARE) {
    1058           0 :                         uvm_lock_pageq();
    1059             :                         /* make sure it is in queues */
    1060           0 :                         uvm_pageactivate(uobjpage);
    1061           0 :                         uvm_unlock_pageq();
    1062             : 
    1063           0 :                         if (uobjpage->pg_flags & PG_WANTED)
    1064             :                                 /* still holding object lock */
    1065           0 :                                 wakeup(uobjpage);
    1066           0 :                         atomic_clearbits_int(&uobjpage->pg_flags,
    1067             :                             PG_BUSY|PG_WANTED);
    1068             :                         UVM_PAGE_OWN(uobjpage, NULL);
    1069           0 :                         goto ReFault;
    1070             :                 }
    1071             : 
    1072             :                 /*
    1073             :                  * we have the data in uobjpage which is PG_BUSY
    1074             :                  */
    1075             :         }
    1076             : 
    1077             :         /*
    1078             :          * notes:
    1079             :          *  - at this point uobjpage can not be NULL
    1080             :          *  - at this point uobjpage could be PG_WANTED (handle later)
    1081             :          */
    1082           0 :         if (promote == FALSE) {
    1083             :                 /*
    1084             :                  * we are not promoting.   if the mapping is COW ensure that we
    1085             :                  * don't give more access than we should (e.g. when doing a read
    1086             :                  * fault on a COPYONWRITE mapping we want to map the COW page in
    1087             :                  * R/O even though the entry protection could be R/W).
    1088             :                  *
    1089             :                  * set "pg" to the page we want to map in (uobjpage, usually)
    1090             :                  */
    1091           0 :                 uvmexp.flt_obj++;
    1092           0 :                 if (UVM_ET_ISCOPYONWRITE(ufi.entry))
    1093           0 :                         enter_prot &= ~PROT_WRITE;
    1094           0 :                 pg = uobjpage;          /* map in the actual object */
    1095             : 
    1096             :                 /* assert(uobjpage != PGO_DONTCARE) */
    1097             : 
    1098             :                 /*
    1099             :                  * we are faulting directly on the page.
    1100             :                  */
    1101           0 :         } else {
    1102             :                 /*
    1103             :                  * if we are going to promote the data to an anon we
    1104             :                  * allocate a blank anon here and plug it into our amap.
    1105             :                  */
    1106             : #ifdef DIAGNOSTIC
    1107           0 :                 if (amap == NULL)
    1108           0 :                         panic("uvm_fault: want to promote data, but no anon");
    1109             : #endif
    1110             : 
    1111           0 :                 anon = uvm_analloc();
    1112           0 :                 if (anon) {
    1113             :                         /*
    1114             :                          * In `Fill in data...' below, if
    1115             :                          * uobjpage == PGO_DONTCARE, we want
    1116             :                          * a zero'd, dirty page, so have
    1117             :                          * uvm_pagealloc() do that for us.
    1118             :                          */
    1119           0 :                         pg = uvm_pagealloc(NULL, 0, anon,
    1120           0 :                             (uobjpage == PGO_DONTCARE) ? UVM_PGA_ZERO : 0);
    1121           0 :                 }
    1122             : 
    1123             :                 /*
    1124             :                  * out of memory resources?
    1125             :                  */
    1126           0 :                 if (anon == NULL || pg == NULL) {
    1127             :                         /* arg!  must unbusy our page and fail or sleep. */
    1128           0 :                         if (uobjpage != PGO_DONTCARE) {
    1129           0 :                                 uvm_lock_pageq();
    1130           0 :                                 uvm_pageactivate(uobjpage);
    1131           0 :                                 uvm_unlock_pageq();
    1132             : 
    1133           0 :                                 if (uobjpage->pg_flags & PG_WANTED)
    1134           0 :                                         wakeup(uobjpage);
    1135           0 :                                 atomic_clearbits_int(&uobjpage->pg_flags,
    1136             :                                     PG_BUSY|PG_WANTED);
    1137             :                                 UVM_PAGE_OWN(uobjpage, NULL);
    1138           0 :                         }
    1139             : 
    1140             :                         /* unlock and fail ... */
    1141           0 :                         uvmfault_unlockall(&ufi, amap, uobj, NULL);
    1142           0 :                         KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
    1143           0 :                         if (anon == NULL)
    1144           0 :                                 uvmexp.fltnoanon++;
    1145             :                         else {
    1146           0 :                                 uvm_anfree(anon);
    1147           0 :                                 uvmexp.fltnoram++;
    1148             :                         }
    1149             : 
    1150           0 :                         if (uvmexp.swpgonly == uvmexp.swpages)
    1151           0 :                                 return (ENOMEM);
    1152             : 
    1153             :                         /* out of RAM, wait for more */
    1154           0 :                         if (anon == NULL)
    1155           0 :                                 uvm_anwait();
    1156             :                         else
    1157           0 :                                 uvm_wait("flt_noram5");
    1158           0 :                         goto ReFault;
    1159             :                 }
    1160             : 
    1161             :                 /* fill in the data */
    1162           0 :                 if (uobjpage != PGO_DONTCARE) {
    1163           0 :                         uvmexp.flt_prcopy++;
    1164             :                         /* copy page [pg now dirty] */
    1165           0 :                         uvm_pagecopy(uobjpage, pg);
    1166             : 
    1167             :                         /*
    1168             :                          * promote to shared amap?  make sure all sharing
    1169             :                          * procs see it
    1170             :                          */
    1171           0 :                         if ((amap_flags(amap) & AMAP_SHARED) != 0) {
    1172           0 :                                 pmap_page_protect(uobjpage, PROT_NONE);
    1173           0 :                         }
    1174             :                         
    1175             :                         /* dispose of uobjpage. drop handle to uobj as well. */
    1176           0 :                         if (uobjpage->pg_flags & PG_WANTED)
    1177           0 :                                 wakeup(uobjpage);
    1178           0 :                         atomic_clearbits_int(&uobjpage->pg_flags,
    1179             :                             PG_BUSY|PG_WANTED);
    1180             :                         UVM_PAGE_OWN(uobjpage, NULL);
    1181           0 :                         uvm_lock_pageq();
    1182           0 :                         uvm_pageactivate(uobjpage);
    1183           0 :                         uvm_unlock_pageq();
    1184             :                         uobj = NULL;
    1185           0 :                 } else {
    1186           0 :                         uvmexp.flt_przero++;
    1187             :                         /*
    1188             :                          * Page is zero'd and marked dirty by uvm_pagealloc()
    1189             :                          * above.
    1190             :                          */
    1191             :                 }
    1192             : 
    1193           0 :                 if (amap_add(&ufi.entry->aref,
    1194           0 :                     ufi.orig_rvaddr - ufi.entry->start, anon, 0)) {
    1195           0 :                         uvmfault_unlockall(&ufi, amap, NULL, oanon);
    1196           0 :                         KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
    1197           0 :                         uvm_anfree(anon);
    1198           0 :                         uvmexp.fltnoamap++;
    1199             : 
    1200           0 :                         if (uvmexp.swpgonly == uvmexp.swpages)
    1201           0 :                                 return (ENOMEM);
    1202             : 
    1203           0 :                         amap_populate(&ufi.entry->aref,
    1204           0 :                             ufi.orig_rvaddr - ufi.entry->start);
    1205           0 :                         goto ReFault;
    1206             :                 }
    1207             :         }
    1208             : 
    1209             :         /* note: pg is either the uobjpage or the new page in the new anon */
    1210             :         /*
    1211             :          * all resources are present.   we can now map it in and free our
    1212             :          * resources.
    1213             :          */
    1214           0 :         if (pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg),
    1215           0 :             enter_prot, access_type | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0))
    1216           0 :             != 0) {
    1217             :                 /*
    1218             :                  * No need to undo what we did; we can simply think of
    1219             :                  * this as the pmap throwing away the mapping information.
    1220             :                  *
    1221             :                  * We do, however, have to go through the ReFault path,
    1222             :                  * as the map may change while we're asleep.
    1223             :                  */
    1224           0 :                 if (pg->pg_flags & PG_WANTED)
    1225           0 :                         wakeup(pg);
    1226             : 
    1227           0 :                 atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED);
    1228             :                 UVM_PAGE_OWN(pg, NULL);
    1229           0 :                 uvmfault_unlockall(&ufi, amap, uobj, NULL);
    1230           0 :                 KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
    1231           0 :                 if (uvmexp.swpgonly == uvmexp.swpages) {
    1232             :                         /* XXX instrumentation */
    1233           0 :                         return (ENOMEM);
    1234             :                 }
    1235             :                 /* XXX instrumentation */
    1236           0 :                 uvm_wait("flt_pmfail2");
    1237           0 :                 goto ReFault;
    1238             :         }
    1239             : 
    1240           0 :         uvm_lock_pageq();
    1241             : 
    1242           0 :         if (fault_type == VM_FAULT_WIRE) {
    1243           0 :                 uvm_pagewire(pg);
    1244           0 :                 if (pg->pg_flags & PQ_AOBJ) {
    1245             :                         /*
    1246             :                          * since the now-wired page cannot be paged out,
    1247             :                          * release its swap resources for others to use.
    1248             :                          * since an aobj page with no swap cannot be PG_CLEAN,
    1249             :                          * clear its clean flag now.
    1250             :                          */
    1251           0 :                         atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
    1252           0 :                         uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
    1253           0 :                 }
    1254             :         } else {
    1255             :                 /* activate it */
    1256           0 :                 uvm_pageactivate(pg);
    1257             :         }
    1258           0 :         uvm_unlock_pageq();
    1259             : 
    1260           0 :         if (pg->pg_flags & PG_WANTED)
    1261           0 :                 wakeup(pg);
    1262             : 
    1263           0 :         atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED);
    1264             :         UVM_PAGE_OWN(pg, NULL);
    1265           0 :         uvmfault_unlockall(&ufi, amap, uobj, NULL);
    1266             :         pmap_update(ufi.orig_map->pmap);
    1267             : 
    1268           0 :         return (0);
    1269           0 : }
    1270             : 
    1271             : 
    1272             : /*
    1273             :  * uvm_fault_wire: wire down a range of virtual addresses in a map.
    1274             :  *
    1275             :  * => map may be read-locked by caller, but MUST NOT be write-locked.
    1276             :  * => if map is read-locked, any operations which may cause map to
    1277             :  *      be write-locked in uvm_fault() must be taken care of by
    1278             :  *      the caller.  See uvm_map_pageable().
    1279             :  */
    1280             : int
    1281           0 : uvm_fault_wire(vm_map_t map, vaddr_t start, vaddr_t end, vm_prot_t access_type)
    1282             : {
    1283             :         vaddr_t va;
    1284             :         int rv;
    1285             : 
    1286             :         /*
    1287             :          * now fault it in a page at a time.   if the fault fails then we have
    1288             :          * to undo what we have done.   note that in uvm_fault PROT_NONE 
    1289             :          * is replaced with the max protection if fault_type is VM_FAULT_WIRE.
    1290             :          */
    1291           0 :         for (va = start ; va < end ; va += PAGE_SIZE) {
    1292           0 :                 rv = uvm_fault(map, va, VM_FAULT_WIRE, access_type);
    1293           0 :                 if (rv) {
    1294           0 :                         if (va != start) {
    1295           0 :                                 uvm_fault_unwire(map, start, va);
    1296           0 :                         }
    1297           0 :                         return (rv);
    1298             :                 }
    1299             :         }
    1300             : 
    1301           0 :         return (0);
    1302           0 : }
    1303             : 
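/*
 * A minimal usage sketch (hypothetical helper, not part of this file):
 * wire a page-aligned range around an I/O operation using the
 * uvm_fault_wire()/uvm_fault_unwire() interfaces above; the I/O itself
 * is elided.
 */
int
example_wire_for_io(vm_map_t map, vaddr_t start, vaddr_t end)
{
        int error;

        /* fault the pages in and wire them down */
        error = uvm_fault_wire(map, start, end, PROT_READ | PROT_WRITE);
        if (error)
                return (error);         /* nothing is left wired on failure */

        /* ... perform the I/O on the wired range here ... */

        /* drop the wiring so the pages become pageable again */
        uvm_fault_unwire(map, start, end);
        return (0);
}
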
    1304             : /*
    1305             :  * uvm_fault_unwire(): unwire range of virtual space.
    1306             :  */
    1307             : void
    1308           0 : uvm_fault_unwire(vm_map_t map, vaddr_t start, vaddr_t end)
    1309             : {
    1310             : 
    1311           0 :         vm_map_lock_read(map);
    1312           0 :         uvm_fault_unwire_locked(map, start, end);
    1313           0 :         vm_map_unlock_read(map);
    1314           0 : }
    1315             : 
    1316             : /*
    1317             :  * uvm_fault_unwire_locked(): the guts of uvm_fault_unwire().
    1318             :  *
    1319             :  * => map must be at least read-locked.
    1320             :  */
    1321             : void
    1322           0 : uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
    1323             : {
    1324           0 :         vm_map_entry_t entry, next;
    1325           0 :         pmap_t pmap = vm_map_pmap(map);
    1326             :         vaddr_t va;
    1327           0 :         paddr_t pa;
    1328             :         struct vm_page *pg;
    1329             : 
    1330           0 :         KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
    1331             : 
    1332             :         /*
    1333             :          * we assume that the area we are unwiring has actually been wired
    1334             :          * in the first place.   this means that we should be able to extract
    1335             :          * the PAs from the pmap.   we also lock out the page daemon so that
    1336             :          * we can call uvm_pageunwire.
    1337             :          */
    1338           0 :         uvm_lock_pageq();
    1339             : 
    1340             :         /* find the beginning map entry for the region. */
    1341           0 :         KASSERT(start >= vm_map_min(map) && end <= vm_map_max(map));
    1342           0 :         if (uvm_map_lookup_entry(map, start, &entry) == FALSE)
    1343           0 :                 panic("uvm_fault_unwire_locked: address not in map");
    1344             : 
    1345           0 :         for (va = start; va < end ; va += PAGE_SIZE) {
    1346           0 :                 if (pmap_extract(pmap, va, &pa) == FALSE)
    1347             :                         continue;
    1348             : 
    1349             :                 /* find the map entry for the current address. */
    1350           0 :                 KASSERT(va >= entry->start);
    1351           0 :                 while (va >= entry->end) {
    1352           0 :                         next = RBT_NEXT(uvm_map_addr, entry);
    1353           0 :                         KASSERT(next != NULL && next->start <= entry->end);
    1354           0 :                         entry = next;
    1355             :                 }
    1356             : 
    1357             :                 /* if the entry is no longer wired, tell the pmap. */
    1358           0 :                 if (VM_MAPENT_ISWIRED(entry) == 0)
    1359           0 :                         pmap_unwire(pmap, va);
    1360             : 
    1361           0 :                 pg = PHYS_TO_VM_PAGE(pa);
    1362           0 :                 if (pg)
    1363           0 :                         uvm_pageunwire(pg);
    1364             :         }
    1365             : 
    1366           0 :         uvm_unlock_pageq();
    1367           0 : }
    1368             : 
    1369             : /*
    1370             :  * uvmfault_unlockmaps: unlock the maps
    1371             :  */
    1372             : void
    1373           0 : uvmfault_unlockmaps(struct uvm_faultinfo *ufi, boolean_t write_locked)
    1374             : {
    1375             :         /*
    1376             :          * ufi can be NULL when this isn't really a fault,
    1377             :          * but merely paging in anon data.
    1378             :          */
    1379           0 :         if (ufi == NULL) {
    1380             :                 return;
    1381             :         }
    1382             : 
    1383           0 :         uvmfault_update_stats(ufi);
    1384           0 :         if (write_locked) {
    1385           0 :                 vm_map_unlock(ufi->map);
    1386           0 :         } else {
    1387           0 :                 vm_map_unlock_read(ufi->map);
    1388             :         }
    1389           0 : }
    1390             : 
    1391             : /*
    1392             :  * uvmfault_unlockall: unlock everything passed in.
    1393             :  *
    1394             :  * => maps must be read-locked (not write-locked).
    1395             :  */
    1396             : void
    1397           0 : uvmfault_unlockall(struct uvm_faultinfo *ufi, struct vm_amap *amap,
    1398             :     struct uvm_object *uobj, struct vm_anon *anon)
    1399             : {
    1400             : 
    1401           0 :         uvmfault_unlockmaps(ufi, FALSE);
    1402           0 : }
    1403             : 
    1404             : /*
    1405             :  * uvmfault_lookup: lookup a virtual address in a map
    1406             :  *
    1407             :  * => caller must provide a uvm_faultinfo structure with the IN
    1408             :  *      params properly filled in
    1409             :  * => we will lookup the map entry (handling submaps) as we go
    1410             :  * => if the lookup is a success we will return with the maps locked
    1411             :  * => if "write_lock" is TRUE, we write_lock the map, otherwise we only
    1412             :  *      get a read lock.
    1413             :  * => note that submaps can only appear in the kernel and they are 
    1414             :  *      required to use the same virtual addresses as the map they
    1415             :  *      are referenced by (thus address translation between the main
    1416             :  *      map and the submap is unnecessary).
    1417             :  */
    1418             : 
    1419             : boolean_t
    1420           0 : uvmfault_lookup(struct uvm_faultinfo *ufi, boolean_t write_lock)
    1421             : {
    1422             :         vm_map_t tmpmap;
    1423             : 
    1424             :         /* init ufi values for lookup. */
    1425          76 :         ufi->map = ufi->orig_map;
    1426           0 :         ufi->size = ufi->orig_size;
    1427             : 
    1428             :         /*
    1429             :          * keep going down levels until we are done.   note that there can
    1430             :          * only be two levels so we won't loop very long.
    1431             :          */
    1432           0 :         while (1) {
    1433           0 :                 if (ufi->orig_rvaddr < ufi->map->min_offset ||
    1434           0 :                     ufi->orig_rvaddr >= ufi->map->max_offset)
    1435           0 :                         return(FALSE);
    1436             : 
    1437             :                 /* lock map */
    1438           0 :                 if (write_lock) {
    1439           0 :                         vm_map_lock(ufi->map);
    1440           0 :                 } else {
    1441          75 :                         vm_map_lock_read(ufi->map);
    1442             :                 }
    1443             : 
    1444             :                 /* lookup */
    1445           0 :                 if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr, 
    1446           0 :                     &ufi->entry)) {
    1447           0 :                         uvmfault_unlockmaps(ufi, write_lock);
    1448           0 :                         return(FALSE);
    1449             :                 }
    1450             : 
    1451             :                 /* reduce size if necessary */
    1452           0 :                 if (ufi->entry->end - ufi->orig_rvaddr < ufi->size)
    1453           0 :                         ufi->size = ufi->entry->end - ufi->orig_rvaddr;
    1454             : 
    1455             :                 /*
    1456             :                  * submap?    replace map with the submap and lookup again.
    1457             :                  * note: VAs in submaps must match VAs in main map.
    1458             :                  */
    1459          63 :                 if (UVM_ET_ISSUBMAP(ufi->entry)) {
    1460           0 :                         tmpmap = ufi->entry->object.sub_map;
    1461           0 :                         uvmfault_unlockmaps(ufi, write_lock);
    1462           0 :                         ufi->map = tmpmap;
    1463           0 :                         continue;
    1464             :                 }
    1465             : 
    1466             :                 /* got it! */
    1467          62 :                 ufi->mapv = ufi->map->timestamp;
    1468           0 :                 return(TRUE);
    1469             : 
    1470             :         }
    1471             :         /*NOTREACHED*/
    1472           0 : }
    1473             : 
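/*
 * A minimal usage sketch (hypothetical, not part of this file): fill in
 * the IN parameters of a uvm_faultinfo and look an address up under a
 * read lock, releasing the lock again before returning.
 */
boolean_t
example_lookup(vm_map_t map, vaddr_t va)
{
        struct uvm_faultinfo ufi;

        /* IN params: the original map, the rounded address, the size */
        ufi.orig_map = map;
        ufi.orig_rvaddr = trunc_page(va);
        ufi.orig_size = PAGE_SIZE;

        if (uvmfault_lookup(&ufi, FALSE) == FALSE)
                return (FALSE);         /* address not in the map */

        /* on success, ufi.map is read-locked and ufi.entry is valid */
        uvmfault_unlockmaps(&ufi, FALSE);
        return (TRUE);
}
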
    1474             : /*
    1475             :  * uvmfault_relock: attempt to relock the same version of the map
    1476             :  *
    1477             :  * => fault data structures should be unlocked before calling.
    1478             :  * => if a success (TRUE) maps will be locked after call.
    1479             :  */
    1480             : boolean_t
    1481           0 : uvmfault_relock(struct uvm_faultinfo *ufi)
    1482             : {
    1483             :         /*
    1484             :          * ufi can be NULL when this isn't really a fault,
    1485             :          * but merely paging in anon data.
    1486             :          */
    1487           0 :         if (ufi == NULL) {
    1488           0 :                 return TRUE;
    1489             :         }
    1490             : 
    1491           0 :         uvmexp.fltrelck++;
    1492             : 
    1493             :         /*
    1494             :          * relock map.   fail if version mismatch (in which case nothing 
    1495             :          * gets locked).
    1496             :          */
    1497           0 :         vm_map_lock_read(ufi->map);
    1498           0 :         if (ufi->mapv != ufi->map->timestamp) {
    1499           0 :                 vm_map_unlock_read(ufi->map);
    1500           0 :                 return(FALSE);
    1501             :         }
    1502             : 
    1503           0 :         uvmexp.fltrelckok++;
    1504           0 :         return(TRUE);           /* got it! */
    1505           0 : }
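
/*
 * Sketch of the pattern uvmfault_relock() supports (hypothetical, not
 * part of this file): drop the map locks around a blocking operation,
 * then either pick them back up or signal the caller to refault if the
 * map version changed while we slept.
 */
boolean_t
example_relock_pattern(struct uvm_faultinfo *ufi)
{
        uvmfault_unlockmaps(ufi, FALSE);

        /* ... block here, e.g. waiting for pager I/O ... */

        if (uvmfault_relock(ufi) == FALSE)
                return (FALSE);         /* map changed: caller must refault */

        /* same map version: the read lock is held once more */
        uvmfault_unlockmaps(ufi, FALSE);
        return (TRUE);
}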

Generated by: LCOV version 1.13