LCOV - code coverage report
Current view: top level - uvm - uvm_vnode.c (source / functions)
Test: 6.4
Date: 2018-10-19 03:25:38

                 Hit    Total    Coverage
Lines:             3      387       0.8 %
Functions:         0       13       0.0 %

Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*      $OpenBSD: uvm_vnode.c,v 1.103 2018/07/16 16:44:09 helg Exp $    */
       2             : /*      $NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $       */
       3             : 
       4             : /*
       5             :  * Copyright (c) 1997 Charles D. Cranor and Washington University.
       6             :  * Copyright (c) 1991, 1993
       7             :  *      The Regents of the University of California.
       8             :  * Copyright (c) 1990 University of Utah.
       9             :  *
      10             :  * All rights reserved.
      11             :  *
      12             :  * This code is derived from software contributed to Berkeley by
      13             :  * the Systems Programming Group of the University of Utah Computer
      14             :  * Science Department.
      15             :  *
      16             :  * Redistribution and use in source and binary forms, with or without
      17             :  * modification, are permitted provided that the following conditions
      18             :  * are met:
      19             :  * 1. Redistributions of source code must retain the above copyright
      20             :  *    notice, this list of conditions and the following disclaimer.
      21             :  * 2. Redistributions in binary form must reproduce the above copyright
      22             :  *    notice, this list of conditions and the following disclaimer in the
      23             :  *    documentation and/or other materials provided with the distribution.
      24             :  * 3. Neither the name of the University nor the names of its contributors
      25             :  *    may be used to endorse or promote products derived from this software
      26             :  *    without specific prior written permission.
      27             :  *
      28             :  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
      29             :  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      30             :  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      31             :  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
      32             :  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
      33             :  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
      34             :  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      35             :  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      36             :  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      37             :  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      38             :  * SUCH DAMAGE.
      39             :  *
      40             :  *      @(#)vnode_pager.c       8.8 (Berkeley) 2/13/94
      41             :  * from: Id: uvm_vnode.c,v 1.1.2.26 1998/02/02 20:38:07 chuck Exp
      42             :  */
      43             : 
      44             : /*
      45             :  * uvm_vnode.c: the vnode pager.
      46             :  */
      47             : 
      48             : #include <sys/param.h>
      49             : #include <sys/systm.h>
      50             : #include <sys/proc.h>
      51             : #include <sys/malloc.h>
      52             : #include <sys/vnode.h>
      53             : #include <sys/lock.h>
      54             : #include <sys/disklabel.h>
      55             : #include <sys/fcntl.h>
      56             : #include <sys/conf.h>
      57             : #include <sys/rwlock.h>
      58             : #include <sys/dkio.h>
      59             : #include <sys/specdev.h>
      60             : 
      61             : #include <uvm/uvm.h>
      62             : #include <uvm/uvm_vnode.h>
      63             : 
      64             : /*
      65             :  * private global data structure
      66             :  *
       67             :  * we keep a list of writeable active vnode-backed VM objects for the sync op.
      68             :  * we keep a simpleq of vnodes that are currently being sync'd.
      69             :  */
      70             : 
      71             : LIST_HEAD(uvn_list_struct, uvm_vnode);
      72             : struct uvn_list_struct uvn_wlist;       /* writeable uvns */
      73             : 
      74             : SIMPLEQ_HEAD(uvn_sq_struct, uvm_vnode);
      75             : struct uvn_sq_struct uvn_sync_q;                /* sync'ing uvns */
      76             : struct rwlock uvn_sync_lock;                    /* locks sync operation */
      77             : 
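
(A hedged sketch of how these containers are typically driven: the real
staging code lives in uvm_vnp_sync(), outside this excerpt, and the
u_syncq linkage name below is an assumption for illustration only.)

	struct uvm_vnode *uvn;

	rw_enter_write(&uvn_sync_lock);
	SIMPLEQ_INIT(&uvn_sync_q);
	LIST_FOREACH(uvn, &uvn_wlist, u_wlist) {
		/* u_syncq: hypothetical SIMPLEQ_ENTRY in struct uvm_vnode */
		SIMPLEQ_INSERT_HEAD(&uvn_sync_q, uvn, u_syncq);
	}
	rw_exit_write(&uvn_sync_lock);
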
      78             : extern int rebooting;
      79             : 
      80             : /*
      81             :  * functions
      82             :  */
      83             : void             uvn_cluster(struct uvm_object *, voff_t, voff_t *, voff_t *);
      84             : void             uvn_detach(struct uvm_object *);
      85             : boolean_t        uvn_flush(struct uvm_object *, voff_t, voff_t, int);
      86             : int              uvn_get(struct uvm_object *, voff_t, vm_page_t *, int *, int,
      87             :                      vm_prot_t, int, int);
      88             : void             uvn_init(void);
      89             : int              uvn_io(struct uvm_vnode *, vm_page_t *, int, int, int);
      90             : int              uvn_put(struct uvm_object *, vm_page_t *, int, boolean_t);
      91             : void             uvn_reference(struct uvm_object *);
      92             : 
      93             : /*
      94             :  * master pager structure
      95             :  */
      96             : struct uvm_pagerops uvm_vnodeops = {
      97             :         uvn_init,
      98             :         uvn_reference,
      99             :         uvn_detach,
     100             :         NULL,                   /* no specialized fault routine required */
     101             :         uvn_flush,
     102             :         uvn_get,
     103             :         uvn_put,
     104             :         uvn_cluster,
     105             :         uvm_mk_pcluster, /* use generic version of this: see uvm_pager.c */
     106             : };
     107             : 
     108             : /*
     109             :  * the ops!
     110             :  */
     111             : /*
     112             :  * uvn_init
     113             :  *
     114             :  * init pager private data structures.
     115             :  */
     116             : void
     117           0 : uvn_init(void)
     118             : {
     119             : 
     120           0 :         LIST_INIT(&uvn_wlist);
     121             :         /* note: uvn_sync_q init'd in uvm_vnp_sync() */
     122           0 :         rw_init_flags(&uvn_sync_lock, "uvnsync", RWL_IS_VNODE);
     123           0 : }
     124             : 
     125             : /*
     126             :  * uvn_attach
     127             :  *
     128             :  * attach a vnode structure to a VM object.  if the vnode is already
     129             :  * attached, then just bump the reference count by one and return the
     130             :  * VM object.   if not already attached, attach and return the new VM obj.
     131             :  * the "accessprot" tells the max access the attaching thread wants to
      132             :  * have to our pages.
     133             :  *
     134             :  * => in fact, nothing should be locked so that we can sleep here.
     135             :  * => note that uvm_object is first thing in vnode structure, so their
     136             :  *    pointers are equiv.
     137             :  */
     138             : struct uvm_object *
     139           0 : uvn_attach(struct vnode *vp, vm_prot_t accessprot)
     140             : {
     141           0 :         struct uvm_vnode *uvn = vp->v_uvm;
     142           0 :         struct vattr vattr;
     143             :         int oldflags, result;
     144           0 :         struct partinfo pi;
     145             :         u_quad_t used_vnode_size = 0;
     146             : 
     147             :         /* first get a lock on the uvn. */
     148           0 :         while (uvn->u_flags & UVM_VNODE_BLOCKED) {
     149           0 :                 uvn->u_flags |= UVM_VNODE_WANTED;
     150           0 :                 UVM_WAIT(uvn, FALSE, "uvn_attach", 0);
     151             :         }
     152             : 
     153             :         /* if we're mapping a BLK device, make sure it is a disk. */
     154           0 :         if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK) {
     155           0 :                 return(NULL);
     156             :         }
     157             : 
     158             :         /*
     159             :          * now uvn must not be in a blocked state.
     160             :          * first check to see if it is already active, in which case
     161             :          * we can bump the reference count, check to see if we need to
     162             :          * add it to the writeable list, and then return.
     163             :          */
     164           0 :         if (uvn->u_flags & UVM_VNODE_VALID) {    /* already active? */
     165             : 
     166             :                 /* regain vref if we were persisting */
     167           0 :                 if (uvn->u_obj.uo_refs == 0) {
     168           0 :                         vref(vp);
     169           0 :                 }
     170           0 :                 uvn->u_obj.uo_refs++;                /* bump uvn ref! */
     171             : 
     172             :                 /* check for new writeable uvn */
     173           0 :                 if ((accessprot & PROT_WRITE) != 0 &&
     174           0 :                     (uvn->u_flags & UVM_VNODE_WRITEABLE) == 0) {
     175           0 :                         LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
     176             :                         /* we are now on wlist! */
     177           0 :                         uvn->u_flags |= UVM_VNODE_WRITEABLE;
     178           0 :                 }
     179             : 
     180           0 :                 return (&uvn->u_obj);
     181             :         }
     182             : 
     183             :         /*
     184             :          * need to call VOP_GETATTR() to get the attributes, but that could
     185             :          * block (due to I/O), so we want to unlock the object before calling.
     186             :          * however, we want to keep anyone else from playing with the object
     187             :          * while it is unlocked.   to do this we set UVM_VNODE_ALOCK which
     188             :          * prevents anyone from attaching to the vnode until we are done with
     189             :          * it.
     190             :          */
     191           0 :         uvn->u_flags = UVM_VNODE_ALOCK;
     192             : 
     193           0 :         if (vp->v_type == VBLK) {
     194             :                 /*
     195             :                  * We could implement this as a specfs getattr call, but:
     196             :                  *
     197             :                  *      (1) VOP_GETATTR() would get the file system
     198             :                  *          vnode operation, not the specfs operation.
     199             :                  *
     200             :                  *      (2) All we want is the size, anyhow.
     201             :                  */
     202           0 :                 result = (*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev,
     203           0 :                     DIOCGPART, (caddr_t)&pi, FREAD, curproc);
     204           0 :                 if (result == 0) {
     205             :                         /* XXX should remember blocksize */
     206           0 :                         used_vnode_size = (u_quad_t)pi.disklab->d_secsize *
     207           0 :                             (u_quad_t)DL_GETPSIZE(pi.part);
     208           0 :                 }
     209             :         } else {
     210           0 :                 result = VOP_GETATTR(vp, &vattr, curproc->p_ucred, curproc);
     211           0 :                 if (result == 0)
     212           0 :                         used_vnode_size = vattr.va_size;
     213             :         }
     214             : 
     215           0 :         if (result != 0) {
     216           0 :                 if (uvn->u_flags & UVM_VNODE_WANTED)
     217           0 :                         wakeup(uvn);
     218           0 :                 uvn->u_flags = 0;
     219           0 :                 return(NULL);
     220             :         }
     221             : 
     222             :         /*
     223             :          * make sure that the newsize fits within a vaddr_t
     224             :          * XXX: need to revise addressing data types
     225             :          */
     226             : #ifdef DEBUG
     227             :         if (vp->v_type == VBLK)
     228             :                 printf("used_vnode_size = %llu\n", (long long)used_vnode_size);
     229             : #endif
     230             : 
     231             :         /* now set up the uvn. */
     232           0 :         uvm_objinit(&uvn->u_obj, &uvm_vnodeops, 1);
     233           0 :         oldflags = uvn->u_flags;
     234           0 :         uvn->u_flags = UVM_VNODE_VALID|UVM_VNODE_CANPERSIST;
     235           0 :         uvn->u_nio = 0;
     236           0 :         uvn->u_size = used_vnode_size;
     237             : 
     238             :         /* if write access, we need to add it to the wlist */
     239           0 :         if (accessprot & PROT_WRITE) {
     240           0 :                 LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
     241           0 :                 uvn->u_flags |= UVM_VNODE_WRITEABLE; /* we are on wlist! */
     242           0 :         }
     243             : 
     244             :         /*
     245             :          * add a reference to the vnode.   this reference will stay as long
     246             :          * as there is a valid mapping of the vnode.   dropped when the
     247             :          * reference count goes to zero [and we either free or persist].
     248             :          */
     249           0 :         vref(vp);
     250           0 :         if (oldflags & UVM_VNODE_WANTED)
     251           0 :                 wakeup(uvn);
     252             : 
     253           0 :         return(&uvn->u_obj);
     254           0 : }
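
(A usage sketch, illustrative caller code rather than code from this
file: a mapping path attaches and then hands the object to the map
layer.)

	struct uvm_object *uobj;

	/* accessprot caps the access the mapping may demand of our pages */
	uobj = uvn_attach(vp, PROT_READ | PROT_WRITE);
	if (uobj == NULL)
		return (EINVAL);	/* e.g. a VBLK that is not a disk */
	/* ... pass uobj on to the uvm_map()/uvm_mmap() machinery ... */
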
     255             : 
     256             : 
     257             : /*
     258             :  * uvn_reference
     259             :  *
     260             :  * duplicate a reference to a VM object.  Note that the reference
     261             :  * count must already be at least one (the passed in reference) so
     262             :  * there is no chance of the uvn being killed out here.
     263             :  *
     264             :  * => caller must be using the same accessprot as was used at attach time
     265             :  */
     266             : 
     267             : 
     268             : void
     269           0 : uvn_reference(struct uvm_object *uobj)
     270             : {
     271             : #ifdef DEBUG
     272             :         struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
     273             : #endif
     274             : 
     275             : #ifdef DEBUG
     276             :         if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
      277             :                 printf("uvn_reference: ref=%d, flags=0x%x\n",
      278             :                     uobj->uo_refs, uvn->u_flags);
     279             :                 panic("uvn_reference: invalid state");
     280             :         }
     281             : #endif
     282          60 :         uobj->uo_refs++;
     283           0 : }
     284             : 
     285             : /*
     286             :  * uvn_detach
     287             :  *
     288             :  * remove a reference to a VM object.
     289             :  *
     290             :  * => caller must call with map locked.
     291             :  * => this starts the detach process, but doesn't have to finish it
     292             :  *    (async i/o could still be pending).
     293             :  */
     294             : void
     295           0 : uvn_detach(struct uvm_object *uobj)
     296             : {
     297             :         struct uvm_vnode *uvn;
     298             :         struct vnode *vp;
     299             :         int oldflags;
     300             : 
     301             : 
     302          54 :         uobj->uo_refs--;                     /* drop ref! */
     303           0 :         if (uobj->uo_refs) {                 /* still more refs */
     304           0 :                 return;
     305             :         }
     306             : 
     307             :         /* get other pointers ... */
     308           0 :         uvn = (struct uvm_vnode *) uobj;
     309           0 :         vp = uvn->u_vnode;
     310             : 
     311             :         /*
     312             :          * clear VTEXT flag now that there are no mappings left (VTEXT is used
     313             :          * to keep an active text file from being overwritten).
     314             :          */
     315           0 :         vp->v_flag &= ~VTEXT;
     316             : 
     317             :         /*
     318             :          * we just dropped the last reference to the uvn.   see if we can
     319             :          * let it "stick around".
     320             :          */
     321           0 :         if (uvn->u_flags & UVM_VNODE_CANPERSIST) {
     322             :                 /* won't block */
     323           0 :                 uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES);
     324           0 :                 vrele(vp);                      /* drop vnode reference */
     325           0 :                 return;
     326             :         }
     327             : 
      328             :         /* it's a goner! */
     329           0 :         uvn->u_flags |= UVM_VNODE_DYING;
     330             : 
     331             :         /*
     332             :          * even though we may unlock in flush, no one can gain a reference
     333             :          * to us until we clear the "dying" flag [because it blocks
     334             :          * attaches].  we will not do that until after we've disposed of all
     335             :          * the pages with uvn_flush().  note that before the flush the only
     336             :          * pages that could be marked PG_BUSY are ones that are in async
     337             :          * pageout by the daemon.  (there can't be any pending "get"'s
     338             :          * because there are no references to the object).
     339             :          */
     340           0 :         (void) uvn_flush(uobj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES);
     341             : 
     342             :         /*
     343             :          * given the structure of this pager, the above flush request will
     344             :          * create the following state: all the pages that were in the object
     345             :          * have either been free'd or they are marked PG_BUSY and in the 
     346             :          * middle of an async io. If we still have pages we set the "relkill"
      347             :  * state, so that if the vnode gets terminated we know
     348             :          * to leave it alone. Otherwise we'll kill the vnode when it's empty.
     349             :          */
     350           0 :         uvn->u_flags |= UVM_VNODE_RELKILL;
     351             :         /* wait on any outstanding io */
     352           0 :         while (uobj->uo_npages && uvn->u_flags & UVM_VNODE_RELKILL) {
     353           0 :                 uvn->u_flags |= UVM_VNODE_IOSYNC;
     354           0 :                 UVM_WAIT(&uvn->u_nio, FALSE, "uvn_term", 0);
     355             :         }
     356             : 
     357           0 :         if ((uvn->u_flags & UVM_VNODE_RELKILL) == 0)
     358           0 :                 return;
     359             : 
     360             :         /*
     361             :          * kill object now.   note that we can't be on the sync q because
     362             :          * all references are gone.
     363             :          */
     364           0 :         if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
     365           0 :                 LIST_REMOVE(uvn, u_wlist);
     366           0 :         }
     367           0 :         KASSERT(RBT_EMPTY(uvm_objtree, &uobj->memt));
     368           0 :         oldflags = uvn->u_flags;
     369           0 :         uvn->u_flags = 0;
     370             : 
     371             :         /* wake up any sleepers */
     372           0 :         if (oldflags & UVM_VNODE_WANTED)
     373           0 :                 wakeup(uvn);
     374             : 
     375             :         /* drop our reference to the vnode. */
     376           0 :         vrele(vp);
     377             : 
     378           0 :         return;
     379          54 : }
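
(Together with uvn_attach(), these two ops give the object plain
reference counting through the pager switch.  A minimal illustrative
cycle, using the usual uvm_pagerops member names:)

	uobj->pgops->pgo_reference(uobj);	/* uvn_reference: uo_refs++ */
	/* ... use the object ... */
	uobj->pgops->pgo_detach(uobj);		/* uvn_detach: uo_refs--; may
						 * persist or kill the uvn */
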
     380             : 
     381             : /*
     382             :  * uvm_vnp_terminate: external hook to clear out a vnode's VM
     383             :  *
     384             :  * called in two cases:
     385             :  *  [1] when a persisting vnode vm object (i.e. one with a zero reference
     386             :  *      count) needs to be freed so that a vnode can be reused.  this
     387             :  *      happens under "getnewvnode" in vfs_subr.c.   if the vnode from
     388             :  *      the free list is still attached (i.e. not VBAD) then vgone is
     389             :  *      called.   as part of the vgone trace this should get called to
     390             :  *      free the vm object.   this is the common case.
     391             :  *  [2] when a filesystem is being unmounted by force (MNT_FORCE,
     392             :  *      "umount -f") the vgone() function is called on active vnodes
     393             :  *      on the mounted file systems to kill their data (the vnodes become
     394             :  *      "dead" ones [see src/sys/miscfs/deadfs/...]).  that results in a
     395             :  *      call here (even if the uvn is still in use -- i.e. has a non-zero
     396             :  *      reference count).  this case happens at "umount -f" and during a
     397             :  *      "reboot/halt" operation.
     398             :  *
     399             :  * => the caller must XLOCK and VOP_LOCK the vnode before calling us
     400             :  *      [protects us from getting a vnode that is already in the DYING
     401             :  *       state...]
     402             :  * => in case [2] the uvn is still alive after this call, but all I/O
     403             :  *      ops will fail (due to the backing vnode now being "dead").  this
      404             :  *      will probably kill any process using the uvn due to pgo_get failing.
     405             :  */
     406             : void
     407           0 : uvm_vnp_terminate(struct vnode *vp)
     408             : {
     409           0 :         struct uvm_vnode *uvn = vp->v_uvm;
     410             :         int oldflags;
     411             : 
     412             :         /* check if it is valid */
     413           0 :         if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
     414           0 :                 return;
     415             :         }
     416             : 
     417             :         /*
     418             :          * must be a valid uvn that is not already dying (because XLOCK
      419             :  * protects us from that).   the uvn can't be in the ALOCK state
     420             :          * because it is valid, and uvn's that are in the ALOCK state haven't
     421             :          * been marked valid yet.
     422             :          */
     423             : #ifdef DEBUG
     424             :         /*
     425             :          * debug check: are we yanking the vnode out from under our uvn?
     426             :          */
     427             :         if (uvn->u_obj.uo_refs) {
     428             :                 printf("uvm_vnp_terminate(%p): terminating active vnode "
     429             :                     "(refs=%d)\n", uvn, uvn->u_obj.uo_refs);
     430             :         }
     431             : #endif
     432             : 
     433             :         /*
     434             :          * it is possible that the uvn was detached and is in the relkill
     435             :          * state [i.e. waiting for async i/o to finish].
     436             :          * we take over the vnode now and cancel the relkill.
     437             :          * we want to know when the i/o is done so we can recycle right
     438             :          * away.   note that a uvn can only be in the RELKILL state if it
     439             :          * has a zero reference count.
     440             :          */
     441           0 :         if (uvn->u_flags & UVM_VNODE_RELKILL)
     442           0 :                 uvn->u_flags &= ~UVM_VNODE_RELKILL;      /* cancel RELKILL */
     443             : 
     444             :         /*
     445             :          * block the uvn by setting the dying flag, and then flush the
     446             :          * pages.
     447             :          *
     448             :          * also, note that we tell I/O that we are already VOP_LOCK'd so
     449             :          * that uvn_io doesn't attempt to VOP_LOCK again.
     450             :          *
     451             :          * XXXCDC: setting VNISLOCKED on an active uvn which is being terminated
     452             :          *      due to a forceful unmount might not be a good idea.  maybe we
     453             :          *      need a way to pass in this info to uvn_flush through a
     454             :          *      pager-defined PGO_ constant [currently there are none].
     455             :          */
     456           0 :         uvn->u_flags |= UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED;
     457             : 
     458           0 :         (void) uvn_flush(&uvn->u_obj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES);
     459             : 
     460             :         /*
     461             :          * as we just did a flush we expect all the pages to be gone or in
     462             :          * the process of going.  sleep to wait for the rest to go [via iosync].
     463             :          */
     464           0 :         while (uvn->u_obj.uo_npages) {
     465             : #ifdef DEBUG
     466             :                 struct vm_page *pp;
     467             :                 RBT_FOREACH(pp, uvm_objtree, &uvn->u_obj.memt) {
     468             :                         if ((pp->pg_flags & PG_BUSY) == 0)
     469             :                                 panic("uvm_vnp_terminate: detected unbusy pg");
     470             :                 }
     471             :                 if (uvn->u_nio == 0)
     472             :                         panic("uvm_vnp_terminate: no I/O to wait for?");
     473             :                 printf("uvm_vnp_terminate: waiting for I/O to fin.\n");
     474             :                 /*
     475             :                  * XXXCDC: this is unlikely to happen without async i/o so we
     476             :                  * put a printf in just to keep an eye on it.
     477             :                  */
     478             : #endif
     479           0 :                 uvn->u_flags |= UVM_VNODE_IOSYNC;
     480           0 :                 UVM_WAIT(&uvn->u_nio, FALSE, "uvn_term", 0);
     481             :         }
     482             : 
     483             :         /*
     484             :          * done.   now we free the uvn if its reference count is zero
     485             :          * (true if we are zapping a persisting uvn).   however, if we are
     486             :          * terminating a uvn with active mappings we let it live ... future
     487             :          * calls down to the vnode layer will fail.
     488             :          */
     489             :         oldflags = uvn->u_flags;
     490           0 :         if (uvn->u_obj.uo_refs) {
     491             :                 /*
      492             :                  * uvn must live on in its dead-vnode state until all references
     493             :                  * are gone.   restore flags.    clear CANPERSIST state.
     494             :                  */
     495           0 :                 uvn->u_flags &= ~(UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED|
     496             :                       UVM_VNODE_WANTED|UVM_VNODE_CANPERSIST);
     497           0 :         } else {
     498             :                 /*
     499             :                  * free the uvn now.   note that the vref reference is already
     500             :                  * gone [it is dropped when we enter the persist state].
     501             :                  */
     502           0 :                 if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
     503           0 :                         panic("uvm_vnp_terminate: io sync wanted bit set");
     504             : 
     505           0 :                 if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
     506           0 :                         LIST_REMOVE(uvn, u_wlist);
     507           0 :                 }
     508           0 :                 uvn->u_flags = 0;    /* uvn is history, clear all bits */
     509             :         }
     510             : 
     511           0 :         if (oldflags & UVM_VNODE_WANTED)
     512           0 :                 wakeup(uvn);
     513           0 : }
     514             : 
     515             : /*
     516             :  * NOTE: currently we have to use VOP_READ/VOP_WRITE because they go
     517             :  * through the buffer cache and allow I/O in any size.  These VOPs use
     518             :  * synchronous i/o.  [vs. VOP_STRATEGY which can be async, but doesn't
     519             :  * go through the buffer cache or allow I/O sizes larger than a
     520             :  * block].  we will eventually want to change this.
     521             :  *
     522             :  * issues to consider:
     523             :  *   uvm provides the uvm_aiodesc structure for async i/o management.
     524             :  * there are two tailq's in the uvm. structure... one for pending async
     525             :  * i/o and one for "done" async i/o.   to do an async i/o one puts
     526             :  * an aiodesc on the "pending" list (protected by splbio()), starts the
     527             :  * i/o and returns VM_PAGER_PEND.    when the i/o is done, we expect
     528             :  * some sort of "i/o done" function to be called (at splbio(), interrupt
     529             :  * time).   this function should remove the aiodesc from the pending list
     530             :  * and place it on the "done" list and wakeup the daemon.   the daemon
     531             :  * will run at normal spl() and will remove all items from the "done"
     532             :  * list and call the "aiodone" hook for each done request (see uvm_pager.c).
     533             :  * [in the old vm code, this was done by calling the "put" routine with
     534             :  * null arguments which made the code harder to read and understand because
     535             :  * you had one function ("put") doing two things.]
     536             :  *
     537             :  * so the current pager needs:
     538             :  *   int uvn_aiodone(struct uvm_aiodesc *)
     539             :  *
     540             :  * => return 0 (aio finished, free it). otherwise requeue for later collection.
     541             :  * => called with pageq's locked by the daemon.
     542             :  *
     543             :  * general outline:
     544             :  * - drop "u_nio" (this req is done!)
     545             :  * - if (object->iosync && u_naio == 0) { wakeup &uvn->u_naio }
     546             :  * - get "page" structures (atop?).
     547             :  * - handle "wanted" pages
      548             :  * don't forget to look at the "object" wanted flag in all cases.
     549             :  */
     550             : 
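
(The pending/done hand-off described above, as a minimal sketch.  Since
uvm_aiodesc is only being proposed here, the function name, the queue
heads, and the aq_list linkage are all illustrative assumptions:)

	/* hypothetical "i/o done" hook, running at interrupt time */
	void
	uvn_aio_biodone(struct uvm_aiodesc *aio)
	{
		int s = splbio();

		TAILQ_REMOVE(&aio_pending, aio, aq_list);
		TAILQ_INSERT_TAIL(&aio_done, aio, aq_list);
		wakeup(&aio_done);	/* wake the daemon's aiodone loop */
		splx(s);
	}
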
     551             : /*
     552             :  * uvn_flush: flush pages out of a uvm object.
     553             :  *
     554             :  * => if PGO_CLEANIT is set, we may block (due to I/O).   thus, a caller
     555             :  *      might want to unlock higher level resources (e.g. vm_map)
     556             :  *      before calling flush.
     557             :  * => if PGO_CLEANIT is not set, then we will not block
      558             :  * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
     559             :  *      for flushing.
     560             :  * => NOTE: we are allowed to lock the page queues, so the caller
     561             :  *      must not be holding the lock on them [e.g. pagedaemon had
     562             :  *      better not call us with the queues locked]
     563             :  * => we return TRUE unless we encountered some sort of I/O error
     564             :  *
     565             :  * comment on "cleaning" object and PG_BUSY pages:
     566             :  *      this routine is holding the lock on the object.   the only time
     567             :  *      that it can run into a PG_BUSY page that it does not own is if
     568             :  *      some other process has started I/O on the page (e.g. either
     569             :  *      a pagein, or a pageout).    if the PG_BUSY page is being paged
     570             :  *      in, then it can not be dirty (!PG_CLEAN) because no one has
     571             :  *      had a chance to modify it yet.    if the PG_BUSY page is being
     572             :  *      paged out then it means that someone else has already started
     573             :  *      cleaning the page for us (how nice!).    in this case, if we
     574             :  *      have syncio specified, then after we make our pass through the
     575             :  *      object we need to wait for the other PG_BUSY pages to clear
     576             :  *      off (i.e. we need to do an iosync).   also note that once a
     577             :  *      page is PG_BUSY it must stay in its object until it is un-busyed.
     578             :  */
     579             : boolean_t
     580           0 : uvn_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
     581             : {
     582           0 :         struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
     583             :         struct vm_page *pp, *ptmp;
     584           0 :         struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
     585           0 :         int npages, result, lcv;
     586             :         boolean_t retval, need_iosync, needs_clean;
     587             :         voff_t curoff;
     588             : 
     589             :         /* get init vals and determine how we are going to traverse object */
     590             :         need_iosync = FALSE;
     591             :         retval = TRUE;          /* return value */
     592           0 :         if (flags & PGO_ALLPAGES) {
     593             :                 start = 0;
     594           0 :                 stop = round_page(uvn->u_size);
     595           0 :         } else {
     596           0 :                 start = trunc_page(start);
     597           0 :                 stop = MIN(round_page(stop), round_page(uvn->u_size));
     598             :         }
     599             : 
     600             :         /*
     601             :          * PG_CLEANCHK: this bit is used by the pgo_mk_pcluster function as
     602             :          * a _hint_ as to how up to date the PG_CLEAN bit is.   if the hint
     603             :          * is wrong it will only prevent us from clustering... it won't break
     604             :          * anything.   we clear all PG_CLEANCHK bits here, and pgo_mk_pcluster
     605             :          * will set them as it syncs PG_CLEAN.   This is only an issue if we
     606             :          * are looking at non-inactive pages (because inactive page's PG_CLEAN
     607             :          * bit is always up to date since there are no mappings).
     608             :          * [borrowed PG_CLEANCHK idea from FreeBSD VM]
     609             :          */
     610           0 :         if ((flags & PGO_CLEANIT) != 0) {
     611           0 :                 KASSERT(uobj->pgops->pgo_mk_pcluster != 0);
     612           0 :                 for (curoff = start ; curoff < stop; curoff += PAGE_SIZE) {
     613           0 :                         if ((pp = uvm_pagelookup(uobj, curoff)) != NULL)
     614           0 :                                 atomic_clearbits_int(&pp->pg_flags,
     615             :                                     PG_CLEANCHK);
     616             :                 }
     617             :         }
     618             : 
     619           0 :         ppsp = NULL;            /* XXX: shut up gcc */
     620           0 :         uvm_lock_pageq();
     621             :         /* locked: both page queues */
     622           0 :         for (curoff = start; curoff < stop; curoff += PAGE_SIZE) {
     623           0 :                 if ((pp = uvm_pagelookup(uobj, curoff)) == NULL)
     624             :                         continue;
     625             :                 /*
      626             :                  * handle the case where we do not need to clean the page
      627             :                  * (either because we are not cleaning or because the page
      628             :                  * is not dirty or is busy):
     629             :                  *
     630             :                  * NOTE: we are allowed to deactivate a non-wired active
     631             :                  * PG_BUSY page, but once a PG_BUSY page is on the inactive
     632             :                  * queue it must stay put until it is !PG_BUSY (so as not to
     633             :                  * confuse pagedaemon).
     634             :                  */
     635           0 :                 if ((flags & PGO_CLEANIT) == 0 || (pp->pg_flags & PG_BUSY) != 0) {
     636             :                         needs_clean = FALSE;
     637           0 :                         if ((pp->pg_flags & PG_BUSY) != 0 &&
     638           0 :                             (flags & (PGO_CLEANIT|PGO_SYNCIO)) ==
     639             :                                      (PGO_CLEANIT|PGO_SYNCIO))
     640           0 :                                 need_iosync = TRUE;
     641             :                 } else {
     642             :                         /*
     643             :                          * freeing: nuke all mappings so we can sync
     644             :                          * PG_CLEAN bit with no race
     645             :                          */
     646           0 :                         if ((pp->pg_flags & PG_CLEAN) != 0 &&
     647           0 :                             (flags & PGO_FREE) != 0 &&
     648           0 :                             (pp->pg_flags & PQ_ACTIVE) != 0)
     649           0 :                                 pmap_page_protect(pp, PROT_NONE);
     650           0 :                         if ((pp->pg_flags & PG_CLEAN) != 0 &&
     651           0 :                             pmap_is_modified(pp))
     652           0 :                                 atomic_clearbits_int(&pp->pg_flags, PG_CLEAN);
     653           0 :                         atomic_setbits_int(&pp->pg_flags, PG_CLEANCHK);
     654             : 
     655           0 :                         needs_clean = ((pp->pg_flags & PG_CLEAN) == 0);
     656             :                 }
     657             : 
     658             :                 /* if we don't need a clean, deactivate/free pages then cont. */
     659           0 :                 if (!needs_clean) {
     660           0 :                         if (flags & PGO_DEACTIVATE) {
     661           0 :                                 if (pp->wire_count == 0) {
     662           0 :                                         pmap_page_protect(pp, PROT_NONE);
     663           0 :                                         uvm_pagedeactivate(pp);
     664           0 :                                 }
     665           0 :                         } else if (flags & PGO_FREE) {
     666           0 :                                 if (pp->pg_flags & PG_BUSY) {
     667           0 :                                         atomic_setbits_int(&pp->pg_flags,
     668             :                                             PG_WANTED);
     669           0 :                                         uvm_unlock_pageq();
     670           0 :                                         UVM_WAIT(pp, 0, "uvn_flsh", 0);
     671           0 :                                         uvm_lock_pageq();
     672           0 :                                         curoff -= PAGE_SIZE;
     673           0 :                                         continue;
     674             :                                 } else {
     675           0 :                                         pmap_page_protect(pp, PROT_NONE);
     676             :                                         /* removed page from object */
     677           0 :                                         uvm_pagefree(pp);
     678             :                                 }
     679           0 :                         }
     680             :                         continue;
     681             :                 }
     682             : 
     683             :                 /*
     684             :                  * pp points to a page in the object that we are
      685             :                  * working on.  it is !PG_CLEAN and !PG_BUSY, and we asked
      686             :                  * for cleaning (PGO_CLEANIT), so we clean it now.
      687             :                  *
      688             :                  * let uvm_pager_put attempt a clustered pageout.
     689             :                  * note: locked: page queues.
     690             :                  */
     691           0 :                 atomic_setbits_int(&pp->pg_flags, PG_BUSY);
     692             :                 UVM_PAGE_OWN(pp, "uvn_flush");
     693           0 :                 pmap_page_protect(pp, PROT_READ);
     694             :                 /* if we're async, free the page in aiodoned */
     695           0 :                 if ((flags & (PGO_FREE|PGO_SYNCIO)) == PGO_FREE)
     696           0 :                         atomic_setbits_int(&pp->pg_flags, PG_RELEASED);
     697             : ReTry:
     698           0 :                 ppsp = pps;
     699           0 :                 npages = sizeof(pps) / sizeof(struct vm_page *);
     700             : 
     701           0 :                 result = uvm_pager_put(uobj, pp, &ppsp, &npages,
     702           0 :                            flags | PGO_DOACTCLUST, start, stop);
     703             : 
     704             :                 /*
     705             :                  * if we did an async I/O it is remotely possible for the
     706             :                  * async i/o to complete and the page "pp" be freed or what
     707             :                  * not before we get a chance to relock the object. Therefore,
     708             :                  * we only touch it when it won't be freed, RELEASED took care
     709             :                  * of the rest.
     710             :                  */
     711           0 :                 uvm_lock_pageq();
     712             : 
     713             :                 /*
     714             :                  * VM_PAGER_AGAIN: given the structure of this pager, this
     715             :                  * can only happen when we are doing async I/O and can't
     716             :                  * map the pages into kernel memory (pager_map) due to lack
     717             :                  * of vm space.   if this happens we drop back to sync I/O.
     718             :                  */
     719           0 :                 if (result == VM_PAGER_AGAIN) {
     720             :                         /*
      721             :                          * it is unlikely, but the page could have been
      722             :                          * released.  we ignore this for now and retry the
      723             :                          * I/O; we will detect and
      724             :                          * handle the released page after the syncio I/O
      725             :                          * completes.
     726             :                          */
     727             : #ifdef DIAGNOSTIC
     728           0 :                         if (flags & PGO_SYNCIO)
     729           0 :         panic("uvn_flush: PGO_SYNCIO return 'try again' error (impossible)");
     730             : #endif
     731           0 :                         flags |= PGO_SYNCIO;
     732           0 :                         if (flags & PGO_FREE)
     733           0 :                                 atomic_clearbits_int(&pp->pg_flags,
     734             :                                     PG_RELEASED);
     735             : 
     736           0 :                         goto ReTry;
     737             :                 }
     738             : 
     739             :                 /*
     740             :                  * the cleaning operation is now done.   finish up.  note that
     741             :                  * on error (!OK, !PEND) uvm_pager_put drops the cluster for us.
     742             :                  * if success (OK, PEND) then uvm_pager_put returns the cluster
     743             :                  * to us in ppsp/npages.
     744             :                  */
     745             :                 /*
     746             :                  * for pending async i/o if we are not deactivating
     747             :                  * we can move on to the next page. aiodoned deals with
     748             :                  * the freeing case for us.
     749             :                  */
     750           0 :                 if (result == VM_PAGER_PEND && (flags & PGO_DEACTIVATE) == 0)
     751             :                         continue;
     752             : 
     753             :                 /*
     754             :                  * need to look at each page of the I/O operation, and do what
     755             :                  * we gotta do.
     756             :                  */
     757           0 :                 for (lcv = 0 ; lcv < npages; lcv++) {
     758           0 :                         ptmp = ppsp[lcv];
     759             :                         /*
     760             :                          * verify the page didn't get moved
     761             :                          */
     762           0 :                         if (result == VM_PAGER_PEND && ptmp->uobject != uobj)
     763             :                                 continue;
     764             : 
     765             :                         /*
     766             :                          * unbusy the page if I/O is done.   note that for
     767             :                          * pending I/O it is possible that the I/O op
     768             :                          * finished
     769             :                          * (in which case the page is no longer busy).
     770             :                          */
     771           0 :                         if (result != VM_PAGER_PEND) {
     772           0 :                                 if (ptmp->pg_flags & PG_WANTED)
     773           0 :                                         wakeup(ptmp);
     774             : 
     775           0 :                                 atomic_clearbits_int(&ptmp->pg_flags,
     776             :                                     PG_WANTED|PG_BUSY);
     777             :                                 UVM_PAGE_OWN(ptmp, NULL);
     778           0 :                                 atomic_setbits_int(&ptmp->pg_flags,
     779             :                                     PG_CLEAN|PG_CLEANCHK);
     780           0 :                                 if ((flags & PGO_FREE) == 0)
     781           0 :                                         pmap_clear_modify(ptmp);
     782             :                         }
     783             : 
     784             :                         /* dispose of page */
     785           0 :                         if (flags & PGO_DEACTIVATE) {
     786           0 :                                 if (ptmp->wire_count == 0) {
     787           0 :                                         pmap_page_protect(ptmp, PROT_NONE);
     788           0 :                                         uvm_pagedeactivate(ptmp);
     789           0 :                                 }
     790           0 :                         } else if (flags & PGO_FREE &&
     791             :                             result != VM_PAGER_PEND) {
     792           0 :                                 if (result != VM_PAGER_OK) {
     793           0 :                                         printf("uvn_flush: obj=%p, "
     794             :                                            "offset=0x%llx.  error "
     795             :                                            "during pageout.\n",
     796           0 :                                             pp->uobject,
     797           0 :                                             (long long)pp->offset);
     798           0 :                                         printf("uvn_flush: WARNING: "
     799             :                                             "changes to page may be "
     800             :                                             "lost!\n");
     801             :                                         retval = FALSE;
     802           0 :                                 }
     803           0 :                                 pmap_page_protect(ptmp, PROT_NONE);
     804           0 :                                 uvm_pagefree(ptmp);
     805           0 :                         }
     806             : 
     807             :                 }               /* end of "lcv" for loop */
     808             : 
     809             :         }               /* end of "pp" for loop */
     810             : 
     811             :         /* done with pagequeues: unlock */
     812           0 :         uvm_unlock_pageq();
     813             : 
     814             :         /* now wait for all I/O if required. */
     815           0 :         if (need_iosync) {
     816           0 :                 while (uvn->u_nio != 0) {
     817           0 :                         uvn->u_flags |= UVM_VNODE_IOSYNC;
     818           0 :                         UVM_WAIT(&uvn->u_nio, FALSE, "uvn_flush", 0);
     819             :                 }
     820           0 :                 if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
     821           0 :                         wakeup(&uvn->u_flags);
     822           0 :                 uvn->u_flags &= ~(UVM_VNODE_IOSYNC|UVM_VNODE_IOSYNCWANTED);
     823           0 :         }
     824             : 
     825           0 :         return(retval);
     826           0 : }
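
(The detach and terminate paths above show the two in-file invocations:
a non-blocking deactivate pass and a blocking clean-and-free pass.  A
caller wanting a synchronous writeback of a byte range would use the
same entry point, roughly as below; off and len are illustrative.)

	/* clean pages in [off, off + len) and wait for the I/O */
	if (!uvn_flush(uobj, off, off + len, PGO_CLEANIT | PGO_SYNCIO))
		printf("uvn_flush: i/o error during writeback\n");
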
     827             : 
     828             : /*
     829             :  * uvn_cluster
     830             :  *
     831             :  * we are about to do I/O in an object at offset.   this function is called
     832             :  * to establish a range of offsets around "offset" in which we can cluster
     833             :  * I/O.
     834             :  */
     835             : 
     836             : void
     837           0 : uvn_cluster(struct uvm_object *uobj, voff_t offset, voff_t *loffset,
     838             :     voff_t *hoffset)
     839             : {
     840           0 :         struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
     841           0 :         *loffset = offset;
     842             : 
     843           0 :         if (*loffset >= uvn->u_size)
     844           0 :                 panic("uvn_cluster: offset out of range");
     845             : 
     846             :         /*
     847             :          * XXX: old pager claims we could use VOP_BMAP to get maxcontig value.
     848             :          */
     849           0 :         *hoffset = *loffset + MAXBSIZE;
     850           0 :         if (*hoffset > round_page(uvn->u_size))   /* past end? */
     851           0 :                 *hoffset = round_page(uvn->u_size);
     852             : 
     853             :         return;
     854           0 : }
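
(A worked example, assuming MAXBSIZE is 64 KB and uvn->u_size is 0x8000;
both values are chosen for illustration.)

	voff_t lo, hi;

	uvn_cluster(uobj, 0x2000, &lo, &hi);
	/* lo == 0x2000.  0x2000 + MAXBSIZE would run past the object,
	 * so hi is clamped to round_page(0x8000) == 0x8000. */
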
     855             : 
     856             : /*
     857             :  * uvn_put: flush page data to backing store.
     858             :  *
     859             :  * => prefer map unlocked (not required)
     860             :  * => flags: PGO_SYNCIO -- use sync. I/O
     861             :  * => note: caller must set PG_CLEAN and pmap_clear_modify (if needed)
     862             :  * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync.
     863             :  *      [thus we never do async i/o!  see iodone comment]
     864             :  */
     865             : int
     866           0 : uvn_put(struct uvm_object *uobj, struct vm_page **pps, int npages, int flags)
     867             : {
     868             :         int retval;
     869             : 
     870           0 :         retval = uvn_io((struct uvm_vnode*)uobj, pps, npages, flags, UIO_WRITE);
     871             : 
     872           0 :         return(retval);
     873             : }
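
(A caller-side sketch, illustrative only; the PG_CLEAN and
pmap_clear_modify bookkeeping that the note above assigns to the caller
is omitted, and pg is an already-busied page assumed from context.)

	/* synchronously write back one busy page pg */
	struct vm_page *pgs[1] = { pg };

	if (uvn_put(uobj, pgs, 1, PGO_SYNCIO) != VM_PAGER_OK)
		printf("pageout failed\n");
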
     874             : 
     875             : /*
     876             :  * uvn_get: get pages (synchronously) from backing store
     877             :  *
     878             :  * => prefer map unlocked (not required)
     879             :  * => flags: PGO_ALLPAGES: get all of the pages
     880             :  *           PGO_LOCKED: fault data structures are locked
     881             :  * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
     882             :  * => NOTE: caller must check for released pages!!
     883             :  */
     884             : int
     885           0 : uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
     886             :     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
     887             : {
     888             :         voff_t current_offset;
     889           0 :         struct vm_page *ptmp;
     890             :         int lcv, result, gotpages;
     891             :         boolean_t done;
     892             : 
      893             :         /* step 1: handle the case where the fault data structures are locked. */
     894           0 :         if (flags & PGO_LOCKED) {
     895             :                 /*
     896             :                  * gotpages is the current number of pages we've gotten (which
      897             :                  * we pass back up to the caller via *npagesp).
     898             :                  */
     899             :                 gotpages = 0;
     900             : 
     901             :                 /*
     902             :                  * step 1a: get pages that are already resident.   only do this
     903             :                  * if the data structures are locked (i.e. the first time
     904             :                  * through).
     905             :                  */
     906             :                 done = TRUE;    /* be optimistic */
     907             : 
     908           0 :                 for (lcv = 0, current_offset = offset ; lcv < *npagesp ;
     909           0 :                     lcv++, current_offset += PAGE_SIZE) {
     910             : 
     911             :                         /* do we care about this page?  if not, skip it */
     912           0 :                         if (pps[lcv] == PGO_DONTCARE)
     913             :                                 continue;
     914             : 
     915             :                         /* lookup page */
     916           0 :                         ptmp = uvm_pagelookup(uobj, current_offset);
     917             : 
     918             :                         /* to be useful must get a non-busy, non-released pg */
     919           0 :                         if (ptmp == NULL ||
     920           0 :                             (ptmp->pg_flags & PG_BUSY) != 0) {
     921           0 :                                 if (lcv == centeridx || (flags & PGO_ALLPAGES)
     922           0 :                                     != 0)
     923           0 :                                         done = FALSE;   /* need to do a wait or I/O! */
     924             :                                 continue;
     925             :                         }
     926             : 
     927             :                         /*
     928             :                          * useful page: busy it and plug it in our
     929             :                          * result array
     930             :                          */
     931           0 :                         atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
     932             :                         UVM_PAGE_OWN(ptmp, "uvn_get1");
     933           0 :                         pps[lcv] = ptmp;
     934           0 :                         gotpages++;
     935             : 
     936           0 :                 }
     937             : 
     938             :                 /*
     939             :                  * XXX: given the "advice", should we consider async read-ahead?
      940             :                  * XXX: the fault code currently deactivates the pages behind
      941             :                  * us.  is this good (other callers might not)?
     942             :                  */
     943             :                 /*
     944             :                  * XXX: read-ahead currently handled by buffer cache (bread)
     945             :                  * level.
     946             :                  * XXX: no async i/o available.
     947             :                  * XXX: so we don't do anything now.
     948             :                  */
     949             : 
     950             :                 /*
      951             :                  * step 1c: now we've either done everything needed or we need
      952             :                  * to unlock and do some waiting or I/O.
     953             :                  */
     954             : 
     955           0 :                 *npagesp = gotpages;            /* let caller know */
     956           0 :                 if (done)
     957           0 :                         return(VM_PAGER_OK);            /* bingo! */
     958             :                 else
     959           0 :                         return(VM_PAGER_UNLOCK);
     960             :         }
     961             : 
     962             :         /*
     963             :          * step 2: get non-resident or busy pages.
     964             :          * data structures are unlocked.
     965             :          *
     966             :          * XXX: because we can't do async I/O at this level we get things
      967             :          * a page at a time (otherwise we'd chunk).   the VOP_READ() will do
      968             :          * async read-ahead for us at a lower level.
     969             :          */
     970           0 :         for (lcv = 0, current_offset = offset;
     971           0 :                          lcv < *npagesp ; lcv++, current_offset += PAGE_SIZE) {
     972             : 
     973             :                 /* skip over pages we've already gotten or don't want */
     974             :                 /* skip over pages we don't _have_ to get */
     975           0 :                 if (pps[lcv] != NULL || (lcv != centeridx &&
     976           0 :                     (flags & PGO_ALLPAGES) == 0))
     977             :                         continue;
     978             : 
     979             :                 /*
     980             :                  * we have yet to locate the current page (pps[lcv]).   we first
     981             :                  * look for a page that is already at the current offset.   if
      982             :                  * we find a page, we check to see if it is busy or released.
     983             :                  * if that is the case, then we sleep on the page until it is
     984             :                  * no longer busy or released and repeat the lookup.    if the
     985             :                  * page we found is neither busy nor released, then we busy it
     986             :                  * (so we own it) and plug it into pps[lcv].   this breaks the
     987             :                  * following while loop and indicates we are ready to move on
     988             :                  * to the next page in the "lcv" loop above.
     989             :                  *
     990             :                  * if we exit the while loop with pps[lcv] still set to NULL,
     991             :                  * then it means that we allocated a new busy/fake/clean page
     992             :                  * ptmp in the object and we need to do I/O to fill in the data.
     993             :                  */
     994           0 :                 while (pps[lcv] == NULL) {      /* top of "pps" while loop */
     995             :                         /* look for a current page */
     996           0 :                         ptmp = uvm_pagelookup(uobj, current_offset);
     997             : 
     998             :                         /* nope?   allocate one now (if we can) */
     999           0 :                         if (ptmp == NULL) {
    1000           0 :                                 ptmp = uvm_pagealloc(uobj, current_offset,
    1001             :                                     NULL, 0);
    1002             : 
    1003             :                                 /* out of RAM? */
    1004           0 :                                 if (ptmp == NULL) {
    1005           0 :                                         uvm_wait("uvn_getpage");
    1006             : 
    1007             :                                         /* goto top of pps while loop */
    1008           0 :                                         continue;
    1009             :                                 }
    1010             : 
    1011             :                                 /*
    1012             :                                  * got new page ready for I/O.  break pps
    1013             :                                  * while loop.  pps[lcv] is still NULL.
    1014             :                                  */
    1015             :                                 break;
    1016             :                         }
    1017             : 
    1018             :                         /* page is there, see if we need to wait on it */
    1019           0 :                         if ((ptmp->pg_flags & PG_BUSY) != 0) {
    1020           0 :                                 atomic_setbits_int(&ptmp->pg_flags, PG_WANTED);
    1021           0 :                                 UVM_WAIT(ptmp, FALSE, "uvn_get", 0);
    1022           0 :                                 continue;       /* goto top of pps while loop */
    1023             :                         }
    1024             : 
    1025             :                         /*
    1026             :                          * if we get here then the page has become resident
    1027             :                          * and unbusy between steps 1 and 2.  we busy it
    1028             :                          * now (so we own it) and set pps[lcv] (so that we
    1029             :                          * exit the while loop).
    1030             :                          */
    1031           0 :                         atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
    1032             :                         UVM_PAGE_OWN(ptmp, "uvn_get2");
    1033           0 :                         pps[lcv] = ptmp;
    1034             :                 }
    1035             : 
    1036             :                 /*
     1037             :                  * if we own a valid page at the correct offset, pps[lcv]
    1038             :                  * will point to it.   nothing more to do except go to the
    1039             :                  * next page.
    1040             :                  */
    1041           0 :                 if (pps[lcv])
    1042             :                         continue;                       /* next lcv */
    1043             : 
    1044             :                 /*
    1045             :                  * we have a "fake/busy/clean" page that we just allocated.  do
    1046             :                  * I/O to fill it with valid data.
    1047             :                  */
    1048           0 :                 result = uvn_io((struct uvm_vnode *) uobj, &ptmp, 1,
    1049             :                     PGO_SYNCIO, UIO_READ);
    1050             : 
    1051             :                 /*
     1052             :                  * I/O done.  because we used sync I/O the result cannot be
    1053             :                  * PEND or AGAIN.
    1054             :                  */
    1055           0 :                 if (result != VM_PAGER_OK) {
    1056           0 :                         if (ptmp->pg_flags & PG_WANTED)
    1057           0 :                                 wakeup(ptmp);
    1058             : 
    1059           0 :                         atomic_clearbits_int(&ptmp->pg_flags,
    1060             :                             PG_WANTED|PG_BUSY);
    1061             :                         UVM_PAGE_OWN(ptmp, NULL);
    1062           0 :                         uvm_lock_pageq();
    1063           0 :                         uvm_pagefree(ptmp);
    1064           0 :                         uvm_unlock_pageq();
    1065           0 :                         return(result);
    1066             :                 }
    1067             : 
    1068             :                 /*
    1069             :                  * we got the page!   clear the fake flag (indicates valid
    1070             :                  * data now in page) and plug into our result array.   note
    1071             :                  * that page is still busy.
    1072             :                  *
     1073             :                  * it is the caller's job to:
    1074             :                  * => check if the page is released
    1075             :                  * => unbusy the page
    1076             :                  * => activate the page
    1077             :                  */
    1078             : 
    1079             :                 /* data is valid ... */
    1080           0 :                 atomic_clearbits_int(&ptmp->pg_flags, PG_FAKE);
    1081           0 :                 pmap_clear_modify(ptmp);                /* ... and clean */
    1082           0 :                 pps[lcv] = ptmp;
    1083             : 
    1084           0 :         }
    1085             : 
    1086           0 :         return (VM_PAGER_OK);
    1087           0 : }
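/*
 * [editor's sketch] The VM_PAGER_UNLOCK return above implies a
 * two-phase calling convention: probe with PGO_LOCKED first, and only
 * when told to unlock, drop the fault locks and retry so that step 2
 * may sleep and do I/O.  A hedged sketch assuming the pgo_get
 * pager-ops hook; ex_fault_getpages and its fixed
 * PROT_READ/MADV_NORMAL arguments are invented for illustration.
 */
#if 0   /* illustrative only */
static int
ex_fault_getpages(struct uvm_object *uobj, voff_t off,
    struct vm_page **pps, int *npagesp, int centeridx)
{
        int rv;

        /* phase 1: resident pages only, fault structures stay locked */
        rv = uobj->pgops->pgo_get(uobj, off, pps, npagesp, centeridx,
            PROT_READ, MADV_NORMAL, PGO_LOCKED);
        if (rv != VM_PAGER_UNLOCK)
                return (rv);

        /* ... caller drops its fault data structure locks here ... */

        /* phase 2: may wait on busy pages and do synchronous I/O */
        return (uobj->pgops->pgo_get(uobj, off, pps, npagesp, centeridx,
            PROT_READ, MADV_NORMAL, PGO_SYNCIO));
}
#endif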
    1088             : 
    1089             : /*
    1090             :  * uvn_io: do I/O to a vnode
    1091             :  *
    1092             :  * => prefer map unlocked (not required)
    1093             :  * => flags: PGO_SYNCIO -- use sync. I/O
    1094             :  * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync.
    1095             :  *      [thus we never do async i/o!  see iodone comment]
    1096             :  */
    1097             : 
    1098             : int
    1099           0 : uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
    1100             : {
    1101             :         struct vnode *vn;
    1102           0 :         struct uio uio;
    1103           0 :         struct iovec iov;
    1104             :         vaddr_t kva;
    1105             :         off_t file_offset;
    1106             :         int waitf, result, mapinflags;
    1107             :         size_t got, wanted;
    1108             :         int netunlocked = 0;
    1109             : 
    1110             :         /* init values */
    1111           0 :         waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT;
    1112           0 :         vn = uvn->u_vnode;
    1113           0 :         file_offset = pps[0]->offset;
    1114             : 
    1115             :         /* check for sync'ing I/O. */
    1116           0 :         while (uvn->u_flags & UVM_VNODE_IOSYNC) {
    1117           0 :                 if (waitf == M_NOWAIT) {
    1118           0 :                         return(VM_PAGER_AGAIN);
    1119             :                 }
    1120           0 :                 uvn->u_flags |= UVM_VNODE_IOSYNCWANTED;
    1121           0 :                 UVM_WAIT(&uvn->u_flags, FALSE, "uvn_iosync", 0);
    1122             :         }
    1123             : 
    1124             :         /* check size */
    1125           0 :         if (file_offset >= uvn->u_size) {
    1126           0 :                 return(VM_PAGER_BAD);
    1127             :         }
    1128             : 
    1129             :         /* first try and map the pages in (without waiting) */
    1130           0 :         mapinflags = (rw == UIO_READ) ?
    1131             :             UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE;
    1132             : 
    1133           0 :         kva = uvm_pagermapin(pps, npages, mapinflags);
    1134           0 :         if (kva == 0 && waitf == M_NOWAIT) {
    1135           0 :                 return(VM_PAGER_AGAIN);
    1136             :         }
    1137             : 
    1138             :         /*
    1139             :          * ok, now bump u_nio up.   at this point we are done with uvn
    1140             :          * and can unlock it.   if we still don't have a kva, try again
    1141             :          * (this time with sleep ok).
    1142             :          */
    1143           0 :         uvn->u_nio++;                        /* we have an I/O in progress! */
    1144           0 :         if (kva == 0)
    1145           0 :                 kva = uvm_pagermapin(pps, npages,
    1146           0 :                     mapinflags | UVMPAGER_MAPIN_WAITOK);
    1147             : 
    1148             :         /*
    1149             :          * ok, mapped in.  our pages are PG_BUSY so they are not going to
    1150             :          * get touched (so we can look at "offset" without having to lock
    1151             :          * the object).  set up for I/O.
    1152             :          */
    1153             :         /* fill out uio/iov */
    1154           0 :         iov.iov_base = (caddr_t) kva;
    1155           0 :         wanted = (size_t)npages << PAGE_SHIFT;
    1156           0 :         if (file_offset + wanted > uvn->u_size)
    1157           0 :                 wanted = uvn->u_size - file_offset;  /* XXX: needed? */
    1158           0 :         iov.iov_len = wanted;
    1159           0 :         uio.uio_iov = &iov;
    1160           0 :         uio.uio_iovcnt = 1;
    1161           0 :         uio.uio_offset = file_offset;
    1162           0 :         uio.uio_segflg = UIO_SYSSPACE;
    1163           0 :         uio.uio_rw = rw;
    1164           0 :         uio.uio_resid = wanted;
    1165           0 :         uio.uio_procp = curproc;
    1166             : 
    1167             :         /*
    1168             :          * This process may already have the NET_LOCK(), if we
    1169             :          * faulted in copyin() or copyout() in the network stack.
    1170             :          */
    1171           0 :         if (rw_status(&netlock) == RW_WRITE) {
    1172           0 :                 NET_UNLOCK();
    1173             :                 netunlocked = 1;
    1174           0 :         }
    1175             : 
    1176             :         /* do the I/O!  (XXX: curproc?) */
    1177             :         /*
    1178             :          * This process may already have this vnode locked, if we faulted in
    1179             :          * copyin() or copyout() on a region backed by this vnode
    1180             :          * while doing I/O to the vnode.  If this is the case, don't
     1181             :          * panic; instead, return the error to the user.
    1182             :          *
    1183             :          * XXX this is a stopgap to prevent a panic.
    1184             :          * Ideally, this kind of operation *should* work.
    1185             :          */
    1186             :         result = 0;
    1187           0 :         if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
    1188           0 :                 result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL);
    1189             : 
    1190           0 :         if (result == 0) {
    1191             :                 /* NOTE: vnode now locked! */
    1192           0 :                 if (rw == UIO_READ)
    1193           0 :                         result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
    1194             :                 else
    1195           0 :                         result = VOP_WRITE(vn, &uio,
    1196           0 :                             (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
    1197           0 :                             curproc->p_ucred);
    1198             : 
    1199           0 :                 if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
    1200           0 :                         VOP_UNLOCK(vn);
    1201             : 
    1202             :         }
    1203             : 
    1204           0 :         if (netunlocked)
    1205           0 :                 NET_LOCK();
    1206             : 
    1207             : 
    1208             :         /* NOTE: vnode now unlocked (unless vnislocked) */
    1209             :         /*
    1210             :          * result == unix style errno (0 == OK!)
    1211             :          *
    1212             :          * zero out rest of buffer (if needed)
    1213             :          */
    1214           0 :         if (result == 0) {
    1215           0 :                 got = wanted - uio.uio_resid;
    1216             : 
    1217           0 :                 if (wanted && got == 0) {
    1218             :                         result = EIO;           /* XXX: error? */
    1219           0 :                 } else if (got < PAGE_SIZE * npages && rw == UIO_READ) {
    1220           0 :                         memset((void *) (kva + got), 0,
    1221             :                                ((size_t)npages << PAGE_SHIFT) - got);
    1222           0 :                 }
    1223             :         }
    1224             : 
    1225             :         /* now remove pager mapping */
    1226           0 :         uvm_pagermapout(kva, npages);
    1227             : 
    1228             :         /* now clean up the object (i.e. drop I/O count) */
    1229           0 :         uvn->u_nio--;                        /* I/O DONE! */
    1230           0 :         if ((uvn->u_flags & UVM_VNODE_IOSYNC) != 0 && uvn->u_nio == 0) {
    1231           0 :                 wakeup(&uvn->u_nio);
    1232           0 :         }
    1233             : 
    1234           0 :         if (result == 0) {
    1235           0 :                 return(VM_PAGER_OK);
    1236             :         } else {
    1237           0 :                 while (rebooting)
    1238           0 :                         tsleep(&rebooting, PVM, "uvndead", 0);
    1239           0 :                 return(VM_PAGER_ERROR);
    1240             :         }
    1241           0 : }
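/*
 * [editor's sketch] The iov/uio setup above is the standard kernel
 * pattern for a single contiguous kernel-space transfer.  A minimal
 * restatement of that shape; ex_setup_uio is invented for
 * illustration.
 */
#if 0   /* illustrative only */
static void
ex_setup_uio(struct uio *uio, struct iovec *iov, vaddr_t kva,
    size_t wanted, off_t file_offset, enum uio_rw rw)
{
        iov->iov_base = (caddr_t)kva;     /* pager-mapped pages */
        iov->iov_len = wanted;
        uio->uio_iov = iov;
        uio->uio_iovcnt = 1;
        uio->uio_offset = file_offset;
        uio->uio_segflg = UIO_SYSSPACE;   /* buffer is kernel VA */
        uio->uio_rw = rw;
        uio->uio_resid = wanted;          /* VOP_READ/VOP_WRITE decrement this */
        uio->uio_procp = curproc;
}
#endif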
    1242             : 
    1243             : /*
    1244             :  * uvm_vnp_uncache: disable "persisting" in a vnode... when last reference
    1245             :  * is gone we will kill the object (flushing dirty pages back to the vnode
    1246             :  * if needed).
    1247             :  *
    1248             :  * => returns TRUE if there was no uvm_object attached or if there was
    1249             :  *      one and we killed it [i.e. if there is no active uvn]
    1250             :  * => called with the vnode VOP_LOCK'd [we will unlock it for I/O, if
    1251             :  *      needed]
    1252             :  *
    1253             :  * => XXX: given that we now kill uvn's when a vnode is recycled (without
    1254             :  *      having to hold a reference on the vnode) and given a working
     1255             :  *      uvm_vnp_sync(), how does that affect the need for this function?
    1256             :  *      [XXXCDC: seems like it can die?]
    1257             :  *
    1258             :  * => XXX: this function should DIE once we merge the VM and buffer
    1259             :  *      cache.
    1260             :  *
    1261             :  * research shows that this is called in the following places:
    1262             :  * ext2fs_truncate, ffs_truncate, detrunc[msdosfs]: called when vnode
    1263             :  *      changes sizes
    1264             :  * ext2fs_write, WRITE [ufs_readwrite], msdosfs_write: called when we
    1265             :  *      are written to
     1266             :  * ext2fs_chmod, ufs_chmod: called if VTEXT vnode and the sticky bit
    1267             :  *      is off
    1268             :  * ffs_realloccg: when we can't extend the current block and have
    1269             :  *      to allocate a new one we call this [XXX: why?]
    1270             :  * nfsrv_rename, rename_files: called when the target filename is there
    1271             :  *      and we want to remove it
    1272             :  * nfsrv_remove, sys_unlink: called on file we are removing
    1273             :  * nfsrv_access: if VTEXT and we want WRITE access and we don't uncache
    1274             :  *      then return "text busy"
    1275             :  * nfs_open: seems to uncache any file opened with nfs
    1276             :  * vn_writechk: if VTEXT vnode and can't uncache return "text busy"
    1277             :  * fusefs_open: uncaches any file that is opened
    1278             :  * fusefs_write: uncaches on every write
    1279             :  */
    1280             : 
    1281             : int
    1282           0 : uvm_vnp_uncache(struct vnode *vp)
    1283             : {
    1284           0 :         struct uvm_vnode *uvn = vp->v_uvm;
    1285             : 
    1286             :         /* lock uvn part of the vnode and check if we need to do anything */
    1287             : 
    1288           0 :         if ((uvn->u_flags & UVM_VNODE_VALID) == 0 ||
    1289           0 :                         (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) {
    1290           0 :                 return(TRUE);
    1291             :         }
    1292             : 
    1293             :         /*
    1294             :          * we have a valid, non-blocked uvn.   clear persist flag.
    1295             :          * if uvn is currently active we can return now.
    1296             :          */
    1297           0 :         uvn->u_flags &= ~UVM_VNODE_CANPERSIST;
    1298           0 :         if (uvn->u_obj.uo_refs) {
    1299           0 :                 return(FALSE);
    1300             :         }
    1301             : 
    1302             :         /*
    1303             :          * uvn is currently persisting!   we have to gain a reference to
    1304             :          * it so that we can call uvn_detach to kill the uvn.
    1305             :          */
    1306           0 :         vref(vp);                       /* seems ok, even with VOP_LOCK */
    1307           0 :         uvn->u_obj.uo_refs++;                /* value is now 1 */
    1308             : 
    1309             : #ifdef VFSLCKDEBUG
    1310             :         /*
    1311             :          * carry over sanity check from old vnode pager: the vnode should
    1312             :          * be VOP_LOCK'd, and we confirm it here.
    1313             :          */
    1314             :         if ((vp->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp))
    1315             :                 panic("uvm_vnp_uncache: vnode not locked!");
    1316             : #endif
    1317             : 
    1318             :         /*
    1319             :          * now drop our reference to the vnode.   if we have the sole
    1320             :          * reference to the vnode then this will cause it to die [as we
    1321             :          * just cleared the persist flag].   we have to unlock the vnode
    1322             :          * while we are doing this as it may trigger I/O.
    1323             :          *
    1324             :          * XXX: it might be possible for uvn to get reclaimed while we are
    1325             :          * unlocked causing us to return TRUE when we should not.   we ignore
    1326             :          * this as a false-positive return value doesn't hurt us.
    1327             :          */
    1328           0 :         VOP_UNLOCK(vp);
    1329           0 :         uvn_detach(&uvn->u_obj);
    1330           0 :         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    1331             : 
    1332           0 :         return(TRUE);
    1333           0 : }
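/*
 * [editor's sketch] The vn_writechk-style use listed above follows
 * this pattern: refuse writes to an executing (VTEXT) vnode whose
 * pages cannot be uncached.  ex_writechk is invented for
 * illustration.
 */
#if 0   /* illustrative only */
static int
ex_writechk(struct vnode *vp)
{
        if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp))
                return (ETXTBSY);       /* "text busy" */
        return (0);
}
#endif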
    1334             : 
    1335             : /*
    1336             :  * uvm_vnp_setsize: grow or shrink a vnode uvn
    1337             :  *
    1338             :  * grow   => just update size value
    1339             :  * shrink => toss un-needed pages
    1340             :  *
    1341             :  * => we assume that the caller has a reference of some sort to the
    1342             :  *      vnode in question so that it will not be yanked out from under
    1343             :  *      us.
    1344             :  *
    1345             :  * called from:
    1346             :  *  => truncate fns (ext2fs_truncate, ffs_truncate, detrunc[msdos],
    1347             :  *     fusefs_setattr)
    1348             :  *  => "write" fns (ext2fs_write, WRITE [ufs/ufs], msdosfs_write, nfs_write
    1349             :  *     fusefs_write)
    1350             :  *  => ffs_balloc [XXX: why? doesn't WRITE handle?]
    1351             :  *  => NFS: nfs_loadattrcache, nfs_getattrcache, nfs_setattr
    1352             :  *  => union fs: union_newsize
    1353             :  */
    1354             : 
    1355             : void
    1356           0 : uvm_vnp_setsize(struct vnode *vp, off_t newsize)
    1357             : {
    1358           0 :         struct uvm_vnode *uvn = vp->v_uvm;
    1359             : 
    1360             :         /* lock uvn and check for valid object, and if valid: do it! */
    1361           0 :         if (uvn->u_flags & UVM_VNODE_VALID) {
    1362             : 
    1363             :                 /*
    1364             :                  * now check if the size has changed: if we shrink we had better
    1365             :                  * toss some pages...
    1366             :                  */
    1367             : 
    1368           0 :                 if (uvn->u_size > newsize) {
    1369           0 :                         (void)uvn_flush(&uvn->u_obj, newsize,
    1370             :                             uvn->u_size, PGO_FREE);
    1371           0 :                 }
    1372           0 :                 uvn->u_size = newsize;
    1373           0 :         }
    1374           0 : }
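/*
 * [editor's sketch] On the truncate side listed above, a filesystem
 * deallocates its blocks and then informs UVM, which frees any pages
 * past the new EOF.  ex_truncate is invented for illustration.
 */
#if 0   /* illustrative only */
static int
ex_truncate(struct vnode *vp, off_t newsize)
{
        /* ... filesystem-specific block deallocation here ... */
        uvm_vnp_setsize(vp, newsize);   /* shrink => flush with PGO_FREE */
        return (0);
}
#endif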
    1375             : 
    1376             : /*
    1377             :  * uvm_vnp_sync: flush all dirty VM pages back to their backing vnodes.
    1378             :  *
    1379             :  * => called from sys_sync with no VM structures locked
    1380             :  * => only one process can do a sync at a time (because the uvn
    1381             :  *    structure only has one queue for sync'ing).  we ensure this
    1382             :  *    by holding the uvn_sync_lock while the sync is in progress.
    1383             :  *    other processes attempting a sync will sleep on this lock
    1384             :  *    until we are done.
    1385             :  */
    1386             : void
    1387           0 : uvm_vnp_sync(struct mount *mp)
    1388             : {
    1389             :         struct uvm_vnode *uvn;
    1390             :         struct vnode *vp;
    1391             : 
    1392             :         /*
     1393             :          * step 1: ensure we are the only ones using the uvn_sync_q by locking
    1394             :          * our lock...
    1395             :          */
    1396           0 :         rw_enter_write(&uvn_sync_lock);
    1397             : 
    1398             :         /*
    1399             :          * step 2: build up a simpleq of uvns of interest based on the
    1400             :          * write list.   we gain a reference to uvns of interest. 
    1401             :          */
    1402           0 :         SIMPLEQ_INIT(&uvn_sync_q);
    1403           0 :         LIST_FOREACH(uvn, &uvn_wlist, u_wlist) {
    1404           0 :                 vp = uvn->u_vnode;
    1405           0 :                 if (mp && vp->v_mount != mp)
    1406             :                         continue;
    1407             : 
    1408             :                 /*
    1409             :                  * If the vnode is "blocked" it means it must be dying, which
     1410             :                  * in turn means it's in the process of being flushed out so
    1411             :                  * we can safely skip it.
    1412             :                  *
    1413             :                  * note that uvn must already be valid because we found it on
    1414             :                  * the wlist (this also means it can't be ALOCK'd).
    1415             :                  */
    1416           0 :                 if ((uvn->u_flags & UVM_VNODE_BLOCKED) != 0)
    1417             :                         continue;
    1418             : 
    1419             :                 /*
    1420             :                  * gain reference.   watch out for persisting uvns (need to
    1421             :                  * regain vnode REF).
    1422             :                  */
    1423           0 :                 if (uvn->u_obj.uo_refs == 0)
    1424           0 :                         vref(vp);
    1425           0 :                 uvn->u_obj.uo_refs++;
    1426             : 
    1427           0 :                 SIMPLEQ_INSERT_HEAD(&uvn_sync_q, uvn, u_syncq);
    1428           0 :         }
    1429             : 
    1430             :         /* step 3: we now have a list of uvn's that may need cleaning. */
    1431           0 :         SIMPLEQ_FOREACH(uvn, &uvn_sync_q, u_syncq) {
    1432             : #ifdef DEBUG
    1433             :                 if (uvn->u_flags & UVM_VNODE_DYING) {
    1434             :                         printf("uvm_vnp_sync: dying vnode on sync list\n");
    1435             :                 }
    1436             : #endif
    1437           0 :                 uvn_flush(&uvn->u_obj, 0, 0, PGO_CLEANIT|PGO_ALLPAGES|PGO_DOACTCLUST);
    1438             : 
    1439             :                 /*
    1440             :                  * if we have the only reference and we just cleaned the uvn,
    1441             :                  * then we can pull it out of the UVM_VNODE_WRITEABLE state
    1442             :                  * thus allowing us to avoid thinking about flushing it again
    1443             :                  * on later sync ops.
    1444             :                  */
    1445           0 :                 if (uvn->u_obj.uo_refs == 1 &&
    1446           0 :                     (uvn->u_flags & UVM_VNODE_WRITEABLE)) {
    1447           0 :                         LIST_REMOVE(uvn, u_wlist);
    1448           0 :                         uvn->u_flags &= ~UVM_VNODE_WRITEABLE;
    1449           0 :                 }
    1450             : 
    1451             :                 /* now drop our reference to the uvn */
    1452           0 :                 uvn_detach(&uvn->u_obj);
    1453             :         }
    1454             : 
    1455           0 :         rw_exit_write(&uvn_sync_lock);
    1456           0 : }
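/*
 * [editor's sketch] From the sys_sync() side, the call is simply
 * per-mount (or mp == NULL for every mount, per the filter in step 2
 * above); serialization against concurrent syncs is handled
 * internally via uvn_sync_lock.  ex_sync_filesystem is invented for
 * illustration.
 */
#if 0   /* illustrative only */
static void
ex_sync_filesystem(struct mount *mp)
{
        uvm_vnp_sync(mp);       /* may sleep on uvn_sync_lock */
}
#endif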

Generated by: LCOV version 1.13