LCOV - code coverage report
Current view: top level - uvm - uvm_swap.c (source / functions)
Test:         6.4
Date:         2018-10-19 03:25:38
Coverage:            Hit    Total    Coverage
  Lines:               0      697       0.0 %
  Functions:           0       29       0.0 %
Legend: Lines: hit / not hit

          Line data    Source code
       1             : /*      $OpenBSD: uvm_swap.c,v 1.143 2018/02/19 08:59:53 mpi Exp $      */
       2             : /*      $NetBSD: uvm_swap.c,v 1.40 2000/11/17 11:39:39 mrg Exp $        */
       3             : 
       4             : /*
       5             :  * Copyright (c) 1995, 1996, 1997 Matthew R. Green
       6             :  * All rights reserved.
       7             :  *
       8             :  * Redistribution and use in source and binary forms, with or without
       9             :  * modification, are permitted provided that the following conditions
      10             :  * are met:
      11             :  * 1. Redistributions of source code must retain the above copyright
      12             :  *    notice, this list of conditions and the following disclaimer.
      13             :  * 2. Redistributions in binary form must reproduce the above copyright
      14             :  *    notice, this list of conditions and the following disclaimer in the
      15             :  *    documentation and/or other materials provided with the distribution.
      16             :  * 3. The name of the author may not be used to endorse or promote products
      17             :  *    derived from this software without specific prior written permission.
      18             :  *
      19             :  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
      20             :  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
      21             :  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
      22             :  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
      23             :  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
      24             :  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
      25             :  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
      26             :  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
      27             :  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      28             :  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      29             :  * SUCH DAMAGE.
      30             :  *
      31             :  * from: NetBSD: vm_swap.c,v 1.52 1997/12/02 13:47:37 pk Exp
      32             :  * from: Id: uvm_swap.c,v 1.1.2.42 1998/02/02 20:38:06 chuck Exp
      33             :  */
      34             : 
      35             : #include <sys/param.h>
      36             : #include <sys/systm.h>
      37             : #include <sys/buf.h>
      38             : #include <sys/conf.h>
      39             : #include <sys/proc.h>
      40             : #include <sys/namei.h>
      41             : #include <sys/disklabel.h>
      42             : #include <sys/errno.h>
      43             : #include <sys/kernel.h>
      44             : #include <sys/malloc.h>
      45             : #include <sys/vnode.h>
      46             : #include <sys/fcntl.h>
      47             : #include <sys/extent.h>
      48             : #include <sys/mount.h>
      49             : #include <sys/pool.h>
      50             : #include <sys/syscallargs.h>
      51             : #include <sys/swap.h>
      52             : #include <sys/disk.h>
      53             : #include <sys/task.h>
      54             : #include <sys/pledge.h>
      55             : #if defined(NFSCLIENT)
      56             : #include <sys/socket.h>
      57             : #include <sys/domain.h>
      58             : #include <netinet/in.h>
      59             : #include <nfs/nfsproto.h>
      60             : #include <nfs/nfsdiskless.h>
      61             : #endif
      62             : 
      63             : #include <uvm/uvm.h>
      64             : #ifdef UVM_SWAP_ENCRYPT
      65             : #include <uvm/uvm_swap_encrypt.h>
      66             : #endif
      67             : 
      68             : #include <sys/specdev.h>
      69             : 
      70             : #include "vnd.h"
      71             : 
      72             : /*
      73             :  * uvm_swap.c: manage configuration and i/o to swap space.
      74             :  */
      75             : 
      76             : /*
      77             :  * swap space is managed in the following way:
      78             :  *
      79             :  * each swap partition or file is described by a "swapdev" structure.
      80             :  * each "swapdev" structure contains a "swapent" structure which contains
      81             :  * information that is passed up to the user (via system calls).
      82             :  *
      83             :  * each swap partition is assigned a "priority" (int) which controls
      84             :  * swap partition usage.
      85             :  *
      86             :  * the system maintains a global data structure describing all swap
      87             :  * partitions/files.   there is a sorted LIST of "swappri" structures
      88             :  * which describe "swapdev"'s at that priority.   this LIST is headed
      89             :  * by the "swap_priority" global var.    each "swappri" contains a 
      90             :  * TAILQ of "swapdev" structures at that priority.
      91             :  *
      92             :  * locking:
      93             :  *  - swap_syscall_lock (sleep lock): this lock serializes the swapctl
      94             :  *    system call and prevents the swap priority list from changing
      95             :  *    while we are in the middle of a system call (e.g. SWAP_STATS).
      96             :  *
      97             :  * each swap device has the following info:
      98             :  *  - swap device in use (could be disabled, preventing future use)
      99             :  *  - swap enabled (allows new allocations on swap)
     100             :  *  - map info in /dev/drum
     101             :  *  - vnode pointer
     102             :  * for swap files only:
     103             :  *  - block size
     104             :  *  - max byte count in buffer
     105             :  *  - buffer
     106             :  *  - credentials to use when doing i/o to file
     107             :  *
     108             :  * userland controls and configures swap with the swapctl(2) system call.
      109             :  * the sys_swapctl() function performs the following operations:
     110             :  *  [1] SWAP_NSWAP: returns the number of swap devices currently configured
     111             :  *  [2] SWAP_STATS: given a pointer to an array of swapent structures 
     112             :  *      (passed in via "arg") of a size passed in via "misc" ... we load
     113             :  *      the current swap config into the array.
     114             :  *  [3] SWAP_ON: given a pathname in arg (could be device or file) and a
     115             :  *      priority in "misc", start swapping on it.
     116             :  *  [4] SWAP_OFF: as SWAP_ON, but stops swapping to a device
     117             :  *  [5] SWAP_CTL: changes the priority of a swap device (new priority in
     118             :  *      "misc")
     119             :  */
     120             : 
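For illustration only (this sketch is not part of uvm_swap.c): a minimal
userland program driving the SWAP_NSWAP and SWAP_STATS operations described
above, assuming the swapctl(2)/swapent interface from <sys/swap.h>, might
look roughly like this:

        #include <sys/param.h>
        #include <sys/swap.h>
        #include <err.h>
        #include <stdio.h>
        #include <stdlib.h>

        int
        main(void)
        {
                struct swapent *sep;
                int i, nswap, n;

                /* [1] SWAP_NSWAP: how many swap devices are configured? */
                if ((nswap = swapctl(SWAP_NSWAP, NULL, 0)) == -1)
                        err(1, "swapctl(SWAP_NSWAP)");
                if (nswap == 0)
                        return (0);

                if ((sep = calloc(nswap, sizeof(*sep))) == NULL)
                        err(1, "calloc");

                /* [2] SWAP_STATS: fill in at most "misc" swapent structures. */
                if ((n = swapctl(SWAP_STATS, sep, nswap)) == -1)
                        err(1, "swapctl(SWAP_STATS)");

                for (i = 0; i < n; i++)
                        printf("%s: %d/%d blocks in use, priority %d\n",
                            sep[i].se_path, sep[i].se_inuse,
                            sep[i].se_nblks, sep[i].se_priority);
                free(sep);
                return (0);
        }
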
     121             : /*
     122             :  * swapdev: describes a single swap partition/file
     123             :  *
     124             :  * note the following should be true:
     125             :  * swd_inuse <= swd_nblks  [number of blocks in use is <= total blocks]
     126             :  * swd_nblks <= swd_mapsize [because mapsize includes disklabel]
     127             :  */
     128             : struct swapdev {
     129             :         struct swapent  swd_se;
     130             : #define swd_dev         swd_se.se_dev           /* device id */
     131             : #define swd_flags       swd_se.se_flags         /* flags:inuse/enable/fake */
     132             : #define swd_priority    swd_se.se_priority      /* our priority */
     133             : #define swd_inuse       swd_se.se_inuse         /* blocks used */
     134             : #define swd_nblks       swd_se.se_nblks         /* total blocks */
     135             :         char                    *swd_path;      /* saved pathname of device */
     136             :         int                     swd_pathlen;    /* length of pathname */
     137             :         int                     swd_npages;     /* #pages we can use */
     138             :         int                     swd_npginuse;   /* #pages in use */
     139             :         int                     swd_npgbad;     /* #pages bad */
     140             :         int                     swd_drumoffset; /* page0 offset in drum */
     141             :         int                     swd_drumsize;   /* #pages in drum */
     142             :         struct extent           *swd_ex;        /* extent for this swapdev */
     143             :         char                    swd_exname[12]; /* name of extent above */
     144             :         struct vnode            *swd_vp;        /* backing vnode */
     145             :         TAILQ_ENTRY(swapdev)    swd_next;       /* priority tailq */
     146             : 
     147             :         int                     swd_bsize;      /* blocksize (bytes) */
     148             :         int                     swd_maxactive;  /* max active i/o reqs */
     149             :         int                     swd_active;     /* # of active i/o reqs */
     150             :         struct bufq             swd_bufq;
     151             :         struct ucred            *swd_cred;      /* cred for file access */
     152             : #ifdef UVM_SWAP_ENCRYPT
     153             : #define SWD_KEY_SHIFT           7               /* One key per 0.5 MByte */
     154             : #define SWD_KEY(x,y)            &((x)->swd_keys[((y) - (x)->swd_drumoffset) >> SWD_KEY_SHIFT])
     155             : #define SWD_KEY_SIZE(x) (((x) + (1 << SWD_KEY_SHIFT) - 1) >> SWD_KEY_SHIFT)
     156             : 
     157             : #define SWD_DCRYPT_SHIFT        5
     158             : #define SWD_DCRYPT_BITS         32
     159             : #define SWD_DCRYPT_MASK         (SWD_DCRYPT_BITS - 1)
     160             : #define SWD_DCRYPT_OFF(x)       ((x) >> SWD_DCRYPT_SHIFT)
     161             : #define SWD_DCRYPT_BIT(x)       ((x) & SWD_DCRYPT_MASK)
     162             : #define SWD_DCRYPT_SIZE(x)      (SWD_DCRYPT_OFF((x) + SWD_DCRYPT_MASK) * sizeof(u_int32_t))
     163             :         u_int32_t               *swd_decrypt;   /* bitmap for decryption */
     164             :         struct swap_key         *swd_keys;      /* keys for different parts */
     165             : #endif
     166             : };
     167             : 
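To make the UVM_SWAP_ENCRYPT bookkeeping above concrete, here is a hedged
worked example assuming a 4 KB page size (PAGE_SHIFT is machine-dependent,
so the exact numbers vary): a 256 MB swap device holds 65536 pages, so

        SWD_KEY_SIZE(65536)    == (65536 + 127) >> 7       == 512 keys
        SWD_DCRYPT_SIZE(65536) == ((65536 + 31) >> 5) * 4  == 8192 bytes

i.e. one swap_key per 128 pages (0.5 MB of swap) and one "needs decryption"
bit per page, packed 32 to a u_int32_t; the uvm_swap_initcrypt() comment
below quotes the same 8 KB figure for a 256 MB partition.
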
     168             : /*
     169             :  * swap device priority entry; the list is kept sorted on `spi_priority'.
     170             :  */
     171             : struct swappri {
     172             :         int                     spi_priority;     /* priority */
     173             :         TAILQ_HEAD(spi_swapdev, swapdev)        spi_swapdev;
     174             :         /* tailq of swapdevs at this priority */
     175             :         LIST_ENTRY(swappri)     spi_swappri;      /* global list of pri's */
     176             : };
     177             : 
     178             : /*
     179             :  * The following two structures are used to keep track of data transfers
     180             :  * on swap devices associated with regular files.
     181             :  * NOTE: this code is more or less a copy of vnd.c; we use the same
      182             :  * structure names here to ease porting.
     183             :  */
     184             : struct vndxfer {
     185             :         struct buf      *vx_bp;         /* Pointer to parent buffer */
     186             :         struct swapdev  *vx_sdp;
     187             :         int             vx_error;
     188             :         int             vx_pending;     /* # of pending aux buffers */
     189             :         int             vx_flags;
     190             : #define VX_BUSY         1
     191             : #define VX_DEAD         2
     192             : };
     193             : 
     194             : struct vndbuf {
     195             :         struct buf      vb_buf;
     196             :         struct vndxfer  *vb_vnx;
     197             :         struct task     vb_task;
     198             : };
     199             : 
     200             : /*
     201             :  * We keep a of pool vndbuf's and vndxfer structures.
      202             :  * We keep a pool of vndbuf's and vndxfer structures.
     203             : struct pool vndxfer_pool;
     204             : struct pool vndbuf_pool;
     205             : 
     206             : 
     207             : /*
     208             :  * local variables
     209             :  */
     210             : struct extent *swapmap;         /* controls the mapping of /dev/drum */
     211             : 
     212             : /* list of all active swap devices [by priority] */
     213             : LIST_HEAD(swap_priority, swappri);
     214             : struct swap_priority swap_priority;
     215             : 
     216             : /* locks */
     217             : struct rwlock swap_syscall_lock = RWLOCK_INITIALIZER("swplk");
     218             : 
     219             : /*
     220             :  * prototypes
     221             :  */
     222             : void             swapdrum_add(struct swapdev *, int);
     223             : struct swapdev  *swapdrum_getsdp(int);
     224             : 
     225             : struct swapdev  *swaplist_find(struct vnode *, int);
     226             : void             swaplist_insert(struct swapdev *, 
     227             :                                      struct swappri *, int);
     228             : void             swaplist_trim(void);
     229             : 
     230             : int swap_on(struct proc *, struct swapdev *);
     231             : int swap_off(struct proc *, struct swapdev *);
     232             : 
     233             : void sw_reg_strategy(struct swapdev *, struct buf *, int);
     234             : void sw_reg_iodone(struct buf *);
     235             : void sw_reg_iodone_internal(void *);
     236             : void sw_reg_start(struct swapdev *);
     237             : 
     238             : int uvm_swap_io(struct vm_page **, int, int, int);
     239             : 
     240             : void swapmount(void);
     241             : boolean_t uvm_swap_allocpages(struct vm_page **, int);
     242             : 
     243             : #ifdef UVM_SWAP_ENCRYPT
     244             : /* for swap encrypt */
     245             : void uvm_swap_markdecrypt(struct swapdev *, int, int, int);
     246             : boolean_t uvm_swap_needdecrypt(struct swapdev *, int);
     247             : void uvm_swap_initcrypt(struct swapdev *, int);
     248             : #endif
     249             : 
     250             : /*
     251             :  * uvm_swap_init: init the swap system data structures and locks
     252             :  *
     253             :  * => called at boot time from init_main.c after the filesystems
     254             :  *      are brought up (which happens after uvm_init())
     255             :  */
     256             : void
     257           0 : uvm_swap_init(void)
     258             : {
     259             :         /*
     260             :          * first, init the swap list, its counter, and its lock.
     261             :          * then get a handle on the vnode for /dev/drum by using
      262             :          * its dev_t number ("swapdev", from MD conf.c).
     263             :          */
     264           0 :         LIST_INIT(&swap_priority);
     265           0 :         uvmexp.nswapdev = 0;
     266             : 
     267           0 :         if (!swapdev_vp && bdevvp(swapdev, &swapdev_vp))
     268           0 :                 panic("uvm_swap_init: can't get vnode for swap device");
     269             : 
     270             :         /*
     271             :          * create swap block extent to map /dev/drum. The extent spans
      272             :          * 1 to INT_MAX, which allows 2 gigablocks of swap space.  Note that
     273             :          * block 0 is reserved (used to indicate an allocation failure,
     274             :          * or no allocation).
     275             :          */
     276           0 :         swapmap = extent_create("swapmap", 1, INT_MAX,
     277             :                                 M_VMSWAP, 0, 0, EX_NOWAIT);
     278           0 :         if (swapmap == 0)
     279           0 :                 panic("uvm_swap_init: extent_create failed");
     280             : 
     281             :         /* allocate pools for structures used for swapping to files. */
     282           0 :         pool_init(&vndxfer_pool, sizeof(struct vndxfer), 0, IPL_BIO, 0,
     283             :             "swp vnx", NULL);
     284           0 :         pool_init(&vndbuf_pool, sizeof(struct vndbuf), 0, IPL_BIO, 0,
     285             :             "swp vnd", NULL);
     286             : 
     287             :         /* Setup the initial swap partition */
     288           0 :         swapmount();
     289           0 : }
     290             : 
     291             : #ifdef UVM_SWAP_ENCRYPT
     292             : void
     293           0 : uvm_swap_initcrypt_all(void)
     294             : {
     295             :         struct swapdev *sdp;
     296             :         struct swappri *spp;
     297             :         int npages;
     298             : 
     299             : 
     300           0 :         LIST_FOREACH(spp, &swap_priority, spi_swappri) {
     301           0 :                 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
     302           0 :                         if (sdp->swd_decrypt == NULL) {
     303           0 :                                 npages = dbtob((uint64_t)sdp->swd_nblks) >>
     304             :                                     PAGE_SHIFT;
     305           0 :                                 uvm_swap_initcrypt(sdp, npages);
     306           0 :                         }
     307             :                 }
     308             :         }
     309           0 : }
     310             : 
     311             : void
     312           0 : uvm_swap_initcrypt(struct swapdev *sdp, int npages)
     313             : {
     314             :         /*
      315             :          * keep track of whether a page needs to be decrypted when we get
      316             :          * it from the swap device.
      317             :          * We cannot chance a malloc later: if we are doing ASYNC puts,
     318             :          * we may not call malloc with M_WAITOK.  This consumes only
     319             :          * 8KB memory for a 256MB swap partition.
     320             :          */
     321           0 :         sdp->swd_decrypt = malloc(SWD_DCRYPT_SIZE(npages), M_VMSWAP,
     322             :             M_WAITOK|M_ZERO);
     323           0 :         sdp->swd_keys = mallocarray(SWD_KEY_SIZE(npages),
     324             :             sizeof(struct swap_key), M_VMSWAP, M_WAITOK|M_ZERO);
     325           0 : }
     326             : 
     327             : #endif /* UVM_SWAP_ENCRYPT */
     328             : 
     329             : boolean_t
     330           0 : uvm_swap_allocpages(struct vm_page **pps, int npages)
     331             : {
     332           0 :         struct pglist   pgl;
     333             :         int i;
     334             :         boolean_t fail;
     335             : 
     336             :         /* Estimate if we will succeed */
     337           0 :         uvm_lock_fpageq();
     338             : 
     339           0 :         fail = uvmexp.free - npages < uvmexp.reserve_kernel;
     340             : 
     341           0 :         uvm_unlock_fpageq();
     342             : 
     343           0 :         if (fail)
     344           0 :                 return FALSE;
     345             : 
     346           0 :         TAILQ_INIT(&pgl);
     347           0 :         if (uvm_pglistalloc(npages * PAGE_SIZE, dma_constraint.ucr_low,
     348           0 :             dma_constraint.ucr_high, 0, 0, &pgl, npages, UVM_PLA_NOWAIT))
     349           0 :                 return FALSE;
     350             : 
     351           0 :         for (i = 0; i < npages; i++) {
     352           0 :                 pps[i] = TAILQ_FIRST(&pgl);
     353             :                 /* *sigh* */
     354           0 :                 atomic_setbits_int(&pps[i]->pg_flags, PG_BUSY);
     355           0 :                 TAILQ_REMOVE(&pgl, pps[i], pageq);
     356             :         }
     357             : 
     358           0 :         return TRUE;
     359           0 : }
     360             : 
     361             : void
     362           0 : uvm_swap_freepages(struct vm_page **pps, int npages)
     363             : {
     364             :         int i;
     365             : 
     366           0 :         uvm_lock_pageq();
     367           0 :         for (i = 0; i < npages; i++)
     368           0 :                 uvm_pagefree(pps[i]);
     369           0 :         uvm_unlock_pageq();
     370           0 : }
     371             : 
     372             : #ifdef UVM_SWAP_ENCRYPT
     373             : /*
     374             :  * Mark pages on the swap device for later decryption
     375             :  */
     376             : 
     377             : void
     378           0 : uvm_swap_markdecrypt(struct swapdev *sdp, int startslot, int npages,
     379             :     int decrypt)
     380             : {
     381             :         int pagestart, i;
     382             :         int off, bit;
     383             : 
     384           0 :         if (!sdp)
     385           0 :                 return;
     386             : 
     387           0 :         pagestart = startslot - sdp->swd_drumoffset;
     388           0 :         for (i = 0; i < npages; i++, pagestart++) {
     389           0 :                 off = SWD_DCRYPT_OFF(pagestart);
     390           0 :                 bit = SWD_DCRYPT_BIT(pagestart);
     391           0 :                 if (decrypt)
     392             :                         /* pages read need decryption */
     393           0 :                         sdp->swd_decrypt[off] |= 1 << bit;
     394             :                 else
     395             :                         /* pages read do not need decryption */
     396           0 :                         sdp->swd_decrypt[off] &= ~(1 << bit);
     397             :         }
     398           0 : }
     399             : 
     400             : /*
     401             :  * Check if the page that we got from disk needs to be decrypted
     402             :  */
     403             : 
     404             : boolean_t
     405           0 : uvm_swap_needdecrypt(struct swapdev *sdp, int off)
     406             : {
     407           0 :         if (!sdp)
     408           0 :                 return FALSE;
     409             : 
     410           0 :         off -= sdp->swd_drumoffset;
     411           0 :         return sdp->swd_decrypt[SWD_DCRYPT_OFF(off)] & (1 << SWD_DCRYPT_BIT(off)) ?
     412             :                 TRUE : FALSE;
     413           0 : }
     414             : 
     415             : void
     416           0 : uvm_swap_finicrypt_all(void)
     417             : {
     418             :         struct swapdev *sdp;
     419             :         struct swappri *spp;
     420             :         struct swap_key *key;
     421             :         unsigned int nkeys;
     422             : 
     423           0 :         LIST_FOREACH(spp, &swap_priority, spi_swappri) {
     424           0 :                 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
     425           0 :                         if (sdp->swd_decrypt == NULL)
     426             :                                 continue;
     427             : 
     428           0 :                         nkeys = dbtob((uint64_t)sdp->swd_nblks) >> PAGE_SHIFT;
     429           0 :                         key = sdp->swd_keys + (SWD_KEY_SIZE(nkeys) - 1);
     430           0 :                         do {
     431           0 :                                 if (key->refcount != 0)
     432           0 :                                         swap_key_delete(key);
     433           0 :                         } while (key-- != sdp->swd_keys);
     434             :                 }
     435             :         }
     436           0 : }
     437             : #endif /* UVM_SWAP_ENCRYPT */
     438             : 
     439             : /*
     440             :  * swaplist functions: functions that operate on the list of swap
     441             :  * devices on the system.
     442             :  */
     443             : 
     444             : /*
     445             :  * swaplist_insert: insert swap device "sdp" into the global list
     446             :  *
     447             :  * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
     448             :  * => caller must provide a newly malloc'd swappri structure (we will
      449             :  *      FREE it if we don't need it... this is to prevent malloc blocking
     450             :  *      here while adding swap)
     451             :  */
     452             : void
     453           0 : swaplist_insert(struct swapdev *sdp, struct swappri *newspp, int priority)
     454             : {
     455             :         struct swappri *spp, *pspp;
     456             : 
     457             :         /*
     458             :          * find entry at or after which to insert the new device.
     459             :          */
     460           0 :         for (pspp = NULL, spp = LIST_FIRST(&swap_priority); spp != NULL;
     461           0 :              spp = LIST_NEXT(spp, spi_swappri)) {
     462           0 :                 if (priority <= spp->spi_priority)
     463             :                         break;
     464             :                 pspp = spp;
     465             :         }
     466             : 
     467             :         /*
     468             :          * new priority?
     469             :          */
     470           0 :         if (spp == NULL || spp->spi_priority != priority) {
     471             :                 spp = newspp;  /* use newspp! */
     472             : 
     473           0 :                 spp->spi_priority = priority;
     474           0 :                 TAILQ_INIT(&spp->spi_swapdev);
     475             : 
     476           0 :                 if (pspp)
     477           0 :                         LIST_INSERT_AFTER(pspp, spp, spi_swappri);
     478             :                 else
     479           0 :                         LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri);
     480             :         } else {
     481             :                 /* we don't need a new priority structure, free it */
     482           0 :                 free(newspp, M_VMSWAP, sizeof(*newspp));
     483             :         }
     484             : 
     485             :         /*
     486             :          * priority found (or created).   now insert on the priority's
     487             :          * tailq list and bump the total number of swapdevs.
     488             :          */
     489           0 :         sdp->swd_priority = priority;
     490           0 :         TAILQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
     491           0 :         uvmexp.nswapdev++;
     492           0 : }
     493             : 
     494             : /*
     495             :  * swaplist_find: find and optionally remove a swap device from the
     496             :  *      global list.
     497             :  *
     498             :  * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
     499             :  * => we return the swapdev we found (and removed)
     500             :  */
     501             : struct swapdev *
     502           0 : swaplist_find(struct vnode *vp, boolean_t remove)
     503             : {
     504             :         struct swapdev *sdp;
     505             :         struct swappri *spp;
     506             : 
     507             :         /*
     508             :          * search the lists for the requested vp
     509             :          */
     510           0 :         LIST_FOREACH(spp, &swap_priority, spi_swappri) {
     511           0 :                 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
     512           0 :                         if (sdp->swd_vp != vp)
     513             :                                 continue;
     514           0 :                         if (remove) {
     515           0 :                                 TAILQ_REMOVE(&spp->spi_swapdev, sdp, swd_next);
     516           0 :                                 uvmexp.nswapdev--;
     517           0 :                         }
     518           0 :                         return (sdp);
     519             :                 }
     520             :         }
     521           0 :         return (NULL);
     522           0 : }
     523             : 
     524             : 
     525             : /*
     526             :  * swaplist_trim: scan priority list for empty priority entries and kill
     527             :  *      them.
     528             :  *
     529             :  * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
     530             :  */
     531             : void
     532           0 : swaplist_trim(void)
     533             : {
     534             :         struct swappri *spp, *nextspp;
     535             : 
     536           0 :         LIST_FOREACH_SAFE(spp, &swap_priority, spi_swappri, nextspp) {
     537           0 :                 if (!TAILQ_EMPTY(&spp->spi_swapdev))
     538             :                         continue;
     539           0 :                 LIST_REMOVE(spp, spi_swappri);
     540           0 :                 free(spp, M_VMSWAP, sizeof(*spp));
     541           0 :         }
     542           0 : }
     543             : 
     544             : /*
     545             :  * swapdrum_add: add a "swapdev"'s blocks into /dev/drum's area.
     546             :  *
     547             :  * => caller must hold swap_syscall_lock
     548             :  * => uvm.swap_data_lock should be unlocked (we may sleep)
     549             :  */
     550             : void
     551           0 : swapdrum_add(struct swapdev *sdp, int npages)
     552             : {
     553           0 :         u_long result;
     554             : 
     555           0 :         if (extent_alloc(swapmap, npages, EX_NOALIGN, 0, EX_NOBOUNDARY,
     556             :             EX_WAITOK, &result))
     557           0 :                 panic("swapdrum_add");
     558             : 
     559           0 :         sdp->swd_drumoffset = result;
     560           0 :         sdp->swd_drumsize = npages;
     561           0 : }
     562             : 
     563             : /*
     564             :  * swapdrum_getsdp: given a page offset in /dev/drum, convert it back
     565             :  *      to the "swapdev" that maps that section of the drum.
     566             :  *
     567             :  * => each swapdev takes one big contig chunk of the drum
     568             :  * => caller must hold uvm.swap_data_lock
     569             :  */
     570             : struct swapdev *
     571           0 : swapdrum_getsdp(int pgno)
     572             : {
     573             :         struct swapdev *sdp;
     574             :         struct swappri *spp;
     575             : 
     576           0 :         LIST_FOREACH(spp, &swap_priority, spi_swappri) {
     577           0 :                 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
     578           0 :                         if (pgno >= sdp->swd_drumoffset &&
     579           0 :                             pgno < (sdp->swd_drumoffset + sdp->swd_drumsize)) {
     580           0 :                                 return sdp;
     581             :                         }
     582             :                 }
     583             :         }
     584           0 :         return NULL;
     585           0 : }
     586             : 
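As a hedged illustration of the drum layout (the numbers below are invented
for the example, not taken from this file): suppose two swap devices were
configured and swapdrum_add() assigned them swd_drumoffset/swd_drumsize of
1/65536 and 65537/32768.  Drum pages 1-65536 then belong to the first
device and pages 65537-98304 to the second, so swapdrum_getsdp(70000)
returns the second swapdev, and the page offset within that device is
70000 - 65537 = 4463 (the "startslot - sdp->swd_drumoffset" translation
used by the encryption and I/O code).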
     587             : 
     588             : /*
     589             :  * sys_swapctl: main entry point for swapctl(2) system call
     590             :  *      [with two helper functions: swap_on and swap_off]
     591             :  */
     592             : int
     593           0 : sys_swapctl(struct proc *p, void *v, register_t *retval)
     594             : {
     595             :         struct sys_swapctl_args /* {
     596             :                 syscallarg(int) cmd;
     597             :                 syscallarg(void *) arg;
     598             :                 syscallarg(int) misc;
     599           0 :         } */ *uap = (struct sys_swapctl_args *)v;
     600             :         struct vnode *vp;
     601           0 :         struct nameidata nd;
     602             :         struct swappri *spp;
     603             :         struct swapdev *sdp;
     604             :         struct swapent *sep;
     605           0 :         char    userpath[MAXPATHLEN];
     606           0 :         size_t  len;
     607             :         int     count, error, misc;
     608             :         int     priority;
     609             : 
     610           0 :         misc = SCARG(uap, misc);
     611             : 
     612             :         /*
     613             :          * ensure serialized syscall access by grabbing the swap_syscall_lock
     614             :          */
     615           0 :         rw_enter_write(&swap_syscall_lock);
     616             : 
     617             :         /*
     618             :          * we handle the non-priv NSWAP and STATS request first.
     619             :          *
     620             :          * SWAP_NSWAP: return number of config'd swap devices
     621             :          * [can also be obtained with uvmexp sysctl]
     622             :          */
     623           0 :         if (SCARG(uap, cmd) == SWAP_NSWAP) {
     624           0 :                 *retval = uvmexp.nswapdev;
     625             :                 error = 0;
     626           0 :                 goto out;
     627             :         }
     628             : 
     629             :         /*
     630             :          * SWAP_STATS: get stats on current # of configured swap devs
     631             :          *
     632             :          * note that the swap_priority list can't change as long
     633             :          * as we are holding the swap_syscall_lock.  we don't want
     634             :          * to grab the uvm.swap_data_lock because we may fault&sleep during
     635             :          * copyout() and we don't want to be holding that lock then!
     636             :          */
     637           0 :         if (SCARG(uap, cmd) == SWAP_STATS) {
     638           0 :                 sep = (struct swapent *)SCARG(uap, arg);
     639             :                 count = 0;
     640             : 
     641           0 :                 LIST_FOREACH(spp, &swap_priority, spi_swappri) {
     642           0 :                         TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
     643           0 :                                 if (count >= misc)
     644             :                                         continue;
     645             : 
     646           0 :                                 sdp->swd_inuse = 
     647           0 :                                     btodb((u_int64_t)sdp->swd_npginuse <<
     648             :                                     PAGE_SHIFT);
     649           0 :                                 error = copyout(&sdp->swd_se, sep,
     650             :                                     sizeof(struct swapent));
     651           0 :                                 if (error)
     652             :                                         goto out;
     653             : 
     654             :                                 /* now copy out the path if necessary */
     655           0 :                                 error = copyoutstr(sdp->swd_path,
     656           0 :                                     sep->se_path, sizeof(sep->se_path), NULL);
     657           0 :                                 if (error)
     658             :                                         goto out;
     659             : 
     660           0 :                                 count++;
     661           0 :                                 sep++;
     662           0 :                         }
     663             :                 }
     664             : 
     665           0 :                 *retval = count;
     666             :                 error = 0;
     667           0 :                 goto out;
     668             :         }
     669             : 
     670             :         /* all other requests require superuser privs.   verify. */
     671           0 :         if ((error = suser(p)) || (error = pledge_swapctl(p)))
     672             :                 goto out;
     673             : 
     674             :         /*
     675             :          * at this point we expect a path name in arg.   we will
     676             :          * use namei() to gain a vnode reference (vref), and lock
     677             :          * the vnode (VOP_LOCK).
     678             :          */
     679           0 :         error = copyinstr(SCARG(uap, arg), userpath, sizeof(userpath), &len);
     680           0 :         if (error)
     681             :                 goto out;
     682           0 :         disk_map(userpath, userpath, sizeof(userpath), DM_OPENBLCK);
     683           0 :         NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, userpath, p);
     684           0 :         if ((error = namei(&nd)))
     685             :                 goto out;
     686           0 :         vp = nd.ni_vp;
     687             :         /* note: "vp" is referenced and locked */
     688             : 
     689             :         error = 0;              /* assume no error */
     690           0 :         switch(SCARG(uap, cmd)) {
     691             :         case SWAP_DUMPDEV:
     692           0 :                 if (vp->v_type != VBLK) {
     693             :                         error = ENOTBLK;
     694           0 :                         break;
     695             :                 }
     696           0 :                 dumpdev = vp->v_rdev;
     697           0 :                 break;
     698             :         case SWAP_CTL:
     699             :                 /*
     700             :                  * get new priority, remove old entry (if any) and then
     701             :                  * reinsert it in the correct place.  finally, prune out
     702             :                  * any empty priority structures.
     703             :                  */
     704           0 :                 priority = SCARG(uap, misc);
     705           0 :                 spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
     706           0 :                 if ((sdp = swaplist_find(vp, 1)) == NULL) {
     707             :                         error = ENOENT;
     708           0 :                 } else {
     709           0 :                         swaplist_insert(sdp, spp, priority);
     710           0 :                         swaplist_trim();
     711             :                 }
     712           0 :                 if (error)
     713           0 :                         free(spp, M_VMSWAP, sizeof(*spp));
     714             :                 break;
     715             :         case SWAP_ON:
     716             :                 /*
     717             :                  * check for duplicates.   if none found, then insert a
     718             :                  * dummy entry on the list to prevent someone else from
     719             :                  * trying to enable this device while we are working on
     720             :                  * it.
     721             :                  */
     722           0 :                 priority = SCARG(uap, misc);
     723           0 :                 if ((sdp = swaplist_find(vp, 0)) != NULL) {
     724             :                         error = EBUSY;
     725           0 :                         break;
     726             :                 }
     727           0 :                 sdp = malloc(sizeof *sdp, M_VMSWAP, M_WAITOK|M_ZERO);
     728           0 :                 spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
     729           0 :                 sdp->swd_flags = SWF_FAKE;   /* placeholder only */
     730           0 :                 sdp->swd_vp = vp;
     731           0 :                 sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV;
     732             : 
     733             :                 /*
     734             :                  * XXX Is NFS elaboration necessary?
     735             :                  */
     736           0 :                 if (vp->v_type == VREG) {
     737           0 :                         sdp->swd_cred = crdup(p->p_ucred);
     738           0 :                 }
     739             : 
     740           0 :                 swaplist_insert(sdp, spp, priority);
     741             : 
     742           0 :                 sdp->swd_pathlen = len;
     743           0 :                 sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK);
     744           0 :                 strlcpy(sdp->swd_path, userpath, len);
     745             : 
     746             :                 /*
     747             :                  * we've now got a FAKE placeholder in the swap list.
     748             :                  * now attempt to enable swap on it.  if we fail, undo
     749             :                  * what we've done and kill the fake entry we just inserted.
     750             :                  * if swap_on is a success, it will clear the SWF_FAKE flag
     751             :                  */
     752             : 
     753           0 :                 if ((error = swap_on(p, sdp)) != 0) {
     754           0 :                         (void) swaplist_find(vp, 1);  /* kill fake entry */
     755           0 :                         swaplist_trim();
     756           0 :                         if (vp->v_type == VREG) {
     757           0 :                                 crfree(sdp->swd_cred);
     758           0 :                         }
     759           0 :                         free(sdp->swd_path, M_VMSWAP, sdp->swd_pathlen);
     760           0 :                         free(sdp, M_VMSWAP, sizeof(*sdp));
     761           0 :                         break;
     762             :                 }
     763             :                 break;
     764             :         case SWAP_OFF:
     765           0 :                 if ((sdp = swaplist_find(vp, 0)) == NULL) {
     766             :                         error = ENXIO;
     767           0 :                         break;
     768             :                 }
     769             : 
     770             :                 /*
     771             :                  * If a device isn't in use or enabled, we
     772             :                  * can't stop swapping from it (again).
     773             :                  */
     774           0 :                 if ((sdp->swd_flags & (SWF_INUSE|SWF_ENABLE)) == 0) {
     775             :                         error = EBUSY;
     776           0 :                         break;
     777             :                 }
     778             : 
     779             :                 /*
     780             :                  * do the real work.
     781             :                  */
     782           0 :                 error = swap_off(p, sdp);
     783           0 :                 break;
     784             :         default:
     785             :                 error = EINVAL;
     786           0 :         }
     787             : 
     788             :         /* done!  release the ref gained by namei() and unlock. */
     789           0 :         vput(vp);
     790             : 
     791             : out:
     792           0 :         rw_exit_write(&swap_syscall_lock);
     793             : 
     794           0 :         return (error);
     795           0 : }
     796             : 
     797             : /*
     798             :  * swap_on: attempt to enable a swapdev for swapping.   note that the
     799             :  *      swapdev is already on the global list, but disabled (marked
     800             :  *      SWF_FAKE).
     801             :  *
     802             :  * => we avoid the start of the disk (to protect disk labels)
     803             :  * => caller should leave uvm.swap_data_lock unlocked, we may lock it
     804             :  *      if needed.
     805             :  */
     806             : int
     807           0 : swap_on(struct proc *p, struct swapdev *sdp)
     808             : {
     809             :         static int count = 0;   /* static */
     810             :         struct vnode *vp;
     811             :         int error, npages, nblocks, size;
     812             :         long addr;
     813           0 :         struct vattr va;
     814             : #if defined(NFSCLIENT)
     815             :         extern struct vops nfs_vops;
     816             : #endif /* defined(NFSCLIENT) */
     817             :         dev_t dev;
     818             : 
     819             :         /*
     820             :          * we want to enable swapping on sdp.   the swd_vp contains
     821             :          * the vnode we want (locked and ref'd), and the swd_dev
      822             :  * contains the dev_t of the file, if it is a block device.
     823             :          */
     824             : 
     825           0 :         vp = sdp->swd_vp;
     826           0 :         dev = sdp->swd_dev;
     827             : 
     828             : #if NVND > 0
     829             :         /* no swapping to vnds. */
     830           0 :         if (bdevsw[major(dev)].d_strategy == vndstrategy)
     831           0 :                 return (EOPNOTSUPP);
     832             : #endif
     833             : 
     834             :         /*
     835             :          * open the swap file (mostly useful for block device files to
     836             :          * let device driver know what is up).
     837             :          *
     838             :          * we skip the open/close for root on swap because the root
     839             :          * has already been opened when root was mounted (mountroot).
     840             :          */
     841           0 :         if (vp != rootvp) {
     842           0 :                 if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p)))
     843           0 :                         return (error);
     844             :         }
     845             : 
     846             :         /* XXX this only works for block devices */
     847             :         /*
     848             :          * we now need to determine the size of the swap area.   for
     849             :          * block specials we can call the d_psize function.
     850             :          * for normal files, we must stat [get attrs].
     851             :          *
     852             :          * we put the result in nblks.
     853             :          * for normal files, we also want the filesystem block size
     854             :          * (which we get with statfs).
     855             :          */
     856           0 :         switch (vp->v_type) {
     857             :         case VBLK:
     858           0 :                 if (bdevsw[major(dev)].d_psize == 0 ||
     859           0 :                     (nblocks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
     860             :                         error = ENXIO;
     861           0 :                         goto bad;
     862             :                 }
     863             :                 break;
     864             : 
     865             :         case VREG:
     866           0 :                 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)))
     867             :                         goto bad;
     868           0 :                 nblocks = (int)btodb(va.va_size);
     869           0 :                 if ((error =
     870           0 :                      VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0)
     871             :                         goto bad;
     872             : 
     873           0 :                 sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize;
     874             :                 /*
     875             :                  * limit the max # of outstanding I/O requests we issue
     876             :                  * at any one time.   take it easy on NFS servers.
     877             :                  */
     878             : #if defined(NFSCLIENT)
     879           0 :                 if (vp->v_op == &nfs_vops)
     880           0 :                         sdp->swd_maxactive = 2; /* XXX */
     881             :                 else
     882             : #endif /* defined(NFSCLIENT) */
     883           0 :                         sdp->swd_maxactive = 8; /* XXX */
     884           0 :                 bufq_init(&sdp->swd_bufq, BUFQ_FIFO);
     885           0 :                 break;
     886             : 
     887             :         default:
     888             :                 error = ENXIO;
     889           0 :                 goto bad;
     890             :         }
     891             : 
     892             :         /*
     893             :          * save nblocks in a safe place and convert to pages.
     894             :          */
     895             : 
     896           0 :         sdp->swd_nblks = nblocks;
     897           0 :         npages = dbtob((u_int64_t)nblocks) >> PAGE_SHIFT;
     898             : 
     899             :         /*
      900             :  * for block special files, we want to make sure that we leave
     901             :          * the disklabel and bootblocks alone, so we arrange to skip
     902             :          * over them (arbitrarily choosing to skip PAGE_SIZE bytes).
     903             :          * note that because of this the "size" can be less than the
     904             :          * actual number of blocks on the device.
     905             :          */
     906           0 :         if (vp->v_type == VBLK) {
     907             :                 /* we use pages 1 to (size - 1) [inclusive] */
     908           0 :                 size = npages - 1;
     909             :                 addr = 1;
     910           0 :         } else {
     911             :                 /* we use pages 0 to (size - 1) [inclusive] */
     912             :                 size = npages;
     913             :                 addr = 0;
     914             :         }
     915             : 
      916             :          * make sure we have enough blocks for a reasonably sized swap
     917             :          * make sure we have enough blocks for a reasonable sized swap
     918             :          * area.   we want at least one page.
     919             :          */
     920             : 
     921           0 :         if (size < 1) {
     922             :                 error = EINVAL;
     923           0 :                 goto bad;
     924             :         }
     925             : 
     926             :         /*
     927             :          * now we need to allocate an extent to manage this swap device
     928             :          */
     929           0 :         snprintf(sdp->swd_exname, sizeof(sdp->swd_exname), "swap0x%04x",
     930           0 :             count++);
     931             : 
     932             :         /* note that extent_create's 3rd arg is inclusive, thus "- 1" */
     933           0 :         sdp->swd_ex = extent_create(sdp->swd_exname, 0, npages - 1, M_VMSWAP,
     934             :                                     0, 0, EX_WAITOK);
     935             :         /* allocate the `saved' region from the extent so it won't be used */
     936           0 :         if (addr) {
     937           0 :                 if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK))
     938           0 :                         panic("disklabel reserve");
     939             :                 /* XXX: is extent synchronized with swd_npginuse? */
     940             :         }
     941             : #ifdef HIBERNATE
     942             :         /*
     943             :          * Lock down the last region of primary disk swap, in case
     944             :          * hibernate needs to place a signature there.
     945             :          */
     946           0 :         if (dev == swdevt[0].sw_dev && vp->v_type == VBLK && size > 3 ) {
     947           0 :                 if (extent_alloc_region(sdp->swd_ex,
     948           0 :                     npages - 1 - 1, 1, EX_WAITOK))
     949           0 :                         panic("hibernate reserve");
     950             :                 /* XXX: is extent synchronized with swd_npginuse? */
     951             :         }
     952             : #endif
     953             : 
     954             :         /* add a ref to vp to reflect usage as a swap device. */
     955           0 :         vref(vp);
     956             : 
     957             : #ifdef UVM_SWAP_ENCRYPT
     958           0 :         if (uvm_doswapencrypt)
     959           0 :                 uvm_swap_initcrypt(sdp, npages);
     960             : #endif
     961             :         /* now add the new swapdev to the drum and enable. */
     962           0 :         swapdrum_add(sdp, npages);
     963           0 :         sdp->swd_npages = size;
     964           0 :         sdp->swd_flags &= ~SWF_FAKE;     /* going live */
     965           0 :         sdp->swd_flags |= (SWF_INUSE|SWF_ENABLE);
     966           0 :         uvmexp.swpages += size;
     967           0 :         return (0);
     968             : 
     969             : bad:
     970             :         /* failure: close device if necessary and return error. */
     971           0 :         if (vp != rootvp)
     972           0 :                 (void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
     973           0 :         return (error);
     974           0 : }
     975             : 
     976             : /*
     977             :  * swap_off: stop swapping on swapdev
     978             :  *
     979             :  * => swap data should be locked, we will unlock.
     980             :  */
     981             : int
     982           0 : swap_off(struct proc *p, struct swapdev *sdp)
     983             : {
     984             :         int error = 0;
     985             : 
     986             :         /* disable the swap area being removed */
     987           0 :         sdp->swd_flags &= ~SWF_ENABLE;
     988             : 
     989             :         /*
     990             :          * the idea is to find all the pages that are paged out to this
     991             :          * device, and page them all in.  in uvm, swap-backed pageable
     992             :          * memory can take two forms: aobjs and anons.  call the
     993             :          * swapoff hook for each subsystem to bring in pages.
     994             :          */
     995             : 
     996           0 :         if (uao_swap_off(sdp->swd_drumoffset,
     997           0 :                          sdp->swd_drumoffset + sdp->swd_drumsize) ||
     998           0 :             amap_swap_off(sdp->swd_drumoffset,
     999           0 :                           sdp->swd_drumoffset + sdp->swd_drumsize)) {
    1000             : 
    1001             :                 error = ENOMEM;
    1002           0 :         } else if (sdp->swd_npginuse > sdp->swd_npgbad) {
    1003             :                 error = EBUSY;
    1004           0 :         }
    1005             : 
    1006           0 :         if (error) {
    1007           0 :                 sdp->swd_flags |= SWF_ENABLE;
    1008           0 :                 return (error);
    1009             :         }
    1010             : 
    1011             :         /*
    1012             :          * done with the vnode and saved creds.
    1013             :          * drop our ref on the vnode before calling VOP_CLOSE()
    1014             :          * so that spec_close() can tell if this is the last close.
    1015             :          */
    1016           0 :         if (sdp->swd_vp->v_type == VREG) {
    1017           0 :                 crfree(sdp->swd_cred);
    1018           0 :         }
    1019           0 :         vrele(sdp->swd_vp);
    1020           0 :         if (sdp->swd_vp != rootvp) {
    1021           0 :                 (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p);
    1022           0 :         }
    1023             : 
    1024           0 :         uvmexp.swpages -= sdp->swd_npages;
    1025             : 
    1026           0 :         if (swaplist_find(sdp->swd_vp, 1) == NULL)
    1027           0 :                 panic("swap_off: swapdev not in list");
    1028           0 :         swaplist_trim();
    1029             : 
    1030             :         /*
    1031             :          * free all resources!
    1032             :          */
    1033           0 :         extent_free(swapmap, sdp->swd_drumoffset, sdp->swd_drumsize,
    1034             :                     EX_WAITOK);
    1035           0 :         extent_destroy(sdp->swd_ex);
    1036             :         /* free sdp->swd_path ? */
    1037           0 :         free(sdp, M_VMSWAP, sizeof(*sdp));
    1038           0 :         return (0);
    1039           0 : }
    1040             : 
    1041             : /*
    1042             :  * /dev/drum interface and i/o functions
    1043             :  */
    1044             : 
    1045             : /*
    1046             :  * swstrategy: perform I/O on the drum
    1047             :  *
    1048             :  * => we must map the i/o request from the drum to the correct swapdev.
    1049             :  */
    1050             : void
    1051           0 : swstrategy(struct buf *bp)
    1052             : {
    1053             :         struct swapdev *sdp;
    1054             :         int s, pageno, bn;
    1055             : 
    1056             :         /*
    1057             :          * convert block number to swapdev.   note that swapdev can't
    1058             :          * be yanked out from under us because we are holding resources
    1059             :          * in it (i.e. the blocks we are doing I/O on).
    1060             :          */
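                     :         /*
                     :          * dbtob() turns the drum block number into a byte offset and
                     :          * the PAGE_SHIFT shift turns that into a drum page number;
                     :          * e.g. assuming 512-byte disk blocks and 4 KB pages, eight
                     :          * disk blocks make up one drum page.
                     :          */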
    1061           0 :         pageno = dbtob((u_int64_t)bp->b_blkno) >> PAGE_SHIFT;
    1062           0 :         sdp = swapdrum_getsdp(pageno);
    1063           0 :         if (sdp == NULL) {
    1064           0 :                 bp->b_error = EINVAL;
    1065           0 :                 bp->b_flags |= B_ERROR;
    1066           0 :                 s = splbio();
    1067           0 :                 biodone(bp);
    1068           0 :                 splx(s);
    1069           0 :                 return;
    1070             :         }
    1071             : 
    1072             :         /* convert drum page number to block number on this swapdev. */
    1073           0 :         pageno -= sdp->swd_drumoffset;       /* page # on swapdev */
    1074           0 :         bn = btodb((u_int64_t)pageno << PAGE_SHIFT); /* convert to diskblock */
    1075             : 
    1076             :         /*
    1077             :          * for block devices we finish up here.
    1078             :          * for regular files we have to do more work which we delegate
    1079             :          * to sw_reg_strategy().
    1080             :          */
    1081           0 :         switch (sdp->swd_vp->v_type) {
    1082             :         default:
    1083           0 :                 panic("swstrategy: vnode type 0x%x", sdp->swd_vp->v_type);
    1084             :         case VBLK:
    1085             :                 /*
    1086             :                  * must convert "bp" from an I/O on /dev/drum to an I/O
    1087             :                  * on the swapdev (sdp).
    1088             :                  */
    1089           0 :                 s = splbio();
    1090           0 :                 buf_replacevnode(bp, sdp->swd_vp);
    1091             : 
    1092           0 :                 bp->b_blkno = bn;
    1093           0 :                 splx(s);
    1094           0 :                 VOP_STRATEGY(bp);
    1095           0 :                 return;
    1096             :         case VREG:
    1097             :                 /* delegate to sw_reg_strategy function. */
    1098           0 :                 sw_reg_strategy(sdp, bp, bn);
    1099           0 :                 return;
    1100             :         }
    1101             :         /* NOTREACHED */
    1102           0 : }
    1103             : 
    1104             : /*
    1105             :  * sw_reg_strategy: handle swap i/o to regular files
    1106             :  */
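                     : /*
                     :  * The parent buffer is split into one or more child buffers (vndbufs):
                     :  * VOP_BMAP() maps each byte range of the swap file onto a run of blocks
                     :  * on the underlying device, and a vndxfer ties the children back to the
                     :  * parent so it can be completed once they have all finished.
                     :  */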
    1107             : void
    1108           0 : sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn)
    1109             : {
    1110           0 :         struct vnode    *vp;
    1111             :         struct vndxfer  *vnx;
    1112           0 :         daddr_t nbn;
    1113             :         caddr_t         addr;
    1114             :         off_t           byteoff;
    1115           0 :         int             s, off, nra, error, sz, resid;
    1116             : 
    1117             :         /*
    1118             :          * allocate a vndxfer head for this transfer and point it to
    1119             :          * our buffer.
    1120             :          */
    1121           0 :         vnx = pool_get(&vndxfer_pool, PR_WAITOK);
    1122           0 :         vnx->vx_flags = VX_BUSY;
    1123           0 :         vnx->vx_error = 0;
    1124           0 :         vnx->vx_pending = 0;
    1125           0 :         vnx->vx_bp = bp;
    1126           0 :         vnx->vx_sdp = sdp;
    1127             : 
    1128             :         /*
    1129             :          * setup for main loop where we read filesystem blocks into
    1130             :          * our buffer.
    1131             :          */
    1132             :         error = 0;
    1133           0 :         bp->b_resid = bp->b_bcount;       /* nothing transferred yet! */
    1134           0 :         addr = bp->b_data;           /* current position in buffer */
    1135           0 :         byteoff = dbtob((u_int64_t)bn);
    1136             : 
    1137           0 :         for (resid = bp->b_resid; resid; resid -= sz) {
    1138             :                 struct vndbuf   *nbp;
    1139             :                 /*
    1140             :                  * translate byteoffset into block number.  return values:
    1141             :                  *   vp = vnode of underlying device
    1142             :                  *  nbn = new block number (on underlying vnode dev)
    1143             :                  *  nra = num blocks we can read-ahead (excludes requested
    1144             :                  *      block)
    1145             :                  */
    1146           0 :                 nra = 0;
    1147           0 :                 error = VOP_BMAP(sdp->swd_vp, byteoff / sdp->swd_bsize,
    1148             :                                         &vp, &nbn, &nra);
    1149             : 
    1150           0 :                 if (error == 0 && nbn == -1) {
    1151             :                         /*
    1152             :                          * this used to just set error, but that doesn't
    1153             :                          * do the right thing: it leads to random memory
    1154             :                          * errors.  The panic() should remain until this
    1155             :                          * condition no longer destabilizes the system.
    1156             :                          */
    1157             : #if 1
    1158           0 :                         panic("sw_reg_strategy: swap to sparse file");
    1159             : #else
    1160             :                         error = EIO;    /* failure */
    1161             : #endif
    1162             :                 }
    1163             : 
    1164             :                 /*
    1165             :                  * punt if there was an error or a hole in the file.
    1166             :                  * we must wait for any i/o ops we have already started
    1167             :                  * to finish before returning.
    1168             :                  *
    1169             :                  * XXX we could deal with holes here but it would be
    1170             :                  * a hassle (in the write case).
    1171             :                  */
    1172           0 :                 if (error) {
    1173           0 :                         s = splbio();
    1174           0 :                         vnx->vx_error = error;       /* pass error up */
    1175           0 :                         goto out;
    1176             :                 }
    1177             : 
    1178             :                 /*
    1179             :                  * compute the size ("sz") of this transfer (in bytes).
    1180             :                  */
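                     :                 /*
                     :                  * e.g. with a (hypothetical) 64 KB filesystem block
                     :                  * size, nra == 1 and byteoff 16 KB into a block, this
                     :                  * works out to sz = 2 * 64 KB - 16 KB = 112 KB, clamped
                     :                  * to whatever is left of the request (resid).
                     :                  */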
    1181           0 :                 off = byteoff % sdp->swd_bsize;
    1182           0 :                 sz = (1 + nra) * sdp->swd_bsize - off;
    1183           0 :                 if (sz > resid)
    1184           0 :                         sz = resid;
    1185             : 
    1186             :                 /*
    1187             :                  * now get a buf structure.   note that the vb_buf is
    1188             :                  * at the front of the nbp structure so that you can
    1189             :                  * cast pointers between the two structures easily.
    1190             :                  */
    1191           0 :                 nbp = pool_get(&vndbuf_pool, PR_WAITOK);
    1192           0 :                 nbp->vb_buf.b_flags    = bp->b_flags | B_CALL;
    1193           0 :                 nbp->vb_buf.b_bcount   = sz;
    1194           0 :                 nbp->vb_buf.b_bufsize  = sz;
    1195           0 :                 nbp->vb_buf.b_error    = 0;
    1196           0 :                 nbp->vb_buf.b_data     = addr;
    1197           0 :                 nbp->vb_buf.b_bq       = NULL;
    1198           0 :                 nbp->vb_buf.b_blkno    = nbn + btodb(off);
    1199           0 :                 nbp->vb_buf.b_proc     = bp->b_proc;
    1200           0 :                 nbp->vb_buf.b_iodone   = sw_reg_iodone;
    1201           0 :                 nbp->vb_buf.b_vp       = NULLVP;
    1202           0 :                 nbp->vb_buf.b_vnbufs.le_next = NOLIST;
    1203           0 :                 LIST_INIT(&nbp->vb_buf.b_dep);
    1204             : 
    1205             :                 /*
    1206             :                  * set b_dirtyoff/end and b_validoff/end.   this is
    1207             :                  * required by the NFS client code (otherwise it will
    1208             :                  * just discard our I/O request).
    1209             :                  */
    1210           0 :                 if (bp->b_dirtyend == 0) {
    1211           0 :                         nbp->vb_buf.b_dirtyoff = 0;
    1212           0 :                         nbp->vb_buf.b_dirtyend = sz;
    1213           0 :                 } else {
    1214           0 :                         nbp->vb_buf.b_dirtyoff =
    1215           0 :                             max(0, bp->b_dirtyoff - (bp->b_bcount-resid));
    1216           0 :                         nbp->vb_buf.b_dirtyend =
    1217           0 :                             min(sz,
    1218           0 :                                 max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
    1219             :                 }
    1220           0 :                 if (bp->b_validend == 0) {
    1221           0 :                         nbp->vb_buf.b_validoff = 0;
    1222           0 :                         nbp->vb_buf.b_validend = sz;
    1223           0 :                 } else {
    1224           0 :                         nbp->vb_buf.b_validoff =
    1225           0 :                             max(0, bp->b_validoff - (bp->b_bcount-resid));
    1226           0 :                         nbp->vb_buf.b_validend =
    1227           0 :                             min(sz,
    1228           0 :                                 max(0, bp->b_validend - (bp->b_bcount-resid)));
    1229             :                 }
    1230             : 
    1231             :                 /* patch it back to the vnx */
    1232           0 :                 nbp->vb_vnx = vnx;
    1233           0 :                 task_set(&nbp->vb_task, sw_reg_iodone_internal, nbp);
    1234             : 
    1235           0 :                 s = splbio();
    1236           0 :                 if (vnx->vx_error != 0) {
    1237           0 :                         pool_put(&vndbuf_pool, nbp);
    1238           0 :                         goto out;
    1239             :                 }
    1240           0 :                 vnx->vx_pending++;
    1241             : 
    1242             :                 /* assoc new buffer with underlying vnode */
    1243           0 :                 bgetvp(vp, &nbp->vb_buf);
    1244             : 
    1245             :                 /* start I/O if we are not over our limit */
    1246           0 :                 bufq_queue(&sdp->swd_bufq, &nbp->vb_buf);
    1247           0 :                 sw_reg_start(sdp);
    1248           0 :                 splx(s);
    1249             : 
    1250             :                 /*
    1251             :                  * advance to the next I/O
    1252             :                  */
    1253           0 :                 byteoff += sz;
    1254           0 :                 addr += sz;
    1255           0 :         }
    1256             : 
    1257           0 :         s = splbio();
    1258             : 
    1259             : out: /* Arrive here at splbio */
    1260           0 :         vnx->vx_flags &= ~VX_BUSY;
    1261           0 :         if (vnx->vx_pending == 0) {
    1262           0 :                 if (vnx->vx_error != 0) {
    1263           0 :                         bp->b_error = vnx->vx_error;
    1264           0 :                         bp->b_flags |= B_ERROR;
    1265           0 :                 }
    1266           0 :                 pool_put(&vndxfer_pool, vnx);
    1267           0 :                 biodone(bp);
    1268           0 :         }
    1269           0 :         splx(s);
    1270           0 : }
    1271             : 
    1272             : /* sw_reg_start: start an I/O request on the requested swapdev. */
    1273             : void
    1274           0 : sw_reg_start(struct swapdev *sdp)
    1275             : {
    1276             :         struct buf      *bp;
    1277             : 
    1278             :         /* XXX: recursion control */
    1279           0 :         if ((sdp->swd_flags & SWF_BUSY) != 0)
    1280           0 :                 return;
    1281             : 
    1282           0 :         sdp->swd_flags |= SWF_BUSY;
    1283             : 
    1284           0 :         while (sdp->swd_active < sdp->swd_maxactive) {
    1285           0 :                 bp = bufq_dequeue(&sdp->swd_bufq);
    1286           0 :                 if (bp == NULL)
    1287             :                         break;
    1288             : 
    1289           0 :                 sdp->swd_active++;
    1290             : 
    1291           0 :                 if ((bp->b_flags & B_READ) == 0)
    1292           0 :                         bp->b_vp->v_numoutput++;
    1293             : 
    1294           0 :                 VOP_STRATEGY(bp);
    1295             :         }
    1296           0 :         sdp->swd_flags &= ~SWF_BUSY;
    1297           0 : }
    1298             : 
    1299             : /*
    1300             :  * sw_reg_iodone: one of our i/o's has completed and needs post-i/o cleanup
    1301             :  *
    1302             :  * => note that we can recover the vndbuf struct by casting the buf ptr
    1303             :  *
    1304             :  * XXX:
    1305             :  * We only put this onto a taskq here because the maxactive game
    1306             :  * basically requires us to call back into VOP_STRATEGY() (where we
    1307             :  * must be able to sleep) via sw_reg_start().
    1308             :  */
    1309             : void
    1310           0 : sw_reg_iodone(struct buf *bp)
    1311             : {
    1312           0 :         struct vndbuf *vbp = (struct vndbuf *)bp;
    1313           0 :         task_add(systq, &vbp->vb_task);
    1314           0 : }
    1315             : 
    1316             : void
    1317           0 : sw_reg_iodone_internal(void *xvbp)
    1318             : {
    1319           0 :         struct vndbuf *vbp = xvbp;
    1320           0 :         struct vndxfer *vnx = vbp->vb_vnx;
    1321           0 :         struct buf *pbp = vnx->vx_bp;                /* parent buffer */
    1322           0 :         struct swapdev  *sdp = vnx->vx_sdp;
    1323             :         int resid, s;
    1324             : 
    1325           0 :         s = splbio();
    1326             : 
    1327           0 :         resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
    1328           0 :         pbp->b_resid -= resid;
    1329           0 :         vnx->vx_pending--;
    1330             : 
    1331             :         /* pass error upward */
    1332           0 :         if (vbp->vb_buf.b_error)
    1333           0 :                 vnx->vx_error = vbp->vb_buf.b_error;
    1334             : 
    1335             :         /* disassociate this buffer from the vnode (if any). */
    1336           0 :         if (vbp->vb_buf.b_vp != NULL) {
    1337           0 :                 brelvp(&vbp->vb_buf);
    1338           0 :         }
    1339             : 
    1340             :         /* kill vbp structure */
    1341           0 :         pool_put(&vndbuf_pool, vbp);
    1342             : 
    1343             :         /*
    1344             :          * wrap up this transaction if it has run to completion or, in
    1345             :          * case of an error, when all auxiliary buffers have returned.
    1346             :          */
    1347           0 :         if (vnx->vx_error != 0) {
    1348             :                 /* pass error upward */
    1349           0 :                 pbp->b_flags |= B_ERROR;
    1350           0 :                 pbp->b_error = vnx->vx_error;
    1351           0 :                 if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {
    1352           0 :                         pool_put(&vndxfer_pool, vnx);
    1353           0 :                         biodone(pbp);
    1354           0 :                 }
    1355           0 :         } else if (pbp->b_resid == 0) {
    1356           0 :                 KASSERT(vnx->vx_pending == 0);
    1357           0 :                 if ((vnx->vx_flags & VX_BUSY) == 0) {
    1358           0 :                         pool_put(&vndxfer_pool, vnx);
    1359           0 :                         biodone(pbp);
    1360           0 :                 }
    1361             :         }
    1362             : 
    1363             :         /*
    1364             :          * done!   start next swapdev I/O if one is pending
    1365             :          */
    1366           0 :         sdp->swd_active--;
    1367           0 :         sw_reg_start(sdp);
    1368           0 :         splx(s);
    1369           0 : }
    1370             : 
    1371             : 
    1372             : /*
    1373             :  * uvm_swap_alloc: allocate space on swap
    1374             :  *
    1375             :  * => allocation is done "round robin" down the priority list; as we
    1376             :  *      allocate within a priority we "rotate" the tail queue.
    1377             :  * => space can be freed with uvm_swap_free
    1378             :  * => we return the page slot number in /dev/drum (0 == invalid slot)
    1379             :  * => we lock uvm.swap_data_lock
    1380             :  * => XXXMRG: "LESSOK" INTERFACE NEEDED TO EXTENT SYSTEM
    1381             :  */
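                     : /*
                     :  * e.g. with two enabled swapdevs at the same priority, the tail queue
                     :  * rotation below makes successive successful allocations alternate
                     :  * between them (assuming both have free slots), spreading swap I/O
                     :  * across the devices.
                     :  */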
    1382             : int
    1383           0 : uvm_swap_alloc(int *nslots, boolean_t lessok)
    1384             : {
    1385             :         struct swapdev *sdp;
    1386             :         struct swappri *spp;
    1387           0 :         u_long  result;
    1388             : 
    1389             :         /*
    1390             :          * no swap devices configured yet?   definite failure.
    1391             :          */
    1392           0 :         if (uvmexp.nswapdev < 1)
    1393           0 :                 return 0;
    1394             : 
    1395             :         /*
    1396             :          * lock data lock, convert slots into blocks, and enter loop
    1397             :          */
    1398             : 
    1399             : ReTry:  /* XXXMRG */
    1400           0 :         LIST_FOREACH(spp, &swap_priority, spi_swappri) {
    1401           0 :                 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
    1402             :                         /* if it's not enabled, then we can't swap from it */
    1403           0 :                         if ((sdp->swd_flags & SWF_ENABLE) == 0)
    1404             :                                 continue;
    1405           0 :                         if (sdp->swd_npginuse + *nslots > sdp->swd_npages)
    1406             :                                 continue;
    1407           0 :                         if (extent_alloc(sdp->swd_ex, *nslots, EX_NOALIGN, 0,
    1408             :                                          EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT,
    1409           0 :                                          &result) != 0) {
    1410             :                                 continue;
    1411             :                         }
    1412             : 
    1413             :                         /*
    1414             :                          * successful allocation!  now rotate the tailq.
    1415             :                          */
    1416           0 :                         TAILQ_REMOVE(&spp->spi_swapdev, sdp, swd_next);
    1417           0 :                         TAILQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
    1418           0 :                         sdp->swd_npginuse += *nslots;
    1419           0 :                         uvmexp.swpginuse += *nslots;
    1420             :                         /* done!  return drum slot number */
    1421           0 :                         return(result + sdp->swd_drumoffset);
    1422             :                 }
    1423             :         }
    1424             : 
    1425             :         /* XXXMRG: BEGIN HACK */
    1426           0 :         if (*nslots > 1 && lessok) {
    1427           0 :                 *nslots = 1;
    1428           0 :                 goto ReTry;     /* XXXMRG: ugh!  extent should support this for us */
    1429             :         }
    1430             :         /* XXXMRG: END HACK */
    1431             : 
    1432           0 :         return 0;               /* failed */
    1433           0 : }
    1434             : 
    1435             : /*
    1436             :  * uvm_swap_markbad: keep track of swap ranges where we've had i/o errors
    1437             :  *
    1438             :  * => we lock uvm.swap_data_lock
    1439             :  */
    1440             : void
    1441           0 : uvm_swap_markbad(int startslot, int nslots)
    1442             : {
    1443             :         struct swapdev *sdp;
    1444             : 
    1445           0 :         sdp = swapdrum_getsdp(startslot);
    1446           0 :         if (sdp != NULL) {
    1447             :                 /*
    1448             :                  * we just keep track of how many pages have been marked bad
    1449             :                  * in this device, to make everything add up in swap_off().
    1450             :                  * we assume here that the range of slots will all be within
    1451             :                  * one swap device.
    1452             :                  */
    1453           0 :                 sdp->swd_npgbad += nslots;
    1454           0 :         }
    1455           0 : }
    1456             : 
    1457             : /*
    1458             :  * uvm_swap_free: free swap slots
    1459             :  *
    1460             :  * => this can be all or part of an allocation made by uvm_swap_alloc
    1461             :  * => we lock uvm.swap_data_lock
    1462             :  */
    1463             : void
    1464           0 : uvm_swap_free(int startslot, int nslots)
    1465             : {
    1466             :         struct swapdev *sdp;
    1467             : 
    1468             :         /*
    1469             :          * ignore attempts to free the "bad" slot.
    1470             :          */
    1471             : 
    1472           0 :         if (startslot == SWSLOT_BAD) {
    1473           0 :                 return;
    1474             :         }
    1475             : 
    1476             :         /*
    1477             :          * convert drum slot offset back to sdp, free the blocks
    1478             :          * in the extent, and return.   must hold pri lock to do
    1479             :          * lookup and access the extent.
    1480             :          */
    1481             : 
    1482           0 :         sdp = swapdrum_getsdp(startslot);
    1483           0 :         KASSERT(uvmexp.nswapdev >= 1);
    1484           0 :         KASSERT(sdp != NULL);
    1485           0 :         KASSERT(sdp->swd_npginuse >= nslots);
    1486           0 :         if (extent_free(sdp->swd_ex, startslot - sdp->swd_drumoffset, nslots,
    1487           0 :                         EX_MALLOCOK|EX_NOWAIT) != 0) {
    1488           0 :                 printf("warning: resource shortage: %d pages of swap lost\n",
    1489             :                         nslots);
    1490           0 :         }
    1491             : 
    1492           0 :         sdp->swd_npginuse -= nslots;
    1493           0 :         uvmexp.swpginuse -= nslots;
    1494             : #ifdef UVM_SWAP_ENCRYPT
    1495             :         {
    1496             :                 int i;
    1497           0 :                 if (swap_encrypt_initialized) {
    1498             :                         /* Dereference keys */
    1499           0 :                         for (i = 0; i < nslots; i++)
    1500           0 :                                 if (uvm_swap_needdecrypt(sdp, startslot + i)) {
    1501             :                                         struct swap_key *key;
    1502             : 
    1503           0 :                                         key = SWD_KEY(sdp, startslot + i);
    1504           0 :                                         if (key->refcount != 0)
    1505           0 :                                                 SWAP_KEY_PUT(sdp, key);
    1506           0 :                                 }
    1507             : 
    1508             :                         /* Mark range as not decrypt */
    1509           0 :                         uvm_swap_markdecrypt(sdp, startslot, nslots, 0);
    1510           0 :                 }
    1511             :         }
    1512             : #endif /* UVM_SWAP_ENCRYPT */
    1513           0 : }
    1514             : 
    1515             : /*
    1516             :  * uvm_swap_put: put any number of pages into a contig place on swap
    1517             :  *
    1518             :  * => can be sync or async
    1519             :  */
    1520             : int
    1521           0 : uvm_swap_put(int swslot, struct vm_page **ppsp, int npages, int flags)
    1522             : {
    1523             :         int     result;
    1524             : 
    1525           0 :         result = uvm_swap_io(ppsp, swslot, npages, B_WRITE |
    1526           0 :             ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
    1527             : 
    1528           0 :         return (result);
    1529             : }
    1530             : 
    1531             : /*
    1532             :  * uvm_swap_get: get a single page from swap
    1533             :  *
    1534             :  * => usually a sync op (from fault)
    1535             :  */
    1536             : int
    1537           0 : uvm_swap_get(struct vm_page *page, int swslot, int flags)
    1538             : {
    1539             :         int     result;
    1540             : 
    1541           0 :         uvmexp.nswget++;
    1542           0 :         KASSERT(flags & PGO_SYNCIO);
    1543           0 :         if (swslot == SWSLOT_BAD) {
    1544           0 :                 return VM_PAGER_ERROR;
    1545             :         }
    1546             : 
    1547             :         /* this page is (about to be) no longer only in swap. */
    1548           0 :         uvmexp.swpgonly--;
    1549             : 
    1550           0 :         result = uvm_swap_io(&page, swslot, 1, B_READ |
    1551             :             ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
    1552             : 
    1553           0 :         if (result != VM_PAGER_OK && result != VM_PAGER_PEND) {
    1554             :                 /* oops, the read failed so it really is still only in swap. */
    1555           0 :                 uvmexp.swpgonly++;
    1556           0 :         }
    1557             : 
    1558           0 :         return (result);
    1559           0 : }
    1560             : 
    1561             : /*
    1562             :  * uvm_swap_io: do an i/o operation to swap
    1563             :  */
    1564             : 
    1565             : int
    1566           0 : uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags)
    1567             : {
    1568             :         daddr_t startblk;
    1569             :         struct  buf *bp;
    1570             :         vaddr_t kva;
    1571             :         int     result, s, mapinflags, pflag, bounce = 0, i;
    1572             :         boolean_t write, async;
    1573             :         vaddr_t bouncekva;
    1574           0 :         struct vm_page *tpps[MAXBSIZE >> PAGE_SHIFT];
    1575             : #ifdef UVM_SWAP_ENCRYPT
    1576             :         struct swapdev *sdp;
    1577             :         int     encrypt = 0;
    1578             : #endif
    1579             : 
    1580           0 :         write = (flags & B_READ) == 0;
    1581           0 :         async = (flags & B_ASYNC) != 0;
    1582             : 
    1583             :         /* convert starting drum slot to block number */
    1584           0 :         startblk = btodb((u_int64_t)startslot << PAGE_SHIFT);
    1585             : 
    1586             :         /*
    1587             :          * first, map the pages into the kernel (XXX: currently required
    1588             :          * by buffer system).
    1589             :          */
    1590           0 :         mapinflags = !write ? UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE;
    1591           0 :         if (!async)
    1592           0 :                 mapinflags |= UVMPAGER_MAPIN_WAITOK;
    1593           0 :         kva = uvm_pagermapin(pps, npages, mapinflags);
    1594           0 :         if (kva == 0)
    1595           0 :                 return (VM_PAGER_AGAIN);
    1596             : 
    1597             : #ifdef UVM_SWAP_ENCRYPT
    1598           0 :         if (write) {
    1599             :                 /*
    1600             :                  * Check if we need to do swap encryption on old pages.
    1601             :                  * Later we will need a different scheme that swap-encrypts
    1602             :                  * all pages of a process that had at least one page swap
    1603             :                  * encrypted.  Then we might not need to copy all pages
    1604             :                  * in the cluster, and could avoid the memory overhead of
    1605             :                  * swapping.
    1606             :                  */
    1607           0 :                 if (uvm_doswapencrypt)
    1608           0 :                         encrypt = 1;
    1609             :         }
    1610             : 
    1611           0 :         if (swap_encrypt_initialized || encrypt) {
    1612             :                 /*
    1613             :                  * we need to know the swap device that we are swapping to/from
    1614             :                  * to see if the pages need to be marked for decryption or
    1615             :                  * actually need to be decrypted.
    1616             :                  * XXX - does this information stay the same over the whole
    1617             :                  * execution of this function?
    1618             :                  */
    1619           0 :                 sdp = swapdrum_getsdp(startslot);
    1620           0 :         }
    1621             : 
    1622             :         /*
    1623             :          * Check that we are DMA capable for reads (writes always
    1624             :          * bounce through swapencrypt anyway).
    1625             :          */
    1626           0 :         if (write && encrypt) {
    1627             :                 bounce = 1; /* bounce through swapencrypt always */
    1628           0 :         } else {
    1629             : #else
    1630             :         {
    1631             : #endif
    1632             : 
    1633           0 :                 for (i = 0; i < npages; i++) {
    1634           0 :                         if (VM_PAGE_TO_PHYS(pps[i]) < dma_constraint.ucr_low ||
    1635           0 :                            VM_PAGE_TO_PHYS(pps[i]) > dma_constraint.ucr_high) {
    1636             :                                 bounce = 1;
    1637           0 :                                 break;
    1638             :                         }
    1639             :                 }
    1640             :         }
    1641             : 
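                     :         /*
                     :          * If any page lies outside the DMA-reachable range (or the
                     :          * write is being encrypted), stage the transfer through freshly
                     :          * allocated bounce pages (tpps) mapped at bouncekva.
                     :          */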
    1642           0 :         if (bounce) {
    1643             :                 int swmapflags;
    1644             : 
    1645             :                 /* We always need write access. */
    1646             :                 swmapflags = UVMPAGER_MAPIN_READ;
    1647           0 :                 if (!async)
    1648           0 :                         swmapflags |= UVMPAGER_MAPIN_WAITOK;
    1649             : 
    1650           0 :                 if (!uvm_swap_allocpages(tpps, npages)) {
    1651           0 :                         uvm_pagermapout(kva, npages);
    1652           0 :                         return (VM_PAGER_AGAIN);
    1653             :                 }
    1654             : 
    1655           0 :                 bouncekva = uvm_pagermapin(tpps, npages, swmapflags);
    1656           0 :                 if (bouncekva == 0) {
    1657           0 :                         uvm_pagermapout(kva, npages);
    1658           0 :                         uvm_swap_freepages(tpps, npages);
    1659           0 :                         return (VM_PAGER_AGAIN);
    1660             :                 }
    1661           0 :         }
    1662             : 
    1663             :         /* encrypt to swap */
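                     :         /*
                     :          * For a bounced write the data is copied (and, with encryption
                     :          * enabled, encrypted) from the original pages at kva into the
                     :          * bounce pages at bouncekva; the original pages are then dropped,
                     :          * so only the bounce copy is handed to the device.
                     :          */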
    1664           0 :         if (write && bounce) {
    1665           0 :                 int i, opages;
    1666             :                 caddr_t src, dst;
    1667             :                 u_int64_t block;
    1668             : 
    1669           0 :                 src = (caddr_t) kva;
    1670           0 :                 dst = (caddr_t) bouncekva;
    1671             :                 block = startblk;
    1672           0 :                 for (i = 0; i < npages; i++) {
    1673             : #ifdef UVM_SWAP_ENCRYPT
    1674             :                         struct swap_key *key;
    1675             : 
    1676           0 :                         if (encrypt) {
    1677           0 :                                 key = SWD_KEY(sdp, startslot + i);
    1678           0 :                                 SWAP_KEY_GET(sdp, key); /* add reference */
    1679             : 
    1680           0 :                                 swap_encrypt(key, src, dst, block, PAGE_SIZE);
    1681           0 :                                 block += btodb(PAGE_SIZE);
    1682           0 :                         } else {
    1683             : #else
    1684             :                         {
    1685             : #endif /* UVM_SWAP_ENCRYPT */
    1686           0 :                                 memcpy(dst, src, PAGE_SIZE);
    1687             :                         }
    1688             :                         /* this just tells async callbacks to free */
    1689           0 :                         atomic_setbits_int(&tpps[i]->pg_flags, PQ_ENCRYPT);
    1690           0 :                         src += PAGE_SIZE;
    1691           0 :                         dst += PAGE_SIZE;
    1692             :                 }
    1693             : 
    1694           0 :                 uvm_pagermapout(kva, npages);
    1695             : 
    1696             :                 /* dispose of pages we don't use anymore */
    1697           0 :                 opages = npages;
    1698           0 :                 uvm_pager_dropcluster(NULL, NULL, pps, &opages,
    1699             :                                       PGO_PDFREECLUST);
    1700             : 
    1701             :                 kva = bouncekva;
    1702           0 :         }
    1703             : 
    1704             :         /*
    1705             :          * now allocate a buf for the i/o.
    1706             :          * [make sure we don't put the pagedaemon to sleep...]
    1707             :          */
    1708           0 :         pflag = (async || curproc == uvm.pagedaemon_proc) ? PR_NOWAIT :
    1709             :             PR_WAITOK;
    1710           0 :         bp = pool_get(&bufpool, pflag | PR_ZERO);
    1711             : 
    1712             :         /*
    1713             :          * if we failed to get a swapbuf, return "try again"
    1714             :          */
    1715           0 :         if (bp == NULL) {
    1716           0 :                 if (write && bounce) {
    1717             : #ifdef UVM_SWAP_ENCRYPT
    1718             :                         int i;
    1719             : 
    1720             :                         /* swap encrypt needs cleanup */
    1721           0 :                         if (encrypt)
    1722           0 :                                 for (i = 0; i < npages; i++)
    1723           0 :                                         SWAP_KEY_PUT(sdp, SWD_KEY(sdp,
    1724             :                                             startslot + i));
    1725             : #endif
    1726             : 
    1727           0 :                         uvm_pagermapout(kva, npages);
    1728           0 :                         uvm_swap_freepages(tpps, npages);
    1729           0 :                 }
    1730           0 :                 return (VM_PAGER_AGAIN);
    1731             :         }
    1732             : 
    1733             :         /*
    1734             :          * prevent ASYNC reads.
    1735             :          * uvm_swap_io is only called from uvm_swap_get, which assumes
    1736             :          * that all gets are SYNCIO.  Just make sure here.
    1737             :          * XXXARTUBC - might not be true anymore.
    1738             :          */
    1739           0 :         if (!write) {
    1740           0 :                 flags &= ~B_ASYNC;
    1741             :                 async = 0;
    1742           0 :         }
    1743             : 
    1744             :         /*
    1745             :          * fill in the bp.   we currently route our i/o through
    1746             :          * /dev/drum's vnode [swapdev_vp].
    1747             :          */
    1748           0 :         bp->b_flags = B_BUSY | B_NOCACHE | B_RAW | (flags & (B_READ|B_ASYNC));
    1749           0 :         bp->b_proc = &proc0;     /* XXX */
    1750           0 :         bp->b_vnbufs.le_next = NOLIST;
    1751           0 :         if (bounce)
    1752           0 :                 bp->b_data = (caddr_t)bouncekva;
    1753             :         else
    1754           0 :                 bp->b_data = (caddr_t)kva;
    1755           0 :         bp->b_bq = NULL;
    1756           0 :         bp->b_blkno = startblk;
    1757           0 :         LIST_INIT(&bp->b_dep);
    1758           0 :         s = splbio();
    1759           0 :         bp->b_vp = NULL;
    1760           0 :         buf_replacevnode(bp, swapdev_vp);
    1761           0 :         splx(s);
    1762           0 :         bp->b_bufsize = bp->b_bcount = (long)npages << PAGE_SHIFT;
    1763             : 
    1764             :         /*
    1765             :          * for pageouts we must set "dirtyoff" [NFS client code needs it]
    1766             :          * and bump v_numoutput (the count of active outputs).
    1767             :          */
    1768           0 :         if (write) {
    1769           0 :                 bp->b_dirtyoff = 0;
    1770           0 :                 bp->b_dirtyend = npages << PAGE_SHIFT;
    1771             : #ifdef UVM_SWAP_ENCRYPT
    1772             :                 /* mark the pages in the drum for decryption */
    1773           0 :                 if (swap_encrypt_initialized)
    1774           0 :                         uvm_swap_markdecrypt(sdp, startslot, npages, encrypt);
    1775             : #endif
    1776           0 :                 s = splbio();
    1777           0 :                 swapdev_vp->v_numoutput++;
    1778           0 :                 splx(s);
    1779           0 :         }
    1780             : 
    1781             :         /* for async ops we must set up the iodone handler. */
    1782           0 :         if (async) {
    1783           0 :                 bp->b_flags |= B_CALL | (curproc == uvm.pagedaemon_proc ?
    1784             :                                          B_PDAEMON : 0);
    1785           0 :                 bp->b_iodone = uvm_aio_biodone;
    1786           0 :         }
    1787             : 
    1788             :         /* now we start the I/O, and if async, return. */
    1789           0 :         VOP_STRATEGY(bp);
    1790           0 :         if (async)
    1791           0 :                 return (VM_PAGER_PEND);
    1792             : 
    1793             :         /* must be sync i/o.   wait for it to finish */
    1794           0 :         (void) biowait(bp);
    1795           0 :         result = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
    1796             : 
    1797             :         /* decrypt swap */
    1798           0 :         if (!write && !(bp->b_flags & B_ERROR)) {
    1799             :                 int i;
    1800           0 :                 caddr_t data = (caddr_t)kva;
    1801             :                 caddr_t dst = (caddr_t)kva;
    1802             :                 u_int64_t block = startblk;
    1803             : 
    1804           0 :                 if (bounce)
    1805           0 :                         data = (caddr_t)bouncekva;
    1806             : 
    1807           0 :                 for (i = 0; i < npages; i++) {
    1808             : #ifdef UVM_SWAP_ENCRYPT
    1809             :                         struct swap_key *key;
    1810             : 
    1811             :                         /* Check if we need to decrypt */
    1812           0 :                         if (swap_encrypt_initialized &&
    1813           0 :                             uvm_swap_needdecrypt(sdp, startslot + i)) {
    1814           0 :                                 key = SWD_KEY(sdp, startslot + i);
    1815           0 :                                 if (key->refcount == 0) {
    1816             :                                         result = VM_PAGER_ERROR;
    1817           0 :                                         break;
    1818             :                                 }
    1819           0 :                                 swap_decrypt(key, data, dst, block, PAGE_SIZE);
    1820           0 :                         } else if (bounce) {
    1821             : #else
    1822             :                         if (bounce) {
    1823             : #endif
    1824           0 :                                 memcpy(dst, data, PAGE_SIZE);
    1825           0 :                         }
    1826           0 :                         data += PAGE_SIZE;
    1827           0 :                         dst += PAGE_SIZE;
    1828           0 :                         block += btodb(PAGE_SIZE);
    1829           0 :                 }
    1830           0 :                 if (bounce)
    1831           0 :                         uvm_pagermapout(bouncekva, npages);
    1832           0 :         }
    1833             :         /* kill the pager mapping */
    1834           0 :         uvm_pagermapout(kva, npages);
    1835             : 
    1836             :         /* No longer needed; free after encryption/bouncing */
    1837           0 :         if (!write && bounce)
    1838           0 :                 uvm_swap_freepages(tpps, npages);
    1839             : 
    1840             :         /* now dispose of the buf */
    1841           0 :         s = splbio();
    1842           0 :         if (bp->b_vp)
    1843           0 :                 brelvp(bp);
    1844             : 
    1845           0 :         if (write && bp->b_vp)
    1846           0 :                 vwakeup(bp->b_vp);
    1847           0 :         pool_put(&bufpool, bp);
    1848           0 :         splx(s);
    1849             : 
    1850             :         /* finally return. */
    1851           0 :         return (result);
    1852           0 : }
    1853             : 
    1854             : void
    1855           0 : swapmount(void)
    1856             : {
    1857             :         struct swapdev *sdp;
    1858             :         struct swappri *spp;
    1859           0 :         struct vnode *vp;
    1860           0 :         dev_t swap_dev = swdevt[0].sw_dev;
    1861             :         char *nam;
    1862           0 :         char path[MNAMELEN + 1];
    1863             : 
    1864             :         /*
    1865             :          * No locking here since we happen to know that we will just be called
    1866             :          * once before any other process has forked.
    1867             :          */
    1868           0 :         if (swap_dev == NODEV)
    1869           0 :                 return;
    1870             : 
    1871             : #if defined(NFSCLIENT)
    1872           0 :         if (swap_dev == NETDEV) {
    1873             :                 extern struct nfs_diskless nfs_diskless;
    1874             : 
    1875           0 :                 snprintf(path, sizeof(path), "%s",
    1876             :                     nfs_diskless.nd_swap.ndm_host);
    1877           0 :                 vp = nfs_diskless.sw_vp;
    1878           0 :                 goto gotit;
    1879             :         } else
    1880             : #endif
    1881           0 :         if (bdevvp(swap_dev, &vp))
    1882           0 :                 return;
    1883             : 
    1884             :         /* Construct a potential path to swap */
    1885           0 :         if ((nam = findblkname(major(swap_dev))))
    1886           0 :                 snprintf(path, sizeof(path), "/dev/%s%d%c", nam,
    1887           0 :                     DISKUNIT(swap_dev), 'a' + DISKPART(swap_dev));
    1888             :         else
    1889           0 :                 snprintf(path, sizeof(path), "blkdev0x%x",
    1890             :                     swap_dev);
    1891             : 
    1892             : #if defined(NFSCLIENT)
    1893             : gotit:
    1894             : #endif
    1895           0 :         sdp = malloc(sizeof(*sdp), M_VMSWAP, M_WAITOK|M_ZERO);
    1896           0 :         spp = malloc(sizeof(*spp), M_VMSWAP, M_WAITOK);
    1897             : 
    1898           0 :         sdp->swd_flags = SWF_FAKE;
    1899           0 :         sdp->swd_dev = swap_dev;
    1900             : 
    1901           0 :         sdp->swd_pathlen = strlen(path) + 1;
    1902           0 :         sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK | M_ZERO);
    1903           0 :         strlcpy(sdp->swd_path, path, sdp->swd_pathlen);
    1904             : 
    1905           0 :         sdp->swd_vp = vp;
    1906             : 
    1907           0 :         swaplist_insert(sdp, spp, 0);
    1908             : 
    1909           0 :         if (swap_on(curproc, sdp)) {
    1910           0 :                 swaplist_find(vp, 1);
    1911           0 :                 swaplist_trim();
    1912           0 :                 vput(sdp->swd_vp);
    1913           0 :                 free(sdp->swd_path, M_VMSWAP, sdp->swd_pathlen);
    1914           0 :                 free(sdp, M_VMSWAP, sizeof(*sdp));
    1915           0 :                 return;
    1916             :         }
    1917           0 : }
    1918             : 
    1919             : #ifdef HIBERNATE
    1920             : int
    1921           0 : uvm_hibswap(dev_t dev, u_long *sp, u_long *ep)
    1922             : {
    1923             :         struct swapdev *sdp, *swd = NULL;
    1924             :         struct swappri *spp;
    1925             :         struct extent_region *exr, *exrn;
    1926             :         u_long start = 0, end = 0, size = 0;
    1927             : 
    1928             :         /* no swap devices configured yet? */
    1929           0 :         if (uvmexp.nswapdev < 1 || dev != swdevt[0].sw_dev)
    1930           0 :                 return (1);
    1931             : 
    1932           0 :         LIST_FOREACH(spp, &swap_priority, spi_swappri) {
    1933           0 :                 TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
    1934           0 :                         if (sdp->swd_dev == dev)
    1935           0 :                                 swd = sdp;
    1936             :                 }
    1937             :         }
    1938             : 
    1939           0 :         if (swd == NULL || (swd->swd_flags & SWF_ENABLE) == 0)
    1940           0 :                 return (1);
    1941             : 
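                     :         /*
                     :          * Walk the allocated regions of the swap extent: the span between
                     :          * one region's end and the next region's start is free, and the
                     :          * largest such gap is what gets offered to hibernate.
                     :          */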
    1942           0 :         LIST_FOREACH(exr, &swd->swd_ex->ex_regions, er_link) {
    1943             :                 u_long gapstart, gapend, gapsize;
    1944             :         
    1945           0 :                 gapstart = exr->er_end + 1;
    1946           0 :                 exrn = LIST_NEXT(exr, er_link);
    1947           0 :                 if (!exrn)
    1948           0 :                         break;
    1949           0 :                 gapend = exrn->er_start - 1;
    1950           0 :                 gapsize = gapend - gapstart;
    1951           0 :                 if (gapsize > size) {
    1952             :                         start = gapstart;
    1953             :                         end = gapend;
    1954             :                         size = gapsize;
    1955           0 :                 }
    1956           0 :         }
    1957             : 
    1958           0 :         if (size) {
    1959           0 :                 *sp = start;
    1960           0 :                 *ep = end;
    1961           0 :                 return (0);
    1962             :         }
    1963           0 :         return (1);
    1964           0 : }
    1965             : #endif /* HIBERNATE */

Generated by: LCOV version 1.13