Line data Source code
1 : /* $OpenBSD: uvm_swap.c,v 1.143 2018/02/19 08:59:53 mpi Exp $ */
2 : /* $NetBSD: uvm_swap.c,v 1.40 2000/11/17 11:39:39 mrg Exp $ */
3 :
4 : /*
5 : * Copyright (c) 1995, 1996, 1997 Matthew R. Green
6 : * All rights reserved.
7 : *
8 : * Redistribution and use in source and binary forms, with or without
9 : * modification, are permitted provided that the following conditions
10 : * are met:
11 : * 1. Redistributions of source code must retain the above copyright
12 : * notice, this list of conditions and the following disclaimer.
13 : * 2. Redistributions in binary form must reproduce the above copyright
14 : * notice, this list of conditions and the following disclaimer in the
15 : * documentation and/or other materials provided with the distribution.
16 : * 3. The name of the author may not be used to endorse or promote products
17 : * derived from this software without specific prior written permission.
18 : *
19 : * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 : * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 : * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 : * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 : * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 : * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 : * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 : * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 : * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 : * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 : * SUCH DAMAGE.
30 : *
31 : * from: NetBSD: vm_swap.c,v 1.52 1997/12/02 13:47:37 pk Exp
32 : * from: Id: uvm_swap.c,v 1.1.2.42 1998/02/02 20:38:06 chuck Exp
33 : */
34 :
35 : #include <sys/param.h>
36 : #include <sys/systm.h>
37 : #include <sys/buf.h>
38 : #include <sys/conf.h>
39 : #include <sys/proc.h>
40 : #include <sys/namei.h>
41 : #include <sys/disklabel.h>
42 : #include <sys/errno.h>
43 : #include <sys/kernel.h>
44 : #include <sys/malloc.h>
45 : #include <sys/vnode.h>
46 : #include <sys/fcntl.h>
47 : #include <sys/extent.h>
48 : #include <sys/mount.h>
49 : #include <sys/pool.h>
50 : #include <sys/syscallargs.h>
51 : #include <sys/swap.h>
52 : #include <sys/disk.h>
53 : #include <sys/task.h>
54 : #include <sys/pledge.h>
55 : #if defined(NFSCLIENT)
56 : #include <sys/socket.h>
57 : #include <sys/domain.h>
58 : #include <netinet/in.h>
59 : #include <nfs/nfsproto.h>
60 : #include <nfs/nfsdiskless.h>
61 : #endif
62 :
63 : #include <uvm/uvm.h>
64 : #ifdef UVM_SWAP_ENCRYPT
65 : #include <uvm/uvm_swap_encrypt.h>
66 : #endif
67 :
68 : #include <sys/specdev.h>
69 :
70 : #include "vnd.h"
71 :
72 : /*
73 : * uvm_swap.c: manage configuration and i/o to swap space.
74 : */
75 :
76 : /*
77 : * swap space is managed in the following way:
78 : *
79 : * each swap partition or file is described by a "swapdev" structure.
80 : * each "swapdev" structure contains a "swapent" structure which contains
81 : * information that is passed up to the user (via system calls).
82 : *
83 : * each swap partition is assigned a "priority" (int) which controls
84 : * swap partition usage.
85 : *
86 : * the system maintains a global data structure describing all swap
87 : * partitions/files. there is a sorted LIST of "swappri" structures
88 : * which describe "swapdev"'s at that priority. this LIST is headed
89 : * by the "swap_priority" global var. each "swappri" contains a
90 : * TAILQ of "swapdev" structures at that priority.
91 : *
92 : * locking:
93 : * - swap_syscall_lock (sleep lock): this lock serializes the swapctl
94 : * system call and prevents the swap priority list from changing
95 : * while we are in the middle of a system call (e.g. SWAP_STATS).
96 : *
97 : * each swap device has the following info:
98 : * - swap device in use (could be disabled, preventing future use)
99 : * - swap enabled (allows new allocations on swap)
100 : * - map info in /dev/drum
101 : * - vnode pointer
102 : * for swap files only:
103 : * - block size
104 : * - max byte count in buffer
105 : * - buffer
106 : * - credentials to use when doing i/o to file
107 : *
108 : * userland controls and configures swap with the swapctl(2) system call.
109 : * the sys_swapctl performs the following operations:
110 : * [1] SWAP_NSWAP: returns the number of swap devices currently configured
111 : * [2] SWAP_STATS: given a pointer to an array of swapent structures
112 : * (passed in via "arg") of a size passed in via "misc" ... we load
113 : * the current swap config into the array.
114 : * [3] SWAP_ON: given a pathname in arg (could be device or file) and a
115 : * priority in "misc", start swapping on it.
116 : * [4] SWAP_OFF: as SWAP_ON, but stops swapping to a device
117 : * [5] SWAP_CTL: changes the priority of a swap device (new priority in
118 : * "misc")
119 : */
120 :
121 : /*
122 : * swapdev: describes a single swap partition/file
123 : *
124 : * note the following should be true:
125 : * swd_inuse <= swd_nblks [number of blocks in use is <= total blocks]
126 : * swd_nblks <= swd_mapsize [because mapsize includes disklabel]
127 : */
128 : struct swapdev {
129 : struct swapent swd_se;
130 : #define swd_dev swd_se.se_dev /* device id */
131 : #define swd_flags swd_se.se_flags /* flags:inuse/enable/fake */
132 : #define swd_priority swd_se.se_priority /* our priority */
133 : #define swd_inuse swd_se.se_inuse /* blocks used */
134 : #define swd_nblks swd_se.se_nblks /* total blocks */
135 : char *swd_path; /* saved pathname of device */
136 : int swd_pathlen; /* length of pathname */
137 : int swd_npages; /* #pages we can use */
138 : int swd_npginuse; /* #pages in use */
139 : int swd_npgbad; /* #pages bad */
140 : int swd_drumoffset; /* page0 offset in drum */
141 : int swd_drumsize; /* #pages in drum */
142 : struct extent *swd_ex; /* extent for this swapdev */
143 : char swd_exname[12]; /* name of extent above */
144 : struct vnode *swd_vp; /* backing vnode */
145 : TAILQ_ENTRY(swapdev) swd_next; /* priority tailq */
146 :
147 : int swd_bsize; /* blocksize (bytes) */
148 : int swd_maxactive; /* max active i/o reqs */
149 : int swd_active; /* # of active i/o reqs */
150 : struct bufq swd_bufq;
151 : struct ucred *swd_cred; /* cred for file access */
152 : #ifdef UVM_SWAP_ENCRYPT
153 : #define SWD_KEY_SHIFT 7 /* One key per 0.5 MByte */
154 : #define SWD_KEY(x,y) &((x)->swd_keys[((y) - (x)->swd_drumoffset) >> SWD_KEY_SHIFT])
155 : #define SWD_KEY_SIZE(x) (((x) + (1 << SWD_KEY_SHIFT) - 1) >> SWD_KEY_SHIFT)
156 :
157 : #define SWD_DCRYPT_SHIFT 5
158 : #define SWD_DCRYPT_BITS 32
159 : #define SWD_DCRYPT_MASK (SWD_DCRYPT_BITS - 1)
160 : #define SWD_DCRYPT_OFF(x) ((x) >> SWD_DCRYPT_SHIFT)
161 : #define SWD_DCRYPT_BIT(x) ((x) & SWD_DCRYPT_MASK)
162 : #define SWD_DCRYPT_SIZE(x) (SWD_DCRYPT_OFF((x) + SWD_DCRYPT_MASK) * sizeof(u_int32_t))
163 : u_int32_t *swd_decrypt; /* bitmap for decryption */
164 : struct swap_key *swd_keys; /* keys for different parts */
165 : #endif
166 : };
167 :
168 : /*
169 : * swap device priority entry; the list is kept sorted on `spi_priority'.
170 : */
171 : struct swappri {
172 : int spi_priority; /* priority */
173 : TAILQ_HEAD(spi_swapdev, swapdev) spi_swapdev;
174 : /* tailq of swapdevs at this priority */
175 : LIST_ENTRY(swappri) spi_swappri; /* global list of pri's */
176 : };
177 :
178 : /*
179 : * The following two structures are used to keep track of data transfers
180 : * on swap devices associated with regular files.
181 : * NOTE: this code is more or less a copy of vnd.c; we use the same
182 : * structure names here to ease porting..
183 : */
184 : struct vndxfer {
185 : struct buf *vx_bp; /* Pointer to parent buffer */
186 : struct swapdev *vx_sdp;
187 : int vx_error;
188 : int vx_pending; /* # of pending aux buffers */
189 : int vx_flags;
190 : #define VX_BUSY 1
191 : #define VX_DEAD 2
192 : };
193 :
194 : struct vndbuf {
195 : struct buf vb_buf;
196 : struct vndxfer *vb_vnx;
197 : struct task vb_task;
198 : };
199 :
200 : /*
201 : * We keep a of pool vndbuf's and vndxfer structures.
202 : */
203 : struct pool vndxfer_pool;
204 : struct pool vndbuf_pool;
205 :
206 :
207 : /*
208 : * local variables
209 : */
210 : struct extent *swapmap; /* controls the mapping of /dev/drum */
211 :
212 : /* list of all active swap devices [by priority] */
213 : LIST_HEAD(swap_priority, swappri);
214 : struct swap_priority swap_priority;
215 :
216 : /* locks */
217 : struct rwlock swap_syscall_lock = RWLOCK_INITIALIZER("swplk");
218 :
219 : /*
220 : * prototypes
221 : */
222 : void swapdrum_add(struct swapdev *, int);
223 : struct swapdev *swapdrum_getsdp(int);
224 :
225 : struct swapdev *swaplist_find(struct vnode *, int);
226 : void swaplist_insert(struct swapdev *,
227 : struct swappri *, int);
228 : void swaplist_trim(void);
229 :
230 : int swap_on(struct proc *, struct swapdev *);
231 : int swap_off(struct proc *, struct swapdev *);
232 :
233 : void sw_reg_strategy(struct swapdev *, struct buf *, int);
234 : void sw_reg_iodone(struct buf *);
235 : void sw_reg_iodone_internal(void *);
236 : void sw_reg_start(struct swapdev *);
237 :
238 : int uvm_swap_io(struct vm_page **, int, int, int);
239 :
240 : void swapmount(void);
241 : boolean_t uvm_swap_allocpages(struct vm_page **, int);
242 :
243 : #ifdef UVM_SWAP_ENCRYPT
244 : /* for swap encrypt */
245 : void uvm_swap_markdecrypt(struct swapdev *, int, int, int);
246 : boolean_t uvm_swap_needdecrypt(struct swapdev *, int);
247 : void uvm_swap_initcrypt(struct swapdev *, int);
248 : #endif
249 :
250 : /*
251 : * uvm_swap_init: init the swap system data structures and locks
252 : *
253 : * => called at boot time from init_main.c after the filesystems
254 : * are brought up (which happens after uvm_init())
255 : */
256 : void
257 0 : uvm_swap_init(void)
258 : {
259 : /*
260 : * first, init the swap list, its counter, and its lock.
261 : * then get a handle on the vnode for /dev/drum by using
262 : * the its dev_t number ("swapdev", from MD conf.c).
263 : */
264 0 : LIST_INIT(&swap_priority);
265 0 : uvmexp.nswapdev = 0;
266 :
267 0 : if (!swapdev_vp && bdevvp(swapdev, &swapdev_vp))
268 0 : panic("uvm_swap_init: can't get vnode for swap device");
269 :
270 : /*
271 : * create swap block extent to map /dev/drum. The extent spans
272 : * 1 to INT_MAX allows 2 gigablocks of swap space. Note that
273 : * block 0 is reserved (used to indicate an allocation failure,
274 : * or no allocation).
275 : */
276 0 : swapmap = extent_create("swapmap", 1, INT_MAX,
277 : M_VMSWAP, 0, 0, EX_NOWAIT);
278 0 : if (swapmap == 0)
279 0 : panic("uvm_swap_init: extent_create failed");
280 :
281 : /* allocate pools for structures used for swapping to files. */
282 0 : pool_init(&vndxfer_pool, sizeof(struct vndxfer), 0, IPL_BIO, 0,
283 : "swp vnx", NULL);
284 0 : pool_init(&vndbuf_pool, sizeof(struct vndbuf), 0, IPL_BIO, 0,
285 : "swp vnd", NULL);
286 :
287 : /* Setup the initial swap partition */
288 0 : swapmount();
289 0 : }
290 :
291 : #ifdef UVM_SWAP_ENCRYPT
292 : void
293 0 : uvm_swap_initcrypt_all(void)
294 : {
295 : struct swapdev *sdp;
296 : struct swappri *spp;
297 : int npages;
298 :
299 :
300 0 : LIST_FOREACH(spp, &swap_priority, spi_swappri) {
301 0 : TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
302 0 : if (sdp->swd_decrypt == NULL) {
303 0 : npages = dbtob((uint64_t)sdp->swd_nblks) >>
304 : PAGE_SHIFT;
305 0 : uvm_swap_initcrypt(sdp, npages);
306 0 : }
307 : }
308 : }
309 0 : }
310 :
311 : void
312 0 : uvm_swap_initcrypt(struct swapdev *sdp, int npages)
313 : {
314 : /*
315 : * keep information if a page needs to be decrypted when we get it
316 : * from the swap device.
317 : * We cannot chance a malloc later, if we are doing ASYNC puts,
318 : * we may not call malloc with M_WAITOK. This consumes only
319 : * 8KB memory for a 256MB swap partition.
320 : */
321 0 : sdp->swd_decrypt = malloc(SWD_DCRYPT_SIZE(npages), M_VMSWAP,
322 : M_WAITOK|M_ZERO);
323 0 : sdp->swd_keys = mallocarray(SWD_KEY_SIZE(npages),
324 : sizeof(struct swap_key), M_VMSWAP, M_WAITOK|M_ZERO);
325 0 : }
326 :
327 : #endif /* UVM_SWAP_ENCRYPT */
328 :
329 : boolean_t
330 0 : uvm_swap_allocpages(struct vm_page **pps, int npages)
331 : {
332 0 : struct pglist pgl;
333 : int i;
334 : boolean_t fail;
335 :
336 : /* Estimate if we will succeed */
337 0 : uvm_lock_fpageq();
338 :
339 0 : fail = uvmexp.free - npages < uvmexp.reserve_kernel;
340 :
341 0 : uvm_unlock_fpageq();
342 :
343 0 : if (fail)
344 0 : return FALSE;
345 :
346 0 : TAILQ_INIT(&pgl);
347 0 : if (uvm_pglistalloc(npages * PAGE_SIZE, dma_constraint.ucr_low,
348 0 : dma_constraint.ucr_high, 0, 0, &pgl, npages, UVM_PLA_NOWAIT))
349 0 : return FALSE;
350 :
351 0 : for (i = 0; i < npages; i++) {
352 0 : pps[i] = TAILQ_FIRST(&pgl);
353 : /* *sigh* */
354 0 : atomic_setbits_int(&pps[i]->pg_flags, PG_BUSY);
355 0 : TAILQ_REMOVE(&pgl, pps[i], pageq);
356 : }
357 :
358 0 : return TRUE;
359 0 : }
360 :
361 : void
362 0 : uvm_swap_freepages(struct vm_page **pps, int npages)
363 : {
364 : int i;
365 :
366 0 : uvm_lock_pageq();
367 0 : for (i = 0; i < npages; i++)
368 0 : uvm_pagefree(pps[i]);
369 0 : uvm_unlock_pageq();
370 0 : }
371 :
372 : #ifdef UVM_SWAP_ENCRYPT
373 : /*
374 : * Mark pages on the swap device for later decryption
375 : */
376 :
377 : void
378 0 : uvm_swap_markdecrypt(struct swapdev *sdp, int startslot, int npages,
379 : int decrypt)
380 : {
381 : int pagestart, i;
382 : int off, bit;
383 :
384 0 : if (!sdp)
385 0 : return;
386 :
387 0 : pagestart = startslot - sdp->swd_drumoffset;
388 0 : for (i = 0; i < npages; i++, pagestart++) {
389 0 : off = SWD_DCRYPT_OFF(pagestart);
390 0 : bit = SWD_DCRYPT_BIT(pagestart);
391 0 : if (decrypt)
392 : /* pages read need decryption */
393 0 : sdp->swd_decrypt[off] |= 1 << bit;
394 : else
395 : /* pages read do not need decryption */
396 0 : sdp->swd_decrypt[off] &= ~(1 << bit);
397 : }
398 0 : }
399 :
400 : /*
401 : * Check if the page that we got from disk needs to be decrypted
402 : */
403 :
404 : boolean_t
405 0 : uvm_swap_needdecrypt(struct swapdev *sdp, int off)
406 : {
407 0 : if (!sdp)
408 0 : return FALSE;
409 :
410 0 : off -= sdp->swd_drumoffset;
411 0 : return sdp->swd_decrypt[SWD_DCRYPT_OFF(off)] & (1 << SWD_DCRYPT_BIT(off)) ?
412 : TRUE : FALSE;
413 0 : }
414 :
415 : void
416 0 : uvm_swap_finicrypt_all(void)
417 : {
418 : struct swapdev *sdp;
419 : struct swappri *spp;
420 : struct swap_key *key;
421 : unsigned int nkeys;
422 :
423 0 : LIST_FOREACH(spp, &swap_priority, spi_swappri) {
424 0 : TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
425 0 : if (sdp->swd_decrypt == NULL)
426 : continue;
427 :
428 0 : nkeys = dbtob((uint64_t)sdp->swd_nblks) >> PAGE_SHIFT;
429 0 : key = sdp->swd_keys + (SWD_KEY_SIZE(nkeys) - 1);
430 0 : do {
431 0 : if (key->refcount != 0)
432 0 : swap_key_delete(key);
433 0 : } while (key-- != sdp->swd_keys);
434 : }
435 : }
436 0 : }
437 : #endif /* UVM_SWAP_ENCRYPT */
438 :
439 : /*
440 : * swaplist functions: functions that operate on the list of swap
441 : * devices on the system.
442 : */
443 :
444 : /*
445 : * swaplist_insert: insert swap device "sdp" into the global list
446 : *
447 : * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
448 : * => caller must provide a newly malloc'd swappri structure (we will
449 : * FREE it if we don't need it... this it to prevent malloc blocking
450 : * here while adding swap)
451 : */
452 : void
453 0 : swaplist_insert(struct swapdev *sdp, struct swappri *newspp, int priority)
454 : {
455 : struct swappri *spp, *pspp;
456 :
457 : /*
458 : * find entry at or after which to insert the new device.
459 : */
460 0 : for (pspp = NULL, spp = LIST_FIRST(&swap_priority); spp != NULL;
461 0 : spp = LIST_NEXT(spp, spi_swappri)) {
462 0 : if (priority <= spp->spi_priority)
463 : break;
464 : pspp = spp;
465 : }
466 :
467 : /*
468 : * new priority?
469 : */
470 0 : if (spp == NULL || spp->spi_priority != priority) {
471 : spp = newspp; /* use newspp! */
472 :
473 0 : spp->spi_priority = priority;
474 0 : TAILQ_INIT(&spp->spi_swapdev);
475 :
476 0 : if (pspp)
477 0 : LIST_INSERT_AFTER(pspp, spp, spi_swappri);
478 : else
479 0 : LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri);
480 : } else {
481 : /* we don't need a new priority structure, free it */
482 0 : free(newspp, M_VMSWAP, sizeof(*newspp));
483 : }
484 :
485 : /*
486 : * priority found (or created). now insert on the priority's
487 : * tailq list and bump the total number of swapdevs.
488 : */
489 0 : sdp->swd_priority = priority;
490 0 : TAILQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
491 0 : uvmexp.nswapdev++;
492 0 : }
493 :
494 : /*
495 : * swaplist_find: find and optionally remove a swap device from the
496 : * global list.
497 : *
498 : * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
499 : * => we return the swapdev we found (and removed)
500 : */
501 : struct swapdev *
502 0 : swaplist_find(struct vnode *vp, boolean_t remove)
503 : {
504 : struct swapdev *sdp;
505 : struct swappri *spp;
506 :
507 : /*
508 : * search the lists for the requested vp
509 : */
510 0 : LIST_FOREACH(spp, &swap_priority, spi_swappri) {
511 0 : TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
512 0 : if (sdp->swd_vp != vp)
513 : continue;
514 0 : if (remove) {
515 0 : TAILQ_REMOVE(&spp->spi_swapdev, sdp, swd_next);
516 0 : uvmexp.nswapdev--;
517 0 : }
518 0 : return (sdp);
519 : }
520 : }
521 0 : return (NULL);
522 0 : }
523 :
524 :
525 : /*
526 : * swaplist_trim: scan priority list for empty priority entries and kill
527 : * them.
528 : *
529 : * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
530 : */
531 : void
532 0 : swaplist_trim(void)
533 : {
534 : struct swappri *spp, *nextspp;
535 :
536 0 : LIST_FOREACH_SAFE(spp, &swap_priority, spi_swappri, nextspp) {
537 0 : if (!TAILQ_EMPTY(&spp->spi_swapdev))
538 : continue;
539 0 : LIST_REMOVE(spp, spi_swappri);
540 0 : free(spp, M_VMSWAP, sizeof(*spp));
541 0 : }
542 0 : }
543 :
544 : /*
545 : * swapdrum_add: add a "swapdev"'s blocks into /dev/drum's area.
546 : *
547 : * => caller must hold swap_syscall_lock
548 : * => uvm.swap_data_lock should be unlocked (we may sleep)
549 : */
550 : void
551 0 : swapdrum_add(struct swapdev *sdp, int npages)
552 : {
553 0 : u_long result;
554 :
555 0 : if (extent_alloc(swapmap, npages, EX_NOALIGN, 0, EX_NOBOUNDARY,
556 : EX_WAITOK, &result))
557 0 : panic("swapdrum_add");
558 :
559 0 : sdp->swd_drumoffset = result;
560 0 : sdp->swd_drumsize = npages;
561 0 : }
562 :
563 : /*
564 : * swapdrum_getsdp: given a page offset in /dev/drum, convert it back
565 : * to the "swapdev" that maps that section of the drum.
566 : *
567 : * => each swapdev takes one big contig chunk of the drum
568 : * => caller must hold uvm.swap_data_lock
569 : */
570 : struct swapdev *
571 0 : swapdrum_getsdp(int pgno)
572 : {
573 : struct swapdev *sdp;
574 : struct swappri *spp;
575 :
576 0 : LIST_FOREACH(spp, &swap_priority, spi_swappri) {
577 0 : TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
578 0 : if (pgno >= sdp->swd_drumoffset &&
579 0 : pgno < (sdp->swd_drumoffset + sdp->swd_drumsize)) {
580 0 : return sdp;
581 : }
582 : }
583 : }
584 0 : return NULL;
585 0 : }
586 :
587 :
588 : /*
589 : * sys_swapctl: main entry point for swapctl(2) system call
590 : * [with two helper functions: swap_on and swap_off]
591 : */
592 : int
593 0 : sys_swapctl(struct proc *p, void *v, register_t *retval)
594 : {
595 : struct sys_swapctl_args /* {
596 : syscallarg(int) cmd;
597 : syscallarg(void *) arg;
598 : syscallarg(int) misc;
599 0 : } */ *uap = (struct sys_swapctl_args *)v;
600 : struct vnode *vp;
601 0 : struct nameidata nd;
602 : struct swappri *spp;
603 : struct swapdev *sdp;
604 : struct swapent *sep;
605 0 : char userpath[MAXPATHLEN];
606 0 : size_t len;
607 : int count, error, misc;
608 : int priority;
609 :
610 0 : misc = SCARG(uap, misc);
611 :
612 : /*
613 : * ensure serialized syscall access by grabbing the swap_syscall_lock
614 : */
615 0 : rw_enter_write(&swap_syscall_lock);
616 :
617 : /*
618 : * we handle the non-priv NSWAP and STATS request first.
619 : *
620 : * SWAP_NSWAP: return number of config'd swap devices
621 : * [can also be obtained with uvmexp sysctl]
622 : */
623 0 : if (SCARG(uap, cmd) == SWAP_NSWAP) {
624 0 : *retval = uvmexp.nswapdev;
625 : error = 0;
626 0 : goto out;
627 : }
628 :
629 : /*
630 : * SWAP_STATS: get stats on current # of configured swap devs
631 : *
632 : * note that the swap_priority list can't change as long
633 : * as we are holding the swap_syscall_lock. we don't want
634 : * to grab the uvm.swap_data_lock because we may fault&sleep during
635 : * copyout() and we don't want to be holding that lock then!
636 : */
637 0 : if (SCARG(uap, cmd) == SWAP_STATS) {
638 0 : sep = (struct swapent *)SCARG(uap, arg);
639 : count = 0;
640 :
641 0 : LIST_FOREACH(spp, &swap_priority, spi_swappri) {
642 0 : TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
643 0 : if (count >= misc)
644 : continue;
645 :
646 0 : sdp->swd_inuse =
647 0 : btodb((u_int64_t)sdp->swd_npginuse <<
648 : PAGE_SHIFT);
649 0 : error = copyout(&sdp->swd_se, sep,
650 : sizeof(struct swapent));
651 0 : if (error)
652 : goto out;
653 :
654 : /* now copy out the path if necessary */
655 0 : error = copyoutstr(sdp->swd_path,
656 0 : sep->se_path, sizeof(sep->se_path), NULL);
657 0 : if (error)
658 : goto out;
659 :
660 0 : count++;
661 0 : sep++;
662 0 : }
663 : }
664 :
665 0 : *retval = count;
666 : error = 0;
667 0 : goto out;
668 : }
669 :
670 : /* all other requests require superuser privs. verify. */
671 0 : if ((error = suser(p)) || (error = pledge_swapctl(p)))
672 : goto out;
673 :
674 : /*
675 : * at this point we expect a path name in arg. we will
676 : * use namei() to gain a vnode reference (vref), and lock
677 : * the vnode (VOP_LOCK).
678 : */
679 0 : error = copyinstr(SCARG(uap, arg), userpath, sizeof(userpath), &len);
680 0 : if (error)
681 : goto out;
682 0 : disk_map(userpath, userpath, sizeof(userpath), DM_OPENBLCK);
683 0 : NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, userpath, p);
684 0 : if ((error = namei(&nd)))
685 : goto out;
686 0 : vp = nd.ni_vp;
687 : /* note: "vp" is referenced and locked */
688 :
689 : error = 0; /* assume no error */
690 0 : switch(SCARG(uap, cmd)) {
691 : case SWAP_DUMPDEV:
692 0 : if (vp->v_type != VBLK) {
693 : error = ENOTBLK;
694 0 : break;
695 : }
696 0 : dumpdev = vp->v_rdev;
697 0 : break;
698 : case SWAP_CTL:
699 : /*
700 : * get new priority, remove old entry (if any) and then
701 : * reinsert it in the correct place. finally, prune out
702 : * any empty priority structures.
703 : */
704 0 : priority = SCARG(uap, misc);
705 0 : spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
706 0 : if ((sdp = swaplist_find(vp, 1)) == NULL) {
707 : error = ENOENT;
708 0 : } else {
709 0 : swaplist_insert(sdp, spp, priority);
710 0 : swaplist_trim();
711 : }
712 0 : if (error)
713 0 : free(spp, M_VMSWAP, sizeof(*spp));
714 : break;
715 : case SWAP_ON:
716 : /*
717 : * check for duplicates. if none found, then insert a
718 : * dummy entry on the list to prevent someone else from
719 : * trying to enable this device while we are working on
720 : * it.
721 : */
722 0 : priority = SCARG(uap, misc);
723 0 : if ((sdp = swaplist_find(vp, 0)) != NULL) {
724 : error = EBUSY;
725 0 : break;
726 : }
727 0 : sdp = malloc(sizeof *sdp, M_VMSWAP, M_WAITOK|M_ZERO);
728 0 : spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
729 0 : sdp->swd_flags = SWF_FAKE; /* placeholder only */
730 0 : sdp->swd_vp = vp;
731 0 : sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV;
732 :
733 : /*
734 : * XXX Is NFS elaboration necessary?
735 : */
736 0 : if (vp->v_type == VREG) {
737 0 : sdp->swd_cred = crdup(p->p_ucred);
738 0 : }
739 :
740 0 : swaplist_insert(sdp, spp, priority);
741 :
742 0 : sdp->swd_pathlen = len;
743 0 : sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK);
744 0 : strlcpy(sdp->swd_path, userpath, len);
745 :
746 : /*
747 : * we've now got a FAKE placeholder in the swap list.
748 : * now attempt to enable swap on it. if we fail, undo
749 : * what we've done and kill the fake entry we just inserted.
750 : * if swap_on is a success, it will clear the SWF_FAKE flag
751 : */
752 :
753 0 : if ((error = swap_on(p, sdp)) != 0) {
754 0 : (void) swaplist_find(vp, 1); /* kill fake entry */
755 0 : swaplist_trim();
756 0 : if (vp->v_type == VREG) {
757 0 : crfree(sdp->swd_cred);
758 0 : }
759 0 : free(sdp->swd_path, M_VMSWAP, sdp->swd_pathlen);
760 0 : free(sdp, M_VMSWAP, sizeof(*sdp));
761 0 : break;
762 : }
763 : break;
764 : case SWAP_OFF:
765 0 : if ((sdp = swaplist_find(vp, 0)) == NULL) {
766 : error = ENXIO;
767 0 : break;
768 : }
769 :
770 : /*
771 : * If a device isn't in use or enabled, we
772 : * can't stop swapping from it (again).
773 : */
774 0 : if ((sdp->swd_flags & (SWF_INUSE|SWF_ENABLE)) == 0) {
775 : error = EBUSY;
776 0 : break;
777 : }
778 :
779 : /*
780 : * do the real work.
781 : */
782 0 : error = swap_off(p, sdp);
783 0 : break;
784 : default:
785 : error = EINVAL;
786 0 : }
787 :
788 : /* done! release the ref gained by namei() and unlock. */
789 0 : vput(vp);
790 :
791 : out:
792 0 : rw_exit_write(&swap_syscall_lock);
793 :
794 0 : return (error);
795 0 : }
796 :
797 : /*
798 : * swap_on: attempt to enable a swapdev for swapping. note that the
799 : * swapdev is already on the global list, but disabled (marked
800 : * SWF_FAKE).
801 : *
802 : * => we avoid the start of the disk (to protect disk labels)
803 : * => caller should leave uvm.swap_data_lock unlocked, we may lock it
804 : * if needed.
805 : */
806 : int
807 0 : swap_on(struct proc *p, struct swapdev *sdp)
808 : {
809 : static int count = 0; /* static */
810 : struct vnode *vp;
811 : int error, npages, nblocks, size;
812 : long addr;
813 0 : struct vattr va;
814 : #if defined(NFSCLIENT)
815 : extern struct vops nfs_vops;
816 : #endif /* defined(NFSCLIENT) */
817 : dev_t dev;
818 :
819 : /*
820 : * we want to enable swapping on sdp. the swd_vp contains
821 : * the vnode we want (locked and ref'd), and the swd_dev
822 : * contains the dev_t of the file, if it a block device.
823 : */
824 :
825 0 : vp = sdp->swd_vp;
826 0 : dev = sdp->swd_dev;
827 :
828 : #if NVND > 0
829 : /* no swapping to vnds. */
830 0 : if (bdevsw[major(dev)].d_strategy == vndstrategy)
831 0 : return (EOPNOTSUPP);
832 : #endif
833 :
834 : /*
835 : * open the swap file (mostly useful for block device files to
836 : * let device driver know what is up).
837 : *
838 : * we skip the open/close for root on swap because the root
839 : * has already been opened when root was mounted (mountroot).
840 : */
841 0 : if (vp != rootvp) {
842 0 : if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p)))
843 0 : return (error);
844 : }
845 :
846 : /* XXX this only works for block devices */
847 : /*
848 : * we now need to determine the size of the swap area. for
849 : * block specials we can call the d_psize function.
850 : * for normal files, we must stat [get attrs].
851 : *
852 : * we put the result in nblks.
853 : * for normal files, we also want the filesystem block size
854 : * (which we get with statfs).
855 : */
856 0 : switch (vp->v_type) {
857 : case VBLK:
858 0 : if (bdevsw[major(dev)].d_psize == 0 ||
859 0 : (nblocks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
860 : error = ENXIO;
861 0 : goto bad;
862 : }
863 : break;
864 :
865 : case VREG:
866 0 : if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)))
867 : goto bad;
868 0 : nblocks = (int)btodb(va.va_size);
869 0 : if ((error =
870 0 : VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0)
871 : goto bad;
872 :
873 0 : sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize;
874 : /*
875 : * limit the max # of outstanding I/O requests we issue
876 : * at any one time. take it easy on NFS servers.
877 : */
878 : #if defined(NFSCLIENT)
879 0 : if (vp->v_op == &nfs_vops)
880 0 : sdp->swd_maxactive = 2; /* XXX */
881 : else
882 : #endif /* defined(NFSCLIENT) */
883 0 : sdp->swd_maxactive = 8; /* XXX */
884 0 : bufq_init(&sdp->swd_bufq, BUFQ_FIFO);
885 0 : break;
886 :
887 : default:
888 : error = ENXIO;
889 0 : goto bad;
890 : }
891 :
892 : /*
893 : * save nblocks in a safe place and convert to pages.
894 : */
895 :
896 0 : sdp->swd_nblks = nblocks;
897 0 : npages = dbtob((u_int64_t)nblocks) >> PAGE_SHIFT;
898 :
899 : /*
900 : * for block special files, we want to make sure that leave
901 : * the disklabel and bootblocks alone, so we arrange to skip
902 : * over them (arbitrarily choosing to skip PAGE_SIZE bytes).
903 : * note that because of this the "size" can be less than the
904 : * actual number of blocks on the device.
905 : */
906 0 : if (vp->v_type == VBLK) {
907 : /* we use pages 1 to (size - 1) [inclusive] */
908 0 : size = npages - 1;
909 : addr = 1;
910 0 : } else {
911 : /* we use pages 0 to (size - 1) [inclusive] */
912 : size = npages;
913 : addr = 0;
914 : }
915 :
916 : /*
917 : * make sure we have enough blocks for a reasonable sized swap
918 : * area. we want at least one page.
919 : */
920 :
921 0 : if (size < 1) {
922 : error = EINVAL;
923 0 : goto bad;
924 : }
925 :
926 : /*
927 : * now we need to allocate an extent to manage this swap device
928 : */
929 0 : snprintf(sdp->swd_exname, sizeof(sdp->swd_exname), "swap0x%04x",
930 0 : count++);
931 :
932 : /* note that extent_create's 3rd arg is inclusive, thus "- 1" */
933 0 : sdp->swd_ex = extent_create(sdp->swd_exname, 0, npages - 1, M_VMSWAP,
934 : 0, 0, EX_WAITOK);
935 : /* allocate the `saved' region from the extent so it won't be used */
936 0 : if (addr) {
937 0 : if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK))
938 0 : panic("disklabel reserve");
939 : /* XXX: is extent synchronized with swd_npginuse? */
940 : }
941 : #ifdef HIBERNATE
942 : /*
943 : * Lock down the last region of primary disk swap, in case
944 : * hibernate needs to place a signature there.
945 : */
946 0 : if (dev == swdevt[0].sw_dev && vp->v_type == VBLK && size > 3 ) {
947 0 : if (extent_alloc_region(sdp->swd_ex,
948 0 : npages - 1 - 1, 1, EX_WAITOK))
949 0 : panic("hibernate reserve");
950 : /* XXX: is extent synchronized with swd_npginuse? */
951 : }
952 : #endif
953 :
954 : /* add a ref to vp to reflect usage as a swap device. */
955 0 : vref(vp);
956 :
957 : #ifdef UVM_SWAP_ENCRYPT
958 0 : if (uvm_doswapencrypt)
959 0 : uvm_swap_initcrypt(sdp, npages);
960 : #endif
961 : /* now add the new swapdev to the drum and enable. */
962 0 : swapdrum_add(sdp, npages);
963 0 : sdp->swd_npages = size;
964 0 : sdp->swd_flags &= ~SWF_FAKE; /* going live */
965 0 : sdp->swd_flags |= (SWF_INUSE|SWF_ENABLE);
966 0 : uvmexp.swpages += size;
967 0 : return (0);
968 :
969 : bad:
970 : /* failure: close device if necessary and return error. */
971 0 : if (vp != rootvp)
972 0 : (void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
973 0 : return (error);
974 0 : }
975 :
976 : /*
977 : * swap_off: stop swapping on swapdev
978 : *
979 : * => swap data should be locked, we will unlock.
980 : */
981 : int
982 0 : swap_off(struct proc *p, struct swapdev *sdp)
983 : {
984 : int error = 0;
985 :
986 : /* disable the swap area being removed */
987 0 : sdp->swd_flags &= ~SWF_ENABLE;
988 :
989 : /*
990 : * the idea is to find all the pages that are paged out to this
991 : * device, and page them all in. in uvm, swap-backed pageable
992 : * memory can take two forms: aobjs and anons. call the
993 : * swapoff hook for each subsystem to bring in pages.
994 : */
995 :
996 0 : if (uao_swap_off(sdp->swd_drumoffset,
997 0 : sdp->swd_drumoffset + sdp->swd_drumsize) ||
998 0 : amap_swap_off(sdp->swd_drumoffset,
999 0 : sdp->swd_drumoffset + sdp->swd_drumsize)) {
1000 :
1001 : error = ENOMEM;
1002 0 : } else if (sdp->swd_npginuse > sdp->swd_npgbad) {
1003 : error = EBUSY;
1004 0 : }
1005 :
1006 0 : if (error) {
1007 0 : sdp->swd_flags |= SWF_ENABLE;
1008 0 : return (error);
1009 : }
1010 :
1011 : /*
1012 : * done with the vnode and saved creds.
1013 : * drop our ref on the vnode before calling VOP_CLOSE()
1014 : * so that spec_close() can tell if this is the last close.
1015 : */
1016 0 : if (sdp->swd_vp->v_type == VREG) {
1017 0 : crfree(sdp->swd_cred);
1018 0 : }
1019 0 : vrele(sdp->swd_vp);
1020 0 : if (sdp->swd_vp != rootvp) {
1021 0 : (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p);
1022 0 : }
1023 :
1024 0 : uvmexp.swpages -= sdp->swd_npages;
1025 :
1026 0 : if (swaplist_find(sdp->swd_vp, 1) == NULL)
1027 0 : panic("swap_off: swapdev not in list");
1028 0 : swaplist_trim();
1029 :
1030 : /*
1031 : * free all resources!
1032 : */
1033 0 : extent_free(swapmap, sdp->swd_drumoffset, sdp->swd_drumsize,
1034 : EX_WAITOK);
1035 0 : extent_destroy(sdp->swd_ex);
1036 : /* free sdp->swd_path ? */
1037 0 : free(sdp, M_VMSWAP, sizeof(*sdp));
1038 0 : return (0);
1039 0 : }
1040 :
1041 : /*
1042 : * /dev/drum interface and i/o functions
1043 : */
1044 :
1045 : /*
1046 : * swstrategy: perform I/O on the drum
1047 : *
1048 : * => we must map the i/o request from the drum to the correct swapdev.
1049 : */
1050 : void
1051 0 : swstrategy(struct buf *bp)
1052 : {
1053 : struct swapdev *sdp;
1054 : int s, pageno, bn;
1055 :
1056 : /*
1057 : * convert block number to swapdev. note that swapdev can't
1058 : * be yanked out from under us because we are holding resources
1059 : * in it (i.e. the blocks we are doing I/O on).
1060 : */
1061 0 : pageno = dbtob((u_int64_t)bp->b_blkno) >> PAGE_SHIFT;
1062 0 : sdp = swapdrum_getsdp(pageno);
1063 0 : if (sdp == NULL) {
1064 0 : bp->b_error = EINVAL;
1065 0 : bp->b_flags |= B_ERROR;
1066 0 : s = splbio();
1067 0 : biodone(bp);
1068 0 : splx(s);
1069 0 : return;
1070 : }
1071 :
1072 : /* convert drum page number to block number on this swapdev. */
1073 0 : pageno -= sdp->swd_drumoffset; /* page # on swapdev */
1074 0 : bn = btodb((u_int64_t)pageno << PAGE_SHIFT); /* convert to diskblock */
1075 :
1076 : /*
1077 : * for block devices we finish up here.
1078 : * for regular files we have to do more work which we delegate
1079 : * to sw_reg_strategy().
1080 : */
1081 0 : switch (sdp->swd_vp->v_type) {
1082 : default:
1083 0 : panic("swstrategy: vnode type 0x%x", sdp->swd_vp->v_type);
1084 : case VBLK:
1085 : /*
1086 : * must convert "bp" from an I/O on /dev/drum to an I/O
1087 : * on the swapdev (sdp).
1088 : */
1089 0 : s = splbio();
1090 0 : buf_replacevnode(bp, sdp->swd_vp);
1091 :
1092 0 : bp->b_blkno = bn;
1093 0 : splx(s);
1094 0 : VOP_STRATEGY(bp);
1095 0 : return;
1096 : case VREG:
1097 : /* delegate to sw_reg_strategy function. */
1098 0 : sw_reg_strategy(sdp, bp, bn);
1099 0 : return;
1100 : }
1101 : /* NOTREACHED */
1102 0 : }
1103 :
1104 : /*
1105 : * sw_reg_strategy: handle swap i/o to regular files
1106 : */
1107 : void
1108 0 : sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn)
1109 : {
1110 0 : struct vnode *vp;
1111 : struct vndxfer *vnx;
1112 0 : daddr_t nbn;
1113 : caddr_t addr;
1114 : off_t byteoff;
1115 0 : int s, off, nra, error, sz, resid;
1116 :
1117 : /*
1118 : * allocate a vndxfer head for this transfer and point it to
1119 : * our buffer.
1120 : */
1121 0 : vnx = pool_get(&vndxfer_pool, PR_WAITOK);
1122 0 : vnx->vx_flags = VX_BUSY;
1123 0 : vnx->vx_error = 0;
1124 0 : vnx->vx_pending = 0;
1125 0 : vnx->vx_bp = bp;
1126 0 : vnx->vx_sdp = sdp;
1127 :
1128 : /*
1129 : * setup for main loop where we read filesystem blocks into
1130 : * our buffer.
1131 : */
1132 : error = 0;
1133 0 : bp->b_resid = bp->b_bcount; /* nothing transferred yet! */
1134 0 : addr = bp->b_data; /* current position in buffer */
1135 0 : byteoff = dbtob((u_int64_t)bn);
1136 :
1137 0 : for (resid = bp->b_resid; resid; resid -= sz) {
1138 : struct vndbuf *nbp;
1139 : /*
1140 : * translate byteoffset into block number. return values:
1141 : * vp = vnode of underlying device
1142 : * nbn = new block number (on underlying vnode dev)
1143 : * nra = num blocks we can read-ahead (excludes requested
1144 : * block)
1145 : */
1146 0 : nra = 0;
1147 0 : error = VOP_BMAP(sdp->swd_vp, byteoff / sdp->swd_bsize,
1148 : &vp, &nbn, &nra);
1149 :
1150 0 : if (error == 0 && nbn == -1) {
1151 : /*
1152 : * this used to just set error, but that doesn't
1153 : * do the right thing. Instead, it causes random
1154 : * memory errors. The panic() should remain until
1155 : * this condition doesn't destabilize the system.
1156 : */
1157 : #if 1
1158 0 : panic("sw_reg_strategy: swap to sparse file");
1159 : #else
1160 : error = EIO; /* failure */
1161 : #endif
1162 : }
1163 :
1164 : /*
1165 : * punt if there was an error or a hole in the file.
1166 : * we must wait for any i/o ops we have already started
1167 : * to finish before returning.
1168 : *
1169 : * XXX we could deal with holes here but it would be
1170 : * a hassle (in the write case).
1171 : */
1172 0 : if (error) {
1173 0 : s = splbio();
1174 0 : vnx->vx_error = error; /* pass error up */
1175 0 : goto out;
1176 : }
1177 :
1178 : /*
1179 : * compute the size ("sz") of this transfer (in bytes).
1180 : */
1181 0 : off = byteoff % sdp->swd_bsize;
1182 0 : sz = (1 + nra) * sdp->swd_bsize - off;
1183 0 : if (sz > resid)
1184 0 : sz = resid;
1185 :
1186 : /*
1187 : * now get a buf structure. note that the vb_buf is
1188 : * at the front of the nbp structure so that you can
1189 : * cast pointers between the two structure easily.
1190 : */
1191 0 : nbp = pool_get(&vndbuf_pool, PR_WAITOK);
1192 0 : nbp->vb_buf.b_flags = bp->b_flags | B_CALL;
1193 0 : nbp->vb_buf.b_bcount = sz;
1194 0 : nbp->vb_buf.b_bufsize = sz;
1195 0 : nbp->vb_buf.b_error = 0;
1196 0 : nbp->vb_buf.b_data = addr;
1197 0 : nbp->vb_buf.b_bq = NULL;
1198 0 : nbp->vb_buf.b_blkno = nbn + btodb(off);
1199 0 : nbp->vb_buf.b_proc = bp->b_proc;
1200 0 : nbp->vb_buf.b_iodone = sw_reg_iodone;
1201 0 : nbp->vb_buf.b_vp = NULLVP;
1202 0 : nbp->vb_buf.b_vnbufs.le_next = NOLIST;
1203 0 : LIST_INIT(&nbp->vb_buf.b_dep);
1204 :
1205 : /*
1206 : * set b_dirtyoff/end and b_validoff/end. this is
1207 : * required by the NFS client code (otherwise it will
1208 : * just discard our I/O request).
1209 : */
1210 0 : if (bp->b_dirtyend == 0) {
1211 0 : nbp->vb_buf.b_dirtyoff = 0;
1212 0 : nbp->vb_buf.b_dirtyend = sz;
1213 0 : } else {
1214 0 : nbp->vb_buf.b_dirtyoff =
1215 0 : max(0, bp->b_dirtyoff - (bp->b_bcount-resid));
1216 0 : nbp->vb_buf.b_dirtyend =
1217 0 : min(sz,
1218 0 : max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
1219 : }
1220 0 : if (bp->b_validend == 0) {
1221 0 : nbp->vb_buf.b_validoff = 0;
1222 0 : nbp->vb_buf.b_validend = sz;
1223 0 : } else {
1224 0 : nbp->vb_buf.b_validoff =
1225 0 : max(0, bp->b_validoff - (bp->b_bcount-resid));
1226 0 : nbp->vb_buf.b_validend =
1227 0 : min(sz,
1228 0 : max(0, bp->b_validend - (bp->b_bcount-resid)));
1229 : }
1230 :
1231 : /* patch it back to the vnx */
1232 0 : nbp->vb_vnx = vnx;
1233 0 : task_set(&nbp->vb_task, sw_reg_iodone_internal, nbp);
1234 :
1235 0 : s = splbio();
1236 0 : if (vnx->vx_error != 0) {
1237 0 : pool_put(&vndbuf_pool, nbp);
1238 0 : goto out;
1239 : }
1240 0 : vnx->vx_pending++;
1241 :
1242 : /* assoc new buffer with underlying vnode */
1243 0 : bgetvp(vp, &nbp->vb_buf);
1244 :
1245 : /* start I/O if we are not over our limit */
1246 0 : bufq_queue(&sdp->swd_bufq, &nbp->vb_buf);
1247 0 : sw_reg_start(sdp);
1248 0 : splx(s);
1249 :
1250 : /*
1251 : * advance to the next I/O
1252 : */
1253 0 : byteoff += sz;
1254 0 : addr += sz;
1255 0 : }
1256 :
1257 0 : s = splbio();
1258 :
1259 : out: /* Arrive here at splbio */
1260 0 : vnx->vx_flags &= ~VX_BUSY;
1261 0 : if (vnx->vx_pending == 0) {
1262 0 : if (vnx->vx_error != 0) {
1263 0 : bp->b_error = vnx->vx_error;
1264 0 : bp->b_flags |= B_ERROR;
1265 0 : }
1266 0 : pool_put(&vndxfer_pool, vnx);
1267 0 : biodone(bp);
1268 0 : }
1269 0 : splx(s);
1270 0 : }
1271 :
1272 : /* sw_reg_start: start an I/O request on the requested swapdev. */
1273 : void
1274 0 : sw_reg_start(struct swapdev *sdp)
1275 : {
1276 : struct buf *bp;
1277 :
1278 : /* XXX: recursion control */
1279 0 : if ((sdp->swd_flags & SWF_BUSY) != 0)
1280 0 : return;
1281 :
1282 0 : sdp->swd_flags |= SWF_BUSY;
1283 :
1284 0 : while (sdp->swd_active < sdp->swd_maxactive) {
1285 0 : bp = bufq_dequeue(&sdp->swd_bufq);
1286 0 : if (bp == NULL)
1287 : break;
1288 :
1289 0 : sdp->swd_active++;
1290 :
1291 0 : if ((bp->b_flags & B_READ) == 0)
1292 0 : bp->b_vp->v_numoutput++;
1293 :
1294 0 : VOP_STRATEGY(bp);
1295 : }
1296 0 : sdp->swd_flags &= ~SWF_BUSY;
1297 0 : }
1298 :
1299 : /*
1300 : * sw_reg_iodone: one of our i/o's has completed and needs post-i/o cleanup
1301 : *
1302 : * => note that we can recover the vndbuf struct by casting the buf ptr
1303 : *
1304 : * XXX:
1305 : * We only put this onto a taskq here, because of the maxactive game since
1306 : * it basically requires us to call back into VOP_STRATEGY() (where we must
1307 : * be able to sleep) via sw_reg_start().
1308 : */
1309 : void
1310 0 : sw_reg_iodone(struct buf *bp)
1311 : {
1312 0 : struct vndbuf *vbp = (struct vndbuf *)bp;
1313 0 : task_add(systq, &vbp->vb_task);
1314 0 : }
1315 :
1316 : void
1317 0 : sw_reg_iodone_internal(void *xvbp)
1318 : {
1319 0 : struct vndbuf *vbp = xvbp;
1320 0 : struct vndxfer *vnx = vbp->vb_vnx;
1321 0 : struct buf *pbp = vnx->vx_bp; /* parent buffer */
1322 0 : struct swapdev *sdp = vnx->vx_sdp;
1323 : int resid, s;
1324 :
1325 0 : s = splbio();
1326 :
1327 0 : resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
1328 0 : pbp->b_resid -= resid;
1329 0 : vnx->vx_pending--;
1330 :
1331 : /* pass error upward */
1332 0 : if (vbp->vb_buf.b_error)
1333 0 : vnx->vx_error = vbp->vb_buf.b_error;
1334 :
1335 : /* disassociate this buffer from the vnode (if any). */
1336 0 : if (vbp->vb_buf.b_vp != NULL) {
1337 0 : brelvp(&vbp->vb_buf);
1338 0 : }
1339 :
1340 : /* kill vbp structure */
1341 0 : pool_put(&vndbuf_pool, vbp);
1342 :
1343 : /*
1344 : * wrap up this transaction if it has run to completion or, in
1345 : * case of an error, when all auxiliary buffers have returned.
1346 : */
1347 0 : if (vnx->vx_error != 0) {
1348 : /* pass error upward */
1349 0 : pbp->b_flags |= B_ERROR;
1350 0 : pbp->b_error = vnx->vx_error;
1351 0 : if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {
1352 0 : pool_put(&vndxfer_pool, vnx);
1353 0 : biodone(pbp);
1354 0 : }
1355 0 : } else if (pbp->b_resid == 0) {
1356 0 : KASSERT(vnx->vx_pending == 0);
1357 0 : if ((vnx->vx_flags & VX_BUSY) == 0) {
1358 0 : pool_put(&vndxfer_pool, vnx);
1359 0 : biodone(pbp);
1360 0 : }
1361 : }
1362 :
1363 : /*
1364 : * done! start next swapdev I/O if one is pending
1365 : */
1366 0 : sdp->swd_active--;
1367 0 : sw_reg_start(sdp);
1368 0 : splx(s);
1369 0 : }
1370 :
1371 :
1372 : /*
1373 : * uvm_swap_alloc: allocate space on swap
1374 : *
1375 : * => allocation is done "round robin" down the priority list, as we
1376 : * allocate in a priority we "rotate" the tail queue.
1377 : * => space can be freed with uvm_swap_free
1378 : * => we return the page slot number in /dev/drum (0 == invalid slot)
1379 : * => we lock uvm.swap_data_lock
1380 : * => XXXMRG: "LESSOK" INTERFACE NEEDED TO EXTENT SYSTEM
1381 : */
1382 : int
1383 0 : uvm_swap_alloc(int *nslots, boolean_t lessok)
1384 : {
1385 : struct swapdev *sdp;
1386 : struct swappri *spp;
1387 0 : u_long result;
1388 :
1389 : /*
1390 : * no swap devices configured yet? definite failure.
1391 : */
1392 0 : if (uvmexp.nswapdev < 1)
1393 0 : return 0;
1394 :
1395 : /*
1396 : * lock data lock, convert slots into blocks, and enter loop
1397 : */
1398 :
1399 : ReTry: /* XXXMRG */
1400 0 : LIST_FOREACH(spp, &swap_priority, spi_swappri) {
1401 0 : TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
1402 : /* if it's not enabled, then we can't swap from it */
1403 0 : if ((sdp->swd_flags & SWF_ENABLE) == 0)
1404 : continue;
1405 0 : if (sdp->swd_npginuse + *nslots > sdp->swd_npages)
1406 : continue;
1407 0 : if (extent_alloc(sdp->swd_ex, *nslots, EX_NOALIGN, 0,
1408 : EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT,
1409 0 : &result) != 0) {
1410 : continue;
1411 : }
1412 :
1413 : /*
1414 : * successful allocation! now rotate the tailq.
1415 : */
1416 0 : TAILQ_REMOVE(&spp->spi_swapdev, sdp, swd_next);
1417 0 : TAILQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
1418 0 : sdp->swd_npginuse += *nslots;
1419 0 : uvmexp.swpginuse += *nslots;
1420 : /* done! return drum slot number */
1421 0 : return(result + sdp->swd_drumoffset);
1422 : }
1423 : }
1424 :
1425 : /* XXXMRG: BEGIN HACK */
1426 0 : if (*nslots > 1 && lessok) {
1427 0 : *nslots = 1;
1428 0 : goto ReTry; /* XXXMRG: ugh! extent should support this for us */
1429 : }
1430 : /* XXXMRG: END HACK */
1431 :
1432 0 : return 0; /* failed */
1433 0 : }
1434 :
1435 : /*
1436 : * uvm_swap_markbad: keep track of swap ranges where we've had i/o errors
1437 : *
1438 : * => we lock uvm.swap_data_lock
1439 : */
1440 : void
1441 0 : uvm_swap_markbad(int startslot, int nslots)
1442 : {
1443 : struct swapdev *sdp;
1444 :
1445 0 : sdp = swapdrum_getsdp(startslot);
1446 0 : if (sdp != NULL) {
1447 : /*
1448 : * we just keep track of how many pages have been marked bad
1449 : * in this device, to make everything add up in swap_off().
1450 : * we assume here that the range of slots will all be within
1451 : * one swap device.
1452 : */
1453 0 : sdp->swd_npgbad += nslots;
1454 0 : }
1455 0 : }
1456 :
1457 : /*
1458 : * uvm_swap_free: free swap slots
1459 : *
1460 : * => this can be all or part of an allocation made by uvm_swap_alloc
1461 : * => we lock uvm.swap_data_lock
1462 : */
1463 : void
1464 0 : uvm_swap_free(int startslot, int nslots)
1465 : {
1466 : struct swapdev *sdp;
1467 :
1468 : /*
1469 : * ignore attempts to free the "bad" slot.
1470 : */
1471 :
1472 0 : if (startslot == SWSLOT_BAD) {
1473 0 : return;
1474 : }
1475 :
1476 : /*
1477 : * convert drum slot offset back to sdp, free the blocks
1478 : * in the extent, and return. must hold pri lock to do
1479 : * lookup and access the extent.
1480 : */
1481 :
1482 0 : sdp = swapdrum_getsdp(startslot);
1483 0 : KASSERT(uvmexp.nswapdev >= 1);
1484 0 : KASSERT(sdp != NULL);
1485 0 : KASSERT(sdp->swd_npginuse >= nslots);
1486 0 : if (extent_free(sdp->swd_ex, startslot - sdp->swd_drumoffset, nslots,
1487 0 : EX_MALLOCOK|EX_NOWAIT) != 0) {
1488 0 : printf("warning: resource shortage: %d pages of swap lost\n",
1489 : nslots);
1490 0 : }
1491 :
1492 0 : sdp->swd_npginuse -= nslots;
1493 0 : uvmexp.swpginuse -= nslots;
1494 : #ifdef UVM_SWAP_ENCRYPT
1495 : {
1496 : int i;
1497 0 : if (swap_encrypt_initialized) {
1498 : /* Dereference keys */
1499 0 : for (i = 0; i < nslots; i++)
1500 0 : if (uvm_swap_needdecrypt(sdp, startslot + i)) {
1501 : struct swap_key *key;
1502 :
1503 0 : key = SWD_KEY(sdp, startslot + i);
1504 0 : if (key->refcount != 0)
1505 0 : SWAP_KEY_PUT(sdp, key);
1506 0 : }
1507 :
1508 : /* Mark range as not decrypt */
1509 0 : uvm_swap_markdecrypt(sdp, startslot, nslots, 0);
1510 0 : }
1511 : }
1512 : #endif /* UVM_SWAP_ENCRYPT */
1513 0 : }
1514 :
1515 : /*
1516 : * uvm_swap_put: put any number of pages into a contig place on swap
1517 : *
1518 : * => can be sync or async
1519 : */
1520 : int
1521 0 : uvm_swap_put(int swslot, struct vm_page **ppsp, int npages, int flags)
1522 : {
1523 : int result;
1524 :
1525 0 : result = uvm_swap_io(ppsp, swslot, npages, B_WRITE |
1526 0 : ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
1527 :
1528 0 : return (result);
1529 : }
1530 :
1531 : /*
1532 : * uvm_swap_get: get a single page from swap
1533 : *
1534 : * => usually a sync op (from fault)
1535 : */
1536 : int
1537 0 : uvm_swap_get(struct vm_page *page, int swslot, int flags)
1538 : {
1539 : int result;
1540 :
1541 0 : uvmexp.nswget++;
1542 0 : KASSERT(flags & PGO_SYNCIO);
1543 0 : if (swslot == SWSLOT_BAD) {
1544 0 : return VM_PAGER_ERROR;
1545 : }
1546 :
1547 : /* this page is (about to be) no longer only in swap. */
1548 0 : uvmexp.swpgonly--;
1549 :
1550 0 : result = uvm_swap_io(&page, swslot, 1, B_READ |
1551 : ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
1552 :
1553 0 : if (result != VM_PAGER_OK && result != VM_PAGER_PEND) {
1554 : /* oops, the read failed so it really is still only in swap. */
1555 0 : uvmexp.swpgonly++;
1556 0 : }
1557 :
1558 0 : return (result);
1559 0 : }
1560 :
1561 : /*
1562 : * uvm_swap_io: do an i/o operation to swap
1563 : */
1564 :
1565 : int
1566 0 : uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags)
1567 : {
1568 : daddr_t startblk;
1569 : struct buf *bp;
1570 : vaddr_t kva;
1571 : int result, s, mapinflags, pflag, bounce = 0, i;
1572 : boolean_t write, async;
1573 : vaddr_t bouncekva;
1574 0 : struct vm_page *tpps[MAXBSIZE >> PAGE_SHIFT];
1575 : #ifdef UVM_SWAP_ENCRYPT
1576 : struct swapdev *sdp;
1577 : int encrypt = 0;
1578 : #endif
1579 :
1580 0 : write = (flags & B_READ) == 0;
1581 0 : async = (flags & B_ASYNC) != 0;
1582 :
1583 : /* convert starting drum slot to block number */
1584 0 : startblk = btodb((u_int64_t)startslot << PAGE_SHIFT);
1585 :
1586 : /*
1587 : * first, map the pages into the kernel (XXX: currently required
1588 : * by buffer system).
1589 : */
1590 0 : mapinflags = !write ? UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE;
1591 0 : if (!async)
1592 0 : mapinflags |= UVMPAGER_MAPIN_WAITOK;
1593 0 : kva = uvm_pagermapin(pps, npages, mapinflags);
1594 0 : if (kva == 0)
1595 0 : return (VM_PAGER_AGAIN);
1596 :
1597 : #ifdef UVM_SWAP_ENCRYPT
1598 0 : if (write) {
1599 : /*
1600 : * Check if we need to do swap encryption on old pages.
1601 : * Later we need a different scheme, that swap encrypts
1602 : * all pages of a process that had at least one page swap
1603 : * encrypted. Then we might not need to copy all pages
1604 : * in the cluster, and avoid the memory overheard in
1605 : * swapping.
1606 : */
1607 0 : if (uvm_doswapencrypt)
1608 0 : encrypt = 1;
1609 : }
1610 :
1611 0 : if (swap_encrypt_initialized || encrypt) {
1612 : /*
1613 : * we need to know the swap device that we are swapping to/from
1614 : * to see if the pages need to be marked for decryption or
1615 : * actually need to be decrypted.
1616 : * XXX - does this information stay the same over the whole
1617 : * execution of this function?
1618 : */
1619 0 : sdp = swapdrum_getsdp(startslot);
1620 0 : }
1621 :
1622 : /*
1623 : * Check that we are dma capable for read (write always bounces
1624 : * through the swapencrypt anyway...
1625 : */
1626 0 : if (write && encrypt) {
1627 : bounce = 1; /* bounce through swapencrypt always */
1628 0 : } else {
1629 : #else
1630 : {
1631 : #endif
1632 :
1633 0 : for (i = 0; i < npages; i++) {
1634 0 : if (VM_PAGE_TO_PHYS(pps[i]) < dma_constraint.ucr_low ||
1635 0 : VM_PAGE_TO_PHYS(pps[i]) > dma_constraint.ucr_high) {
1636 : bounce = 1;
1637 0 : break;
1638 : }
1639 : }
1640 : }
1641 :
1642 0 : if (bounce) {
1643 : int swmapflags;
1644 :
1645 : /* We always need write access. */
1646 : swmapflags = UVMPAGER_MAPIN_READ;
1647 0 : if (!async)
1648 0 : swmapflags |= UVMPAGER_MAPIN_WAITOK;
1649 :
1650 0 : if (!uvm_swap_allocpages(tpps, npages)) {
1651 0 : uvm_pagermapout(kva, npages);
1652 0 : return (VM_PAGER_AGAIN);
1653 : }
1654 :
1655 0 : bouncekva = uvm_pagermapin(tpps, npages, swmapflags);
1656 0 : if (bouncekva == 0) {
1657 0 : uvm_pagermapout(kva, npages);
1658 0 : uvm_swap_freepages(tpps, npages);
1659 0 : return (VM_PAGER_AGAIN);
1660 : }
1661 0 : }
1662 :
1663 : /* encrypt to swap */
1664 0 : if (write && bounce) {
1665 0 : int i, opages;
1666 : caddr_t src, dst;
1667 : u_int64_t block;
1668 :
1669 0 : src = (caddr_t) kva;
1670 0 : dst = (caddr_t) bouncekva;
1671 : block = startblk;
1672 0 : for (i = 0; i < npages; i++) {
1673 : #ifdef UVM_SWAP_ENCRYPT
1674 : struct swap_key *key;
1675 :
1676 0 : if (encrypt) {
1677 0 : key = SWD_KEY(sdp, startslot + i);
1678 0 : SWAP_KEY_GET(sdp, key); /* add reference */
1679 :
1680 0 : swap_encrypt(key, src, dst, block, PAGE_SIZE);
1681 0 : block += btodb(PAGE_SIZE);
1682 0 : } else {
1683 : #else
1684 : {
1685 : #endif /* UVM_SWAP_ENCRYPT */
1686 0 : memcpy(dst, src, PAGE_SIZE);
1687 : }
1688 : /* this just tells async callbacks to free */
1689 0 : atomic_setbits_int(&tpps[i]->pg_flags, PQ_ENCRYPT);
1690 0 : src += PAGE_SIZE;
1691 0 : dst += PAGE_SIZE;
1692 : }
1693 :
1694 0 : uvm_pagermapout(kva, npages);
1695 :
1696 : /* dispose of pages we dont use anymore */
1697 0 : opages = npages;
1698 0 : uvm_pager_dropcluster(NULL, NULL, pps, &opages,
1699 : PGO_PDFREECLUST);
1700 :
1701 : kva = bouncekva;
1702 0 : }
1703 :
1704 : /*
1705 : * now allocate a buf for the i/o.
1706 : * [make sure we don't put the pagedaemon to sleep...]
1707 : */
1708 0 : pflag = (async || curproc == uvm.pagedaemon_proc) ? PR_NOWAIT :
1709 : PR_WAITOK;
1710 0 : bp = pool_get(&bufpool, pflag | PR_ZERO);
1711 :
1712 : /*
1713 : * if we failed to get a swapbuf, return "try again"
1714 : */
1715 0 : if (bp == NULL) {
1716 0 : if (write && bounce) {
1717 : #ifdef UVM_SWAP_ENCRYPT
1718 : int i;
1719 :
1720 : /* swap encrypt needs cleanup */
1721 0 : if (encrypt)
1722 0 : for (i = 0; i < npages; i++)
1723 0 : SWAP_KEY_PUT(sdp, SWD_KEY(sdp,
1724 : startslot + i));
1725 : #endif
1726 :
1727 0 : uvm_pagermapout(kva, npages);
1728 0 : uvm_swap_freepages(tpps, npages);
1729 0 : }
1730 0 : return (VM_PAGER_AGAIN);
1731 : }
1732 :
1733 : /*
1734 : * prevent ASYNC reads.
1735 : * uvm_swap_io is only called from uvm_swap_get, uvm_swap_get
1736 : * assumes that all gets are SYNCIO. Just make sure here.
1737 : * XXXARTUBC - might not be true anymore.
1738 : */
1739 0 : if (!write) {
1740 0 : flags &= ~B_ASYNC;
1741 : async = 0;
1742 0 : }
1743 :
1744 : /*
1745 : * fill in the bp. we currently route our i/o through
1746 : * /dev/drum's vnode [swapdev_vp].
1747 : */
1748 0 : bp->b_flags = B_BUSY | B_NOCACHE | B_RAW | (flags & (B_READ|B_ASYNC));
1749 0 : bp->b_proc = &proc0; /* XXX */
1750 0 : bp->b_vnbufs.le_next = NOLIST;
1751 0 : if (bounce)
1752 0 : bp->b_data = (caddr_t)bouncekva;
1753 : else
1754 0 : bp->b_data = (caddr_t)kva;
1755 0 : bp->b_bq = NULL;
1756 0 : bp->b_blkno = startblk;
1757 0 : LIST_INIT(&bp->b_dep);
1758 0 : s = splbio();
1759 0 : bp->b_vp = NULL;
1760 0 : buf_replacevnode(bp, swapdev_vp);
1761 0 : splx(s);
1762 0 : bp->b_bufsize = bp->b_bcount = (long)npages << PAGE_SHIFT;
1763 :
1764 : /*
1765 : * for pageouts we must set "dirtyoff" [NFS client code needs it].
1766 : * and we bump v_numoutput (counter of number of active outputs).
1767 : */
1768 0 : if (write) {
1769 0 : bp->b_dirtyoff = 0;
1770 0 : bp->b_dirtyend = npages << PAGE_SHIFT;
1771 : #ifdef UVM_SWAP_ENCRYPT
1772 : /* mark the pages in the drum for decryption */
1773 0 : if (swap_encrypt_initialized)
1774 0 : uvm_swap_markdecrypt(sdp, startslot, npages, encrypt);
1775 : #endif
1776 0 : s = splbio();
1777 0 : swapdev_vp->v_numoutput++;
1778 0 : splx(s);
1779 0 : }
1780 :
1781 : /* for async ops we must set up the iodone handler. */
1782 0 : if (async) {
1783 0 : bp->b_flags |= B_CALL | (curproc == uvm.pagedaemon_proc ?
1784 : B_PDAEMON : 0);
1785 0 : bp->b_iodone = uvm_aio_biodone;
1786 0 : }
1787 :
1788 : /* now we start the I/O, and if async, return. */
1789 0 : VOP_STRATEGY(bp);
1790 0 : if (async)
1791 0 : return (VM_PAGER_PEND);
1792 :
1793 : /* must be sync i/o. wait for it to finish */
1794 0 : (void) biowait(bp);
1795 0 : result = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
1796 :
1797 : /* decrypt swap */
1798 0 : if (!write && !(bp->b_flags & B_ERROR)) {
1799 : int i;
1800 0 : caddr_t data = (caddr_t)kva;
1801 : caddr_t dst = (caddr_t)kva;
1802 : u_int64_t block = startblk;
1803 :
1804 0 : if (bounce)
1805 0 : data = (caddr_t)bouncekva;
1806 :
1807 0 : for (i = 0; i < npages; i++) {
1808 : #ifdef UVM_SWAP_ENCRYPT
1809 : struct swap_key *key;
1810 :
1811 : /* Check if we need to decrypt */
1812 0 : if (swap_encrypt_initialized &&
1813 0 : uvm_swap_needdecrypt(sdp, startslot + i)) {
1814 0 : key = SWD_KEY(sdp, startslot + i);
1815 0 : if (key->refcount == 0) {
1816 : result = VM_PAGER_ERROR;
1817 0 : break;
1818 : }
1819 0 : swap_decrypt(key, data, dst, block, PAGE_SIZE);
1820 0 : } else if (bounce) {
1821 : #else
1822 : if (bounce) {
1823 : #endif
1824 0 : memcpy(dst, data, PAGE_SIZE);
1825 0 : }
1826 0 : data += PAGE_SIZE;
1827 0 : dst += PAGE_SIZE;
1828 0 : block += btodb(PAGE_SIZE);
1829 0 : }
1830 0 : if (bounce)
1831 0 : uvm_pagermapout(bouncekva, npages);
1832 0 : }
1833 : /* kill the pager mapping */
1834 0 : uvm_pagermapout(kva, npages);
1835 :
1836 : /* Not anymore needed, free after encryption/bouncing */
1837 0 : if (!write && bounce)
1838 0 : uvm_swap_freepages(tpps, npages);
1839 :
1840 : /* now dispose of the buf */
1841 0 : s = splbio();
1842 0 : if (bp->b_vp)
1843 0 : brelvp(bp);
1844 :
1845 0 : if (write && bp->b_vp)
1846 0 : vwakeup(bp->b_vp);
1847 0 : pool_put(&bufpool, bp);
1848 0 : splx(s);
1849 :
1850 : /* finally return. */
1851 0 : return (result);
1852 0 : }
1853 :
1854 : void
1855 0 : swapmount(void)
1856 : {
1857 : struct swapdev *sdp;
1858 : struct swappri *spp;
1859 0 : struct vnode *vp;
1860 0 : dev_t swap_dev = swdevt[0].sw_dev;
1861 : char *nam;
1862 0 : char path[MNAMELEN + 1];
1863 :
1864 : /*
1865 : * No locking here since we happen to know that we will just be called
1866 : * once before any other process has forked.
1867 : */
1868 0 : if (swap_dev == NODEV)
1869 0 : return;
1870 :
1871 : #if defined(NFSCLIENT)
1872 0 : if (swap_dev == NETDEV) {
1873 : extern struct nfs_diskless nfs_diskless;
1874 :
1875 0 : snprintf(path, sizeof(path), "%s",
1876 : nfs_diskless.nd_swap.ndm_host);
1877 0 : vp = nfs_diskless.sw_vp;
1878 0 : goto gotit;
1879 : } else
1880 : #endif
1881 0 : if (bdevvp(swap_dev, &vp))
1882 0 : return;
1883 :
1884 : /* Construct a potential path to swap */
1885 0 : if ((nam = findblkname(major(swap_dev))))
1886 0 : snprintf(path, sizeof(path), "/dev/%s%d%c", nam,
1887 0 : DISKUNIT(swap_dev), 'a' + DISKPART(swap_dev));
1888 : else
1889 0 : snprintf(path, sizeof(path), "blkdev0x%x",
1890 : swap_dev);
1891 :
1892 : #if defined(NFSCLIENT)
1893 : gotit:
1894 : #endif
1895 0 : sdp = malloc(sizeof(*sdp), M_VMSWAP, M_WAITOK|M_ZERO);
1896 0 : spp = malloc(sizeof(*spp), M_VMSWAP, M_WAITOK);
1897 :
1898 0 : sdp->swd_flags = SWF_FAKE;
1899 0 : sdp->swd_dev = swap_dev;
1900 :
1901 0 : sdp->swd_pathlen = strlen(path) + 1;
1902 0 : sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK | M_ZERO);
1903 0 : strlcpy(sdp->swd_path, path, sdp->swd_pathlen);
1904 :
1905 0 : sdp->swd_vp = vp;
1906 :
1907 0 : swaplist_insert(sdp, spp, 0);
1908 :
1909 0 : if (swap_on(curproc, sdp)) {
1910 0 : swaplist_find(vp, 1);
1911 0 : swaplist_trim();
1912 0 : vput(sdp->swd_vp);
1913 0 : free(sdp->swd_path, M_VMSWAP, sdp->swd_pathlen);
1914 0 : free(sdp, M_VMSWAP, sizeof(*sdp));
1915 0 : return;
1916 : }
1917 0 : }
1918 :
1919 : #ifdef HIBERNATE
1920 : int
1921 0 : uvm_hibswap(dev_t dev, u_long *sp, u_long *ep)
1922 : {
1923 : struct swapdev *sdp, *swd = NULL;
1924 : struct swappri *spp;
1925 : struct extent_region *exr, *exrn;
1926 : u_long start = 0, end = 0, size = 0;
1927 :
1928 : /* no swap devices configured yet? */
1929 0 : if (uvmexp.nswapdev < 1 || dev != swdevt[0].sw_dev)
1930 0 : return (1);
1931 :
1932 0 : LIST_FOREACH(spp, &swap_priority, spi_swappri) {
1933 0 : TAILQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
1934 0 : if (sdp->swd_dev == dev)
1935 0 : swd = sdp;
1936 : }
1937 : }
1938 :
1939 0 : if (swd == NULL || (swd->swd_flags & SWF_ENABLE) == 0)
1940 0 : return (1);
1941 :
1942 0 : LIST_FOREACH(exr, &swd->swd_ex->ex_regions, er_link) {
1943 : u_long gapstart, gapend, gapsize;
1944 :
1945 0 : gapstart = exr->er_end + 1;
1946 0 : exrn = LIST_NEXT(exr, er_link);
1947 0 : if (!exrn)
1948 0 : break;
1949 0 : gapend = exrn->er_start - 1;
1950 0 : gapsize = gapend - gapstart;
1951 0 : if (gapsize > size) {
1952 : start = gapstart;
1953 : end = gapend;
1954 : size = gapsize;
1955 0 : }
1956 0 : }
1957 :
1958 0 : if (size) {
1959 0 : *sp = start;
1960 0 : *ep = end;
1961 0 : return (0);
1962 : }
1963 0 : return (1);
1964 0 : }
1965 : #endif /* HIBERNATE */
|