Line data Source code
1 : /* $OpenBSD: uvm_pdaemon.c,v 1.79 2018/01/18 18:08:51 bluhm Exp $ */
2 : /* $NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $ */
3 :
4 : /*
5 : * Copyright (c) 1997 Charles D. Cranor and Washington University.
6 : * Copyright (c) 1991, 1993, The Regents of the University of California.
7 : *
8 : * All rights reserved.
9 : *
10 : * This code is derived from software contributed to Berkeley by
11 : * The Mach Operating System project at Carnegie-Mellon University.
12 : *
13 : * Redistribution and use in source and binary forms, with or without
14 : * modification, are permitted provided that the following conditions
15 : * are met:
16 : * 1. Redistributions of source code must retain the above copyright
17 : * notice, this list of conditions and the following disclaimer.
18 : * 2. Redistributions in binary form must reproduce the above copyright
19 : * notice, this list of conditions and the following disclaimer in the
20 : * documentation and/or other materials provided with the distribution.
21 : * 3. Neither the name of the University nor the names of its contributors
22 : * may be used to endorse or promote products derived from this software
23 : * without specific prior written permission.
24 : *
25 : * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 : * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 : * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 : * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 : * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 : * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 : * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 : * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 : * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 : * SUCH DAMAGE.
36 : *
37 : * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
38 : * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
39 : *
40 : *
41 : * Copyright (c) 1987, 1990 Carnegie-Mellon University.
42 : * All rights reserved.
43 : *
44 : * Permission to use, copy, modify and distribute this software and
45 : * its documentation is hereby granted, provided that both the copyright
46 : * notice and this permission notice appear in all copies of the
47 : * software, derivative works or modified versions, and any portions
48 : * thereof, and that both notices appear in supporting documentation.
49 : *
50 : * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 : * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 : * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 : *
54 : * Carnegie Mellon requests users of this software to return to
55 : *
56 : * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
57 : * School of Computer Science
58 : * Carnegie Mellon University
59 : * Pittsburgh PA 15213-3890
60 : *
61 : * any improvements or extensions that they make and grant Carnegie the
62 : * rights to redistribute these changes.
63 : */
64 :
65 : /*
66 : * uvm_pdaemon.c: the page daemon
67 : */
68 :
69 : #include <sys/param.h>
70 : #include <sys/systm.h>
71 : #include <sys/kernel.h>
72 : #include <sys/pool.h>
73 : #include <sys/proc.h>
74 : #include <sys/buf.h>
75 : #include <sys/mount.h>
76 : #include <sys/atomic.h>
77 :
78 : #ifdef HIBERNATE
79 : #include <sys/hibernate.h>
80 : #endif
81 :
82 : #include <uvm/uvm.h>
83 :
84 : /*
85 : * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
86 : * in a pass thru the inactive list when swap is full. the value should be
87 : * "small"... if it's too large we'll cycle the active pages thru the inactive
88 : * queue too quickly for them to be referenced and thus avoid being freed.
89 : */
90 :
91 : #define UVMPD_NUMDIRTYREACTS 16
92 :
93 :
94 : /*
95 : * local prototypes
96 : */
97 :
98 : void uvmpd_scan(void);
99 : boolean_t uvmpd_scan_inactive(struct pglist *);
100 : void uvmpd_tune(void);
101 : void uvmpd_drop(struct pglist *);
102 :
103 : /*
104 : * uvm_wait: wait (sleep) for the page daemon to free some pages
105 : *
106 : * => should be called with all locks released
107 : * => should _not_ be called by the page daemon (to avoid deadlock)
108 : */
109 :
110 : void
111 0 : uvm_wait(const char *wmsg)
112 : {
113 : int timo = 0;
114 :
115 : #ifdef DIAGNOSTIC
116 0 : if (curproc == &proc0)
117 0 : panic("%s: cannot sleep for memory during boot", __func__);
118 : #endif
119 :
120 : /* check for page daemon going to sleep (waiting for itself) */
121 0 : if (curproc == uvm.pagedaemon_proc) {
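		/*
		 * Try to reclaim a few pages from the buffer cache first;
		 * if bufbackoff() succeeds (returns 0) we avoid sleeping
		 * entirely and the deadlock handling below is not needed.
		 */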
122 0 : printf("uvm_wait emergency bufbackoff\n");
123 0 : if (bufbackoff(NULL, 4) == 0)
124 0 : return;
125 : /*
126 : * now we have a problem: the pagedaemon wants to go to
127 : * sleep until it frees more memory. but how can it
128 : * free more memory if it is asleep? that is a deadlock.
129 : * we have two options:
130 : * [1] panic now
131 : * [2] put a timeout on the sleep, thus causing the
132 : * pagedaemon to only pause (rather than sleep forever)
133 : *
134 : * note that option [2] will only help us if we get lucky
135 : * and some other process on the system breaks the deadlock
136 : * by exiting or freeing memory (thus allowing the pagedaemon
137 : * to continue). for now we panic if DEBUG is defined,
138 : * otherwise we hope for the best with option [2] (better
139 : * yet, this should never happen in the first place!).
140 : */
141 :
142 0 : printf("pagedaemon: deadlock detected!\n");
143 0 : timo = hz >> 3; /* set timeout */
144 : #if defined(DEBUG)
145 : /* DEBUG: panic so we can debug it */
146 : panic("pagedaemon deadlock");
147 : #endif
148 0 : }
149 :
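	/*
	 * Wake the page daemon and sleep on uvmexp.free under the free
	 * page queue mutex so the wakeup cannot be missed; PNORELOCK
	 * drops the mutex again when msleep() returns.
	 */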
150 0 : uvm_lock_fpageq();
151 0 : wakeup(&uvm.pagedaemon); /* wake the daemon! */
152 0 : msleep(&uvmexp.free, &uvm.fpageqlock, PVM | PNORELOCK, wmsg, timo);
153 0 : }
154 :
155 : /*
156 : * uvmpd_tune: tune paging parameters
157 : *
158 : * => called whenever memory is added to (or removed from?) the system
159 : * => caller must call with page queues locked
160 : */
161 :
162 : void
163 0 : uvmpd_tune(void)
164 : {
165 :
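	/* start freemin at roughly 3% (1/30) of all managed pages */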
166 0 : uvmexp.freemin = uvmexp.npages / 30;
167 :
168 : /* between 16k and 512k */
169 : /* XXX: what are these values good for? */
170 0 : uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
171 : #if 0
172 : uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT);
173 : #endif
174 :
175 : /* Make sure there's always a user page free. */
176 0 : if (uvmexp.freemin < uvmexp.reserve_kernel + 1)
177 0 : uvmexp.freemin = uvmexp.reserve_kernel + 1;
178 :
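	/* free target: one third above freemin, and always at least freemin + 1 */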
179 0 : uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
180 0 : if (uvmexp.freetarg <= uvmexp.freemin)
181 0 : uvmexp.freetarg = uvmexp.freemin + 1;
182 :
183 : /* uvmexp.inactarg: computed in main daemon loop */
184 :
185 0 : uvmexp.wiredmax = uvmexp.npages / 3;
186 0 : }
187 :
188 : /*
189 : * uvm_pageout: the main loop for the pagedaemon
190 : */
191 : void
192 0 : uvm_pageout(void *arg)
193 : {
194 0 : struct uvm_constraint_range constraint;
195 : struct uvm_pmalloc *pma;
196 : int work_done;
197 : int npages = 0;
198 :
199 : /* ensure correct priority and set paging parameters... */
200 0 : uvm.pagedaemon_proc = curproc;
201 0 : (void) spl0();
202 0 : uvm_lock_pageq();
203 0 : npages = uvmexp.npages;
204 0 : uvmpd_tune();
205 0 : uvm_unlock_pageq();
206 :
207 0 : for (;;) {
208 : long size;
209 : work_done = 0; /* No work done this iteration. */
210 :
211 0 : uvm_lock_fpageq();
212 :
213 0 : if (TAILQ_EMPTY(&uvm.pmr_control.allocs)) {
214 0 : msleep(&uvm.pagedaemon, &uvm.fpageqlock, PVM,
215 : "pgdaemon", 0);
216 0 : uvmexp.pdwoke++;
217 0 : }
218 :
219 0 : if ((pma = TAILQ_FIRST(&uvm.pmr_control.allocs)) != NULL) {
220 0 : pma->pm_flags |= UVM_PMA_BUSY;
221 0 : constraint = pma->pm_constraint;
222 0 : } else
223 0 : constraint = no_constraint;
224 :
225 0 : uvm_unlock_fpageq();
226 :
227 : /* now lock page queues and recompute inactive count */
228 0 : uvm_lock_pageq();
229 0 : if (npages != uvmexp.npages) { /* check for new pages? */
230 : npages = uvmexp.npages;
231 0 : uvmpd_tune();
232 0 : }
233 :
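		/* aim to keep about a third of the active+inactive pages inactive */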
234 0 : uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
235 0 : if (uvmexp.inactarg <= uvmexp.freetarg) {
236 0 : uvmexp.inactarg = uvmexp.freetarg + 1;
237 0 : }
238 :
239 : /* Reclaim pages from the buffer cache if possible. */
240 : size = 0;
241 0 : if (pma != NULL)
242 0 : size += pma->pm_size >> PAGE_SHIFT;
243 0 : if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg)
244 0 : size += uvmexp.freetarg - (uvmexp.free -
245 0 : BUFPAGES_DEFICIT);
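		/*
		 * Drop the page queue lock and ask the buffer cache to give
		 * back twice the computed shortage before scanning.
		 */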
246 0 : uvm_unlock_pageq();
247 0 : (void) bufbackoff(&constraint, size * 2);
248 0 : uvm_lock_pageq();
249 :
250 : /* Scan if needed to meet our targets. */
251 0 : if (pma != NULL ||
252 0 : ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
253 0 : ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
254 0 : uvmpd_scan();
255 : work_done = 1; /* XXX we hope... */
256 0 : }
257 :
258 : /*
259 : * if there's any free memory to be had,
260 : * wake up any waiters.
261 : */
262 0 : uvm_lock_fpageq();
263 0 : if (uvmexp.free > uvmexp.reserve_kernel ||
264 0 : uvmexp.paging == 0) {
265 0 : wakeup(&uvmexp.free);
266 0 : }
267 :
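		/*
		 * Report this pass back to the waiting pmemrange allocator:
		 * clear the BUSY marker, flag the request as failed if no
		 * work was done, unlink it once it has failed or been
		 * satisfied, and wake up the waiter.
		 */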
268 0 : if (pma != NULL) {
269 0 : pma->pm_flags &= ~UVM_PMA_BUSY;
270 0 : if (!work_done)
271 0 : pma->pm_flags |= UVM_PMA_FAIL;
272 0 : if (pma->pm_flags & (UVM_PMA_FAIL | UVM_PMA_FREED)) {
273 0 : pma->pm_flags &= ~UVM_PMA_LINKED;
274 0 : TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,
275 : pmq);
276 0 : }
277 0 : wakeup(pma);
278 0 : }
279 0 : uvm_unlock_fpageq();
280 :
281 : /* scan done. unlock page queues (only lock we are holding) */
282 0 : uvm_unlock_pageq();
283 :
284 0 : sched_pause(yield);
285 : }
286 : /*NOTREACHED*/
287 : }
288 :
289 :
290 : /*
291 : * uvm_aiodone_daemon: main loop for the aiodone daemon.
292 : */
293 : void
294 0 : uvm_aiodone_daemon(void *arg)
295 : {
296 : int s, free;
297 : struct buf *bp, *nbp;
298 :
299 0 : uvm.aiodoned_proc = curproc;
300 :
301 0 : for (;;) {
302 : /*
303 : * Check for done aio structures. If we've got structures to
304 : * process, do so. Otherwise sleep while avoiding races.
305 : */
306 0 : mtx_enter(&uvm.aiodoned_lock);
307 0 : while ((bp = TAILQ_FIRST(&uvm.aio_done)) == NULL)
308 0 : msleep(&uvm.aiodoned, &uvm.aiodoned_lock,
309 : PVM, "aiodoned", 0);
310 : /* Take the list for ourselves. */
311 0 : TAILQ_INIT(&uvm.aio_done);
312 0 : mtx_leave(&uvm.aiodoned_lock);
313 :
314 : /* process each i/o that's done. */
315 0 : free = uvmexp.free;
316 0 : while (bp != NULL) {
317 0 : if (bp->b_flags & B_PDAEMON) {
318 0 : uvmexp.paging -= bp->b_bufsize >> PAGE_SHIFT;
319 0 : }
320 0 : nbp = TAILQ_NEXT(bp, b_freelist);
321 0 : s = splbio(); /* b_iodone must be called at splbio */
322 0 : (*bp->b_iodone)(bp);
323 0 : splx(s);
324 : bp = nbp;
325 :
326 0 : sched_pause(yield);
327 : }
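		/*
		 * If memory was critically low when this batch started,
		 * prod the page daemon; otherwise wake anyone sleeping in
		 * uvm_wait() for free pages.
		 */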
328 0 : uvm_lock_fpageq();
329 0 : wakeup(free <= uvmexp.reserve_kernel ? &uvm.pagedaemon :
330 : &uvmexp.free);
331 0 : uvm_unlock_fpageq();
332 : }
333 : }
334 :
335 :
336 :
337 : /*
338 : * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
339 : *
340 : * => called with page queues locked
341 : * => we work on meeting our free target by converting inactive pages
342 : * into free pages.
343 : * => we handle the building of swap-backed clusters
344 : * => we return TRUE if we are exiting because we met our target
345 : */
346 :
347 : boolean_t
348 0 : uvmpd_scan_inactive(struct pglist *pglst)
349 : {
350 : boolean_t retval = FALSE; /* assume we haven't hit target */
351 : int free, result;
352 : struct vm_page *p, *nextpg;
353 : struct uvm_object *uobj;
354 0 : struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
355 0 : int npages;
356 0 : struct vm_page *swpps[MAXBSIZE >> PAGE_SHIFT]; /* XXX: see below */
357 0 : int swnpages, swcpages; /* XXX: see below */
358 : int swslot;
359 : struct vm_anon *anon;
360 : boolean_t swap_backed;
361 : vaddr_t start;
362 : int dirtyreacts;
363 :
364 : /*
365 : * note: we currently keep swap-backed pages on a separate inactive
366 : * list from object-backed pages. however, merging the two lists
367 : * back together again hasn't been ruled out. thus, we keep our
368 : * swap cluster in "swpps" rather than in pps (allows us to mix
369 : * clustering types in the event of a mixed inactive queue).
370 : */
371 : /*
372 : * swslot is non-zero if we are building a swap cluster. we want
373 : * to stay in the loop while we have a page to scan or we have
374 : * a swap-cluster to build.
375 : */
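	/*
	 * cluster bookkeeping used below: swslot is the first swap slot of
	 * the cluster being built, swnpages is the cluster's capacity and
	 * swcpages counts how many of those slots have pages assigned.
	 */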
376 : swslot = 0;
377 0 : swnpages = swcpages = 0;
378 : free = 0;
379 : dirtyreacts = 0;
380 :
381 0 : for (p = TAILQ_FIRST(pglst); p != NULL || swslot != 0; p = nextpg) {
382 : /*
383 : * note that p can be NULL iff we have traversed the whole
384 : * list and need to do one final swap-backed clustered pageout.
385 : */
386 : uobj = NULL;
387 : anon = NULL;
388 :
389 0 : if (p) {
390 : /*
391 : * update our copy of "free" and see if we've met
392 : * our target
393 : */
394 0 : free = uvmexp.free - BUFPAGES_DEFICIT;
395 :
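			/*
			 * stop scanning once free pages plus pageouts already
			 * in flight reach four times the free target, or once
			 * we have reactivated enough dirty pages.
			 */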
396 0 : if (free + uvmexp.paging >= uvmexp.freetarg << 2 ||
397 0 : dirtyreacts == UVMPD_NUMDIRTYREACTS) {
398 : retval = TRUE;
399 :
400 0 : if (swslot == 0) {
401 : /* exit now if no swap-i/o pending */
402 : break;
403 : }
404 :
405 : /* set p to null to signal final swap i/o */
406 : p = NULL;
407 0 : }
408 : }
409 :
410 0 : if (p) { /* if (we have a new page to consider) */
411 : /*
412 : * we are below target and have a new page to consider.
413 : */
414 0 : uvmexp.pdscans++;
415 0 : nextpg = TAILQ_NEXT(p, pageq);
416 :
417 : /*
418 : * move referenced pages back to active queue and
419 : * skip to next page (unlikely to happen since
420 : * inactive pages shouldn't have any valid mappings
421 : * and we cleared reference before deactivating).
422 : */
423 :
424 0 : if (pmap_is_referenced(p)) {
425 0 : uvm_pageactivate(p);
426 0 : uvmexp.pdreact++;
427 0 : continue;
428 : }
429 :
430 0 : if (p->pg_flags & PQ_ANON) {
431 0 : anon = p->uanon;
432 0 : KASSERT(anon != NULL);
433 0 : if (p->pg_flags & PG_BUSY) {
434 0 : uvmexp.pdbusy++;
435 : /* someone else owns page, skip it */
436 0 : continue;
437 : }
438 0 : uvmexp.pdanscan++;
439 0 : } else {
440 0 : uobj = p->uobject;
441 0 : KASSERT(uobj != NULL);
442 0 : if (p->pg_flags & PG_BUSY) {
443 0 : uvmexp.pdbusy++;
444 : /* someone else owns page, skip it */
445 0 : continue;
446 : }
447 0 : uvmexp.pdobscan++;
448 : }
449 :
450 : /*
451 : * we now have the page queues locked.
452 : * the page is not busy. if the page is clean we
453 : * can free it now and continue.
454 : */
455 0 : if (p->pg_flags & PG_CLEAN) {
456 0 : if (p->pg_flags & PQ_SWAPBACKED) {
457 : /* this page now lives only in swap */
458 0 : uvmexp.swpgonly++;
459 0 : }
460 :
461 : /* zap all mappings with pmap_page_protect... */
462 0 : pmap_page_protect(p, PROT_NONE);
463 0 : uvm_pagefree(p);
464 0 : uvmexp.pdfreed++;
465 :
466 0 : if (anon) {
467 :
468 : /*
469 : * an anonymous page can only be clean
470 : * if it has backing store assigned.
471 : */
472 :
473 0 : KASSERT(anon->an_swslot != 0);
474 :
475 : /* remove from object */
476 0 : anon->an_page = NULL;
477 0 : }
478 : continue;
479 : }
480 :
481 : /*
482 : * this page is dirty, skip it if we'll have met our
483 : * free target when all the current pageouts complete.
484 : */
485 0 : if (free + uvmexp.paging > uvmexp.freetarg << 2) {
486 : continue;
487 : }
488 :
489 : /*
490 : * this page is dirty, but we can't page it out: swap is
491 : * completely full of pages that exist only in swap.
492 : * reactivate it so that we eventually cycle
493 : * all pages thru the inactive queue.
494 : */
495 0 : KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
496 0 : if ((p->pg_flags & PQ_SWAPBACKED) &&
497 0 : uvmexp.swpgonly == uvmexp.swpages) {
498 0 : dirtyreacts++;
499 0 : uvm_pageactivate(p);
500 0 : continue;
501 : }
502 :
503 : /*
504 : * if the page is swap-backed and dirty and swap space
505 : * is full, free any swap allocated to the page
506 : * so that other pages can be paged out.
507 : */
508 0 : KASSERT(uvmexp.swpginuse <= uvmexp.swpages);
509 0 : if ((p->pg_flags & PQ_SWAPBACKED) &&
510 0 : uvmexp.swpginuse == uvmexp.swpages) {
511 :
512 0 : if ((p->pg_flags & PQ_ANON) &&
513 0 : p->uanon->an_swslot) {
514 0 : uvm_swap_free(p->uanon->an_swslot, 1);
515 0 : p->uanon->an_swslot = 0;
516 0 : }
517 0 : if (p->pg_flags & PQ_AOBJ) {
518 0 : uao_dropswap(p->uobject,
519 0 : p->offset >> PAGE_SHIFT);
520 0 : }
521 : }
522 :
523 : /*
524 : * the page we are looking at is dirty. we must
525 : * clean it before it can be freed. to do this we
526 : * first mark the page busy so that no one else will
527 : * touch the page. we write protect all the mappings
528 : * of the page so that it cannot be modified while it is
529 : * in I/O.
530 : */
531 :
532 0 : swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
533 0 : atomic_setbits_int(&p->pg_flags, PG_BUSY);
534 : UVM_PAGE_OWN(p, "scan_inactive");
535 0 : pmap_page_protect(p, PROT_READ);
536 0 : uvmexp.pgswapout++;
537 :
538 : /*
539 : * for swap-backed pages we need to (re)allocate
540 : * swap space.
541 : */
542 0 : if (swap_backed) {
543 : /* free old swap slot (if any) */
544 0 : if (anon) {
545 0 : if (anon->an_swslot) {
546 0 : uvm_swap_free(anon->an_swslot,
547 : 1);
548 0 : anon->an_swslot = 0;
549 0 : }
550 : } else {
551 0 : uao_dropswap(uobj,
552 0 : p->offset >> PAGE_SHIFT);
553 : }
554 :
555 : /* start new cluster (if necessary) */
556 0 : if (swslot == 0) {
557 0 : swnpages = MAXBSIZE >> PAGE_SHIFT;
558 0 : swslot = uvm_swap_alloc(&swnpages,
559 : TRUE);
560 0 : if (swslot == 0) {
561 : /* no swap? give up! */
562 0 : atomic_clearbits_int(
563 : &p->pg_flags,
564 : PG_BUSY);
565 : UVM_PAGE_OWN(p, NULL);
566 0 : continue;
567 : }
568 : swcpages = 0; /* cluster is empty */
569 0 : }
570 :
571 : /* add block to cluster */
572 0 : swpps[swcpages] = p;
573 0 : if (anon)
574 0 : anon->an_swslot = swslot + swcpages;
575 : else
576 0 : uao_set_swslot(uobj,
577 0 : p->offset >> PAGE_SHIFT,
578 0 : swslot + swcpages);
579 0 : swcpages++;
580 0 : }
581 : } else {
582 : /* if p == NULL we must be doing a last swap i/o */
583 : swap_backed = TRUE;
584 : }
585 :
586 : /*
587 : * now consider doing the pageout.
588 : *
589 : * for swap-backed pages, we do the pageout if we have either
590 : * filled the cluster (in which case swnpages == swcpages) or
591 : * run out of pages (p == NULL).
592 : *
593 : * for object pages, we always do the pageout.
594 : */
595 0 : if (swap_backed) {
596 0 : if (p) { /* if we just added a page to cluster */
597 : /* cluster not full yet? */
598 0 : if (swcpages < swnpages)
599 : continue;
600 : }
601 :
602 : /* starting I/O now... set up for it */
603 0 : npages = swcpages;
604 0 : ppsp = swpps;
605 : /* for swap-backed pages only */
606 0 : start = (vaddr_t) swslot;
607 :
608 : /* if this is the final pageout we could have a few
609 : * extra swap blocks */
610 0 : if (swcpages < swnpages) {
611 0 : uvm_swap_free(swslot + swcpages,
612 0 : (swnpages - swcpages));
613 0 : }
614 : } else {
615 : /* normal object pageout */
616 0 : ppsp = pps;
617 0 : npages = sizeof(pps) / sizeof(struct vm_page *);
618 : /* not looked at because PGO_ALLPAGES is set */
619 : start = 0;
620 : }
621 :
622 : /*
623 : * now do the pageout.
624 : *
625 : * for swap_backed pages we have already built the cluster.
626 : * for !swap_backed pages, uvm_pager_put will call the object's
627 : * "make put cluster" function to build a cluster on our behalf.
628 : *
629 : * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
630 : * it to free the cluster pages for us on a successful I/O (it
631 : * always does this for un-successful I/O requests). this
632 : * allows us to do clustered pageout without having to deal
633 : * with cluster pages at this level.
634 : *
635 : * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
636 : * IN: locked: page queues
637 : * OUT: locked:
638 : * !locked: pageqs
639 : */
640 :
641 0 : uvmexp.pdpageouts++;
642 0 : result = uvm_pager_put(swap_backed ? NULL : uobj, p,
643 : &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);
644 :
645 : /*
646 : * if we did i/o to swap, zero swslot to indicate that we are
647 : * no longer building a swap-backed cluster.
648 : */
649 :
650 0 : if (swap_backed)
651 0 : swslot = 0; /* done with this cluster */
652 :
653 : /*
654 : * first, we check for VM_PAGER_PEND which means that the
655 : * async I/O is in progress and the async I/O done routine
656 : * will clean up after us. in this case we move on to the
657 : * next page.
658 : *
659 : * there is a very remote chance that the pending async i/o can
660 : * finish _before_ we get here. if that happens, our page "p"
661 : * may no longer be on the inactive queue. so we verify this
662 : * when determining the next page (starting over at the head if
663 : * we've lost our inactive page).
664 : */
665 :
666 0 : if (result == VM_PAGER_PEND) {
667 0 : uvmexp.paging += npages;
668 0 : uvm_lock_pageq();
669 0 : uvmexp.pdpending++;
670 0 : if (p) {
671 0 : if (p->pg_flags & PQ_INACTIVE)
672 0 : nextpg = TAILQ_NEXT(p, pageq);
673 : else
674 0 : nextpg = TAILQ_FIRST(pglst);
675 : } else {
676 : nextpg = NULL;
677 : }
678 : continue;
679 : }
680 :
681 : /* clean up "p" if we have one */
682 0 : if (p) {
683 : /*
684 : * the I/O request to "p" is done and uvm_pager_put
685 : * has freed any cluster pages it may have allocated
686 : * during I/O. all that is left for us to do is
687 : * clean up page "p" (which is still PG_BUSY).
688 : *
689 : * our result could be one of the following:
690 : * VM_PAGER_OK: successful pageout
691 : *
692 : * VM_PAGER_AGAIN: tmp resource shortage, we skip
693 : * to next page
694 : * VM_PAGER_{FAIL,ERROR,BAD}: an error. we
695 : * "reactivate" page to get it out of the way (it
696 : * will eventually drift back into the inactive
697 : * queue for a retry).
698 : * VM_PAGER_UNLOCK: should never see this as it is
699 : * only valid for "get" operations
700 : */
701 :
702 : /* relock p's object: page queues not locked yet, so
703 : * no need for "try" */
704 :
705 : #ifdef DIAGNOSTIC
706 0 : if (result == VM_PAGER_UNLOCK)
707 0 : panic("pagedaemon: pageout returned "
708 : "invalid 'unlock' code");
709 : #endif
710 :
711 : /* handle PG_WANTED now */
712 0 : if (p->pg_flags & PG_WANTED)
713 0 : wakeup(p);
714 :
715 0 : atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED);
716 : UVM_PAGE_OWN(p, NULL);
717 :
718 : /* released during I/O? Can only happen for anons */
719 0 : if (p->pg_flags & PG_RELEASED) {
720 0 : KASSERT(anon != NULL);
721 : /*
722 : * remove page so we can get nextpg,
723 : * also zero out anon so we don't use
724 : * it after the free.
725 : */
726 0 : anon->an_page = NULL;
727 0 : p->uanon = NULL;
728 :
729 0 : uvm_anfree(anon); /* kills anon */
730 0 : pmap_page_protect(p, PROT_NONE);
731 : anon = NULL;
732 0 : uvm_lock_pageq();
733 0 : nextpg = TAILQ_NEXT(p, pageq);
734 : /* free released page */
735 0 : uvm_pagefree(p);
736 0 : } else { /* page was not released during I/O */
737 0 : uvm_lock_pageq();
738 0 : nextpg = TAILQ_NEXT(p, pageq);
739 0 : if (result != VM_PAGER_OK) {
740 : /* pageout was a failure... */
741 0 : if (result != VM_PAGER_AGAIN)
742 0 : uvm_pageactivate(p);
743 0 : pmap_clear_reference(p);
744 : /* XXXCDC: if (swap_backed) FREE p's
745 : * swap block? */
746 0 : } else {
747 : /* pageout was a success... */
748 0 : pmap_clear_reference(p);
749 0 : pmap_clear_modify(p);
750 0 : atomic_setbits_int(&p->pg_flags,
751 : PG_CLEAN);
752 : }
753 : }
754 :
755 : /*
756 : * drop object lock (if there is an object left). do
757 : * a safety check of nextpg to make sure it is on the
758 : * inactive queue (it should be since PG_BUSY pages on
759 : * the inactive queue can't be re-queued [note: not
760 : * true for active queue]).
761 : */
762 :
763 0 : if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
764 0 : nextpg = TAILQ_FIRST(pglst); /* reload! */
765 0 : }
766 : } else {
767 : /*
768 : * if p is null in this loop, make sure it stays null
769 : * in the next loop.
770 : */
771 : nextpg = NULL;
772 :
773 : /*
774 : * lock page queues here just so they're always locked
775 : * at the end of the loop.
776 : */
777 0 : uvm_lock_pageq();
778 : }
779 : }
780 0 : return (retval);
781 0 : }
782 :
783 : /*
784 : * uvmpd_scan: scan the page queues and attempt to meet our targets.
785 : *
786 : * => called with pageq's locked
787 : */
788 :
789 : void
790 0 : uvmpd_scan(void)
791 : {
792 : int free, inactive_shortage, swap_shortage, pages_freed;
793 : struct vm_page *p, *nextpg;
794 : struct uvm_object *uobj;
795 : boolean_t got_it;
796 :
797 0 : uvmexp.pdrevs++; /* counter */
798 : uobj = NULL;
799 :
800 : /*
801 : * get current "free" page count
802 : */
803 0 : free = uvmexp.free - BUFPAGES_DEFICIT;
804 :
805 : #ifndef __SWAP_BROKEN
806 : /*
807 : * swap out some processes if we are below our free target.
808 : * we need to unlock the page queues for this.
809 : */
810 0 : if (free < uvmexp.freetarg) {
811 0 : uvmexp.pdswout++;
812 0 : uvm_unlock_pageq();
813 0 : uvm_swapout_threads();
814 0 : uvm_lock_pageq();
815 0 : }
816 : #endif
817 :
818 : /*
819 : * now we want to work on meeting our targets. first we work on our
820 : * free target by converting inactive pages into free pages. then
821 : * we work on meeting our inactive target by converting active pages
822 : * to inactive ones.
823 : */
824 :
825 : /*
826 : * alternate starting queue between swap and object based on the
827 : * low bit of uvmexp.pdrevs (which we bump by one each call).
828 : */
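	/*
	 * odd passes try the swap-backed inactive list first, then the
	 * object-backed list; even passes go object first and only fall
	 * back to the swap-backed list if the free target is still unmet.
	 * the swap-backed list is only scanned when swap devices exist.
	 */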
829 : got_it = FALSE;
830 0 : pages_freed = uvmexp.pdfreed; /* XXX - int */
831 0 : if ((uvmexp.pdrevs & 1) != 0 && uvmexp.nswapdev != 0)
832 0 : got_it = uvmpd_scan_inactive(&uvm.page_inactive_swp);
833 0 : if (!got_it)
834 0 : got_it = uvmpd_scan_inactive(&uvm.page_inactive_obj);
835 0 : if (!got_it && (uvmexp.pdrevs & 1) == 0 && uvmexp.nswapdev != 0)
836 0 : (void) uvmpd_scan_inactive(&uvm.page_inactive_swp);
837 0 : pages_freed = uvmexp.pdfreed - pages_freed;
838 :
839 : /*
840 : * we have done the scan to get free pages. now we work on meeting
841 : * our inactive target.
842 : */
843 0 : inactive_shortage = uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;
844 :
845 : /*
846 : * detect if we're not going to be able to page anything out
847 : * until we free some swap resources from active pages.
848 : */
849 : swap_shortage = 0;
850 0 : if (uvmexp.free < uvmexp.freetarg &&
851 0 : uvmexp.swpginuse == uvmexp.swpages &&
852 0 : uvmexp.swpgonly < uvmexp.swpages &&
853 0 : pages_freed == 0) {
854 0 : swap_shortage = uvmexp.freetarg - uvmexp.free;
855 0 : }
856 :
857 0 : for (p = TAILQ_FIRST(&uvm.page_active);
858 0 : p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
859 : p = nextpg) {
860 0 : nextpg = TAILQ_NEXT(p, pageq);
861 :
862 : /* skip this page if it's busy. */
863 0 : if (p->pg_flags & PG_BUSY)
864 : continue;
865 :
866 0 : if (p->pg_flags & PQ_ANON)
867 0 : KASSERT(p->uanon != NULL);
868 : else
869 0 : KASSERT(p->uobject != NULL);
870 :
871 : /*
872 : * if there's a shortage of swap, free any swap allocated
873 : * to this page so that other pages can be paged out.
874 : */
875 0 : if (swap_shortage > 0) {
876 0 : if ((p->pg_flags & PQ_ANON) && p->uanon->an_swslot) {
877 0 : uvm_swap_free(p->uanon->an_swslot, 1);
878 0 : p->uanon->an_swslot = 0;
879 0 : atomic_clearbits_int(&p->pg_flags, PG_CLEAN);
880 0 : swap_shortage--;
881 0 : }
882 0 : if (p->pg_flags & PQ_AOBJ) {
883 0 : int slot = uao_set_swslot(p->uobject,
884 0 : p->offset >> PAGE_SHIFT, 0);
885 0 : if (slot) {
886 0 : uvm_swap_free(slot, 1);
887 0 : atomic_clearbits_int(&p->pg_flags,
888 : PG_CLEAN);
889 0 : swap_shortage--;
890 0 : }
891 0 : }
892 : }
893 :
894 : /*
895 : * deactivate this page if there's a shortage of
896 : * inactive pages.
897 : */
898 0 : if (inactive_shortage > 0) {
899 0 : pmap_page_protect(p, PROT_NONE);
900 : /* no need to check wire_count as pg is "active" */
901 0 : uvm_pagedeactivate(p);
902 0 : uvmexp.pddeact++;
903 0 : inactive_shortage--;
904 0 : }
905 : }
906 0 : }
907 :
908 : #ifdef HIBERNATE
909 :
910 : /*
911 : * uvmpd_drop: drop clean pages from list
912 : */
913 : void
914 0 : uvmpd_drop(struct pglist *pglst)
915 : {
916 : struct vm_page *p, *nextpg;
917 :
918 0 : for (p = TAILQ_FIRST(pglst); p != NULL; p = nextpg) {
919 0 : nextpg = TAILQ_NEXT(p, pageq);
920 :
921 0 : if (p->pg_flags & PQ_ANON || p->uobject == NULL)
922 : continue;
923 :
924 0 : if (p->pg_flags & PG_BUSY)
925 : continue;
926 :
927 0 : if (p->pg_flags & PG_CLEAN) {
928 : /*
929 : * we now have the page queues locked.
930 : * the page is not busy. if the page is clean we
931 : * can free it now and continue.
932 : */
933 0 : if (p->pg_flags & PG_CLEAN) {
934 0 : if (p->pg_flags & PQ_SWAPBACKED) {
935 : /* this page now lives only in swap */
936 0 : uvmexp.swpgonly++;
937 0 : }
938 :
939 : /* zap all mappings with pmap_page_protect... */
940 0 : pmap_page_protect(p, PROT_NONE);
941 0 : uvm_pagefree(p);
942 0 : }
943 : }
944 : }
945 0 : }
946 :
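/*
 * uvmpd_hibernate: free every clean, object-backed page on the page
 * queues so that as much memory as possible is free before hibernating.
 */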
947 : void
948 0 : uvmpd_hibernate(void)
949 : {
950 0 : uvm_lock_pageq();
951 :
952 0 : uvmpd_drop(&uvm.page_inactive_swp);
953 0 : uvmpd_drop(&uvm.page_inactive_obj);
954 0 : uvmpd_drop(&uvm.page_active);
955 :
956 0 : uvm_unlock_pageq();
957 0 : }
958 :
959 : #endif