Line data Source code
1 : /* $OpenBSD: uvm_fault.c,v 1.93 2018/04/12 17:13:44 deraadt Exp $ */
2 : /* $NetBSD: uvm_fault.c,v 1.51 2000/08/06 00:22:53 thorpej Exp $ */
3 :
4 : /*
5 : * Copyright (c) 1997 Charles D. Cranor and Washington University.
6 : * All rights reserved.
7 : *
8 : * Redistribution and use in source and binary forms, with or without
9 : * modification, are permitted provided that the following conditions
10 : * are met:
11 : * 1. Redistributions of source code must retain the above copyright
12 : * notice, this list of conditions and the following disclaimer.
13 : * 2. Redistributions in binary form must reproduce the above copyright
14 : * notice, this list of conditions and the following disclaimer in the
15 : * documentation and/or other materials provided with the distribution.
16 : *
17 : * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 : * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 : * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 : * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 : * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 : * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 : * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 : * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 : * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 : * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 : *
28 : * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp
29 : */
30 :
31 : /*
32 : * uvm_fault.c: fault handler
33 : */
34 :
35 : #include <sys/param.h>
36 : #include <sys/systm.h>
37 : #include <sys/kernel.h>
38 : #include <sys/proc.h>
39 : #include <sys/malloc.h>
40 : #include <sys/mman.h>
41 :
42 : #include <uvm/uvm.h>
43 :
44 : /*
45 : *
46 : * a word on page faults:
47 : *
48 : * types of page faults we handle:
49 : *
50 : * CASE 1: upper layer faults                   CASE 2: lower layer faults
51 : *
52 : *    CASE 1A         CASE 1B                  CASE 2A        CASE 2B
53 : *    read/write1     write>1                  read/write     +-cow_write/zero
54 : *         |             |                         |        |
55 : *      +--|--+       +--|--+     +-----+       +  |  +     | +-----+
56 : * amap |  V  |       |  ----------->new|          |        | |  ^  |
57 : *      +-----+       +-----+     +-----+       +  |  +     | +--|--+
58 : *                                                 |        |    |
59 : *      +-----+       +-----+                   +--|--+     | +--|--+
60 : * uobj | d/c |       | d/c |                   |  V  |     +----|  |
61 : *      +-----+       +-----+                   +-----+       +-----+
62 : *
63 : * d/c = don't care
64 : *
65 : * case [0]: layerless fault
66 : * no amap or uobj is present. this is an error.
67 : *
68 : * case [1]: upper layer fault [anon active]
69 : * 1A: [read] or [write with anon->an_ref == 1]
70 : * I/O takes place in top level anon and uobj is not touched.
71 : * 1B: [write with anon->an_ref > 1]
72 : * new anon is alloc'd and data is copied off ["COW"]
73 : *
74 : * case [2]: lower layer fault [uobj]
75 : * 2A: [read on non-NULL uobj] or [write to non-copy_on_write area]
76 : * I/O takes place directly in object.
77 : * 2B: [write to copy_on_write] or [read on NULL uobj]
78 : * data is "promoted" from uobj to a new anon.
79 : * if uobj is null, then we zero fill.
80 : *
81 : * we follow the standard UVM locking protocol ordering:
82 : *
83 : * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ)
84 : * we hold a PG_BUSY page if we unlock for I/O
85 : *
86 : *
87 : * the code is structured as follows:
88 : *
89 : * - init the "IN" params in the ufi structure
90 : * ReFault:
91 : * - do lookups [locks maps], check protection, handle needs_copy
92 : * - check for case 0 fault (error)
93 : * - establish "range" of fault
94 : * - if we have an amap lock it and extract the anons
95 : * - if sequential advice deactivate pages behind us
96 : * - at the same time check pmap for unmapped areas and anon for pages
97 : * that we could map in (and do map it if found)
98 : * - check object for resident pages that we could map in
99 : * - if (case 2) goto Case2
100 : * - >>> handle case 1
101 : * - ensure source anon is resident in RAM
102 : * - if case 1B alloc new anon and copy from source
103 : * - map the correct page in
104 : * Case2:
105 : * - >>> handle case 2
106 : * - ensure source page is resident (if uobj)
107 : * - if case 2B alloc new anon and copy from source (could be zero
108 : * fill if uobj == NULL)
109 : * - map the correct page in
110 : * - done!
111 : *
112 : * note on paging:
113 : * if we have to do I/O we place a PG_BUSY page in the correct object,
114 : * unlock everything, and do the I/O. when I/O is done we must reverify
115 : * the state of the world before assuming that our data structures are
116 : * valid. [because mappings could change while the map is unlocked]
117 : *
118 : * alternative 1: unbusy the page in question and restart the page fault
119 : * from the top (ReFault). this is easy but does not take advantage
120 : * of the information that we already have from our previous lookup,
121 : * although it is possible that the "hints" in the vm_map will help here.
122 : *
123 : * alternative 2: the system already keeps track of a "version" number of
124 : * a map. [i.e. every time you write-lock a map (e.g. to change a
125 : * mapping) you bump the version number up by one...] so, we can save
126 : * the version number of the map before we release the lock and start I/O.
127 : * then when I/O is done we can relock and check the version numbers
128 : * to see if anything changed. this might save us some work over
129 : * alternative 1 because we don't have to unbusy the page and might require fewer comparisons(?).
130 : *
131 : * alternative 3: put in backpointers or a way to "hold" part of a map
132 : * in place while I/O is in progress. this could be complex to
133 : * implement (especially with structures like amap that can be referenced
134 : * by multiple map entries, and figuring out what should wait could be
135 : * complex as well...).
136 : *
137 : * given that we are not currently multiprocessor or multithreaded we might
138 : * as well choose alternative 2 now. maybe alternative 3 would be useful
139 : * in the future. XXX keep in mind for future consideration//rechecking.
140 : */
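Editor's note: "alternative 2" above is what this file implements at the bottom via uvmfault_lookup() and uvmfault_relock(), which snapshot and later re-check map->timestamp. A minimal user-space sketch of the same version-number idea, using hypothetical names (fake_map, map_lock_read, map_relock) rather than the UVM API:

    #include <pthread.h>
    #include <stdbool.h>

    struct fake_map {
            pthread_rwlock_t lock;
            unsigned int version;           /* bumped on every write-lock */
    };

    /* read-lock the map and snapshot its version before unlocking for I/O */
    static unsigned int
    map_lock_read(struct fake_map *m)
    {
            pthread_rwlock_rdlock(&m->lock);
            return (m->version);
    }

    /* relock after I/O; fail if the map changed while we slept */
    static bool
    map_relock(struct fake_map *m, unsigned int saved)
    {
            pthread_rwlock_rdlock(&m->lock);
            if (m->version != saved) {
                    pthread_rwlock_unlock(&m->lock);
                    return (false);         /* caller restarts the fault (ReFault) */
            }
            return (true);
    }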
141 :
142 : /*
143 : * local data structures
144 : */
145 : struct uvm_advice {
146 : int nback;
147 : int nforw;
148 : };
149 :
150 : /*
151 : * page range array: set up in uvmfault_init().
152 : */
153 : static struct uvm_advice uvmadvice[MADV_MASK + 1];
154 :
155 : #define UVM_MAXRANGE 16 /* must be max() of nback+nforw+1 */
156 :
157 : /*
158 : * private prototypes
159 : */
160 : static void uvmfault_amapcopy(struct uvm_faultinfo *);
161 : static __inline void uvmfault_anonflush(struct vm_anon **, int);
162 : void uvmfault_unlockmaps(struct uvm_faultinfo *, boolean_t);
163 : void uvmfault_update_stats(struct uvm_faultinfo *);
164 :
165 : /*
166 : * inline functions
167 : */
168 : /*
169 : * uvmfault_anonflush: try and deactivate pages in specified anons
170 : *
171 : * => does not have to deactivate page if it is busy
172 : */
173 : static __inline void
174 0 : uvmfault_anonflush(struct vm_anon **anons, int n)
175 : {
176 : int lcv;
177 : struct vm_page *pg;
178 :
179 0 : for (lcv = 0 ; lcv < n ; lcv++) {
180 0 : if (anons[lcv] == NULL)
181 : continue;
182 0 : pg = anons[lcv]->an_page;
183 0 : if (pg && (pg->pg_flags & PG_BUSY) == 0) {
184 0 : uvm_lock_pageq();
185 0 : if (pg->wire_count == 0) {
186 0 : pmap_page_protect(pg, PROT_NONE);
187 0 : uvm_pagedeactivate(pg);
188 0 : }
189 0 : uvm_unlock_pageq();
190 0 : }
191 : }
192 0 : }
193 :
194 : /*
195 : * normal functions
196 : */
197 : /*
198 : * uvmfault_init: compute proper values for the uvmadvice[] array.
199 : */
200 : void
201 0 : uvmfault_init(void)
202 : {
203 : int npages;
204 :
205 : npages = atop(16384);
206 0 : if (npages > 0) {
207 0 : KASSERT(npages <= UVM_MAXRANGE / 2);
208 0 : uvmadvice[MADV_NORMAL].nforw = npages;
209 0 : uvmadvice[MADV_NORMAL].nback = npages - 1;
210 0 : }
211 :
212 : npages = atop(32768);
213 0 : if (npages > 0) {
214 0 : KASSERT(npages <= UVM_MAXRANGE / 2);
215 0 : uvmadvice[MADV_SEQUENTIAL].nforw = npages - 1;
216 0 : uvmadvice[MADV_SEQUENTIAL].nback = npages;
217 0 : }
218 0 : }
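Editor's note: assuming the common 4 KB page size (PAGE_SHIFT == 12), the values computed above work out as follows; other page sizes scale these counts accordingly:

    atop(16384) = 4  =>  MADV_NORMAL:     nback = 3, nforw = 4   (3 + 4 + 1 = 8-page window)
    atop(32768) = 8  =>  MADV_SEQUENTIAL: nback = 8, nforw = 7   (8 + 7 + 1 = 16 pages = UVM_MAXRANGE)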
219 :
220 : /*
221 : * uvmfault_amapcopy: clear "needs_copy" in a map.
222 : *
223 : * => if we are out of RAM we sleep (waiting for more)
224 : */
225 : static void
226 0 : uvmfault_amapcopy(struct uvm_faultinfo *ufi)
227 : {
228 :
229 : /* while we haven't done the job */
230 0 : while (1) {
231 : /* no mapping? give up. */
232 0 : if (uvmfault_lookup(ufi, TRUE) == FALSE)
233 : return;
234 :
235 : /* copy if needed. */
236 0 : if (UVM_ET_ISNEEDSCOPY(ufi->entry))
237 0 : amap_copy(ufi->map, ufi->entry, M_NOWAIT,
238 0 : UVM_ET_ISSTACK(ufi->entry) ? FALSE : TRUE,
239 0 : ufi->orig_rvaddr, ufi->orig_rvaddr + 1);
240 :
241 : /* didn't work? must be out of RAM. sleep. */
242 0 : if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
243 : uvmfault_unlockmaps(ufi, TRUE);
244 0 : uvm_wait("fltamapcopy");
245 0 : continue;
246 : }
247 :
248 : /* got it! */
249 : uvmfault_unlockmaps(ufi, TRUE);
250 : return;
251 : }
252 : /*NOTREACHED*/
253 0 : }
254 :
255 : /*
256 : * uvmfault_anonget: get data in an anon into a non-busy, non-released
257 : * page in that anon.
258 : *
259 : * => we don't move the page on the queues [gets moved later]
260 : * => if we allocate a new page [we_own], it gets put on the queues.
261 : * either way, the result is that the page is on the queues at return time
262 : */
263 : int
264 0 : uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap,
265 : struct vm_anon *anon)
266 : {
267 : boolean_t we_own; /* we own anon's page? */
268 : boolean_t locked; /* did we relock? */
269 : struct vm_page *pg;
270 : int result;
271 :
272 : result = 0; /* XXX shut up gcc */
273 60 : uvmexp.fltanget++;
274 : /* bump rusage counters */
275 0 : if (anon->an_page)
276 0 : curproc->p_ru.ru_minflt++;
277 : else
278 0 : curproc->p_ru.ru_majflt++;
279 :
280 : /* loop until we get it, or fail. */
281 0 : while (1) {
282 : we_own = FALSE; /* TRUE if we set PG_BUSY on a page */
283 0 : pg = anon->an_page;
284 :
285 : /* page there? make sure it is not busy/released. */
286 0 : if (pg) {
287 0 : KASSERT(pg->pg_flags & PQ_ANON);
288 0 : KASSERT(pg->uanon == anon);
289 :
290 : /*
291 : * if the page is busy, we drop all the locks and
292 : * try again.
293 : */
294 60 : if ((pg->pg_flags & (PG_BUSY|PG_RELEASED)) == 0)
295 0 : return (VM_PAGER_OK);
296 0 : atomic_setbits_int(&pg->pg_flags, PG_WANTED);
297 0 : uvmexp.fltpgwait++;
298 :
299 : /*
300 : * the last unlock must be an atomic unlock+wait on
301 : * the owner of the page
302 : */
303 0 : uvmfault_unlockall(ufi, amap, NULL, NULL);
304 0 : UVM_WAIT(pg, 0, "anonget2", 0);
305 : /* ready to relock and try again */
306 0 : } else {
307 : /* no page, we must try and bring it in. */
308 0 : pg = uvm_pagealloc(NULL, 0, anon, 0);
309 :
310 0 : if (pg == NULL) { /* out of RAM. */
311 0 : uvmfault_unlockall(ufi, amap, NULL, anon);
312 0 : uvmexp.fltnoram++;
313 0 : uvm_wait("flt_noram1");
314 : /* ready to relock and try again */
315 0 : } else {
316 : /* we set the PG_BUSY bit */
317 : we_own = TRUE;
318 0 : uvmfault_unlockall(ufi, amap, NULL, anon);
319 :
320 : /*
321 : * we are passing a PG_BUSY+PG_FAKE+PG_CLEAN
322 : * page into the uvm_swap_get function with
323 : * all data structures unlocked. note that
324 : * it is ok to read an_swslot here because
325 : * we hold PG_BUSY on the page.
326 : */
327 0 : uvmexp.pageins++;
328 0 : result = uvm_swap_get(pg, anon->an_swslot,
329 : PGO_SYNCIO);
330 :
331 : /*
332 : * we clean up after the i/o below in the
333 : * "we_own" case
334 : */
335 : /* ready to relock and try again */
336 : }
337 : }
338 :
339 : /* now relock and try again */
340 0 : locked = uvmfault_relock(ufi);
341 :
342 : /*
343 : * if we own the page (i.e. we set PG_BUSY), then we need
344 : * to clean up after the I/O. there are three cases to
345 : * consider:
346 : * [1] page released during I/O: free anon and ReFault.
347 : * [2] I/O not OK. free the page and cause the fault
348 : * to fail.
349 : * [3] I/O OK! activate the page and sync with the
350 : * non-we_own case (i.e. drop anon lock if not locked).
351 : */
352 0 : if (we_own) {
353 0 : if (pg->pg_flags & PG_WANTED) {
354 0 : wakeup(pg);
355 0 : }
356 : /* un-busy! */
357 0 : atomic_clearbits_int(&pg->pg_flags,
358 : PG_WANTED|PG_BUSY|PG_FAKE);
359 : UVM_PAGE_OWN(pg, NULL);
360 :
361 : /*
362 : * if we were RELEASED during I/O, then our anon is
363 : * no longer part of an amap. we need to free the
364 : * anon and try again.
365 : */
366 0 : if (pg->pg_flags & PG_RELEASED) {
367 0 : pmap_page_protect(pg, PROT_NONE);
368 0 : uvm_anfree(anon); /* frees page for us */
369 0 : if (locked)
370 0 : uvmfault_unlockall(ufi, amap, NULL,
371 : NULL);
372 0 : uvmexp.fltpgrele++;
373 0 : return (VM_PAGER_REFAULT); /* refault! */
374 : }
375 :
376 0 : if (result != VM_PAGER_OK) {
377 0 : KASSERT(result != VM_PAGER_PEND);
378 :
379 : /* remove page from anon */
380 0 : anon->an_page = NULL;
381 :
382 : /*
383 : * remove the swap slot from the anon
384 : * and mark the anon as having no real slot.
385 : * don't free the swap slot, thus preventing
386 : * it from being used again.
387 : */
388 0 : uvm_swap_markbad(anon->an_swslot, 1);
389 0 : anon->an_swslot = SWSLOT_BAD;
390 :
391 : /*
392 : * note: page was never !PG_BUSY, so it
393 : * can't be mapped and thus no need to
394 : * pmap_page_protect it...
395 : */
396 0 : uvm_lock_pageq();
397 0 : uvm_pagefree(pg);
398 0 : uvm_unlock_pageq();
399 :
400 0 : if (locked)
401 0 : uvmfault_unlockall(ufi, amap, NULL,
402 : anon);
403 0 : return (VM_PAGER_ERROR);
404 : }
405 :
406 : /*
407 : * must be OK, clear modify (already PG_CLEAN)
408 : * and activate
409 : */
410 0 : pmap_clear_modify(pg);
411 0 : uvm_lock_pageq();
412 0 : uvm_pageactivate(pg);
413 0 : uvm_unlock_pageq();
414 0 : }
415 :
416 : /* we were not able to relock. restart fault. */
417 0 : if (!locked)
418 0 : return (VM_PAGER_REFAULT);
419 :
420 : /* verify no one touched the amap and moved the anon on us. */
421 0 : if (ufi != NULL &&
422 0 : amap_lookup(&ufi->entry->aref,
423 0 : ufi->orig_rvaddr - ufi->entry->start) != anon) {
424 :
425 0 : uvmfault_unlockall(ufi, amap, NULL, anon);
426 0 : return (VM_PAGER_REFAULT);
427 : }
428 :
429 : /* try it again! */
430 0 : uvmexp.fltanretry++;
431 0 : continue;
432 :
433 : } /* while (1) */
434 : /*NOTREACHED*/
435 0 : }
436 :
437 : /*
438 : * Update statistics after fault resolution.
439 : * - maxrss
440 : */
441 : void
442 0 : uvmfault_update_stats(struct uvm_faultinfo *ufi)
443 : {
444 : struct vm_map *map;
445 : struct proc *p;
446 : vsize_t res;
447 :
448 0 : map = ufi->orig_map;
449 :
450 : /*
451 : * If this is a nested pmap (eg, a virtual machine pmap managed
452 : * by vmm(4) on amd64/i386), don't do any updating, just return.
453 : *
454 : * pmap_nested() on other archs is #defined to 0, so this is a
455 : * no-op.
456 : */
457 0 : if (pmap_nested(map->pmap))
458 0 : return;
459 :
460 : /* Update the maxrss for the process. */
461 60 : if (map->flags & VM_MAP_ISVMSPACE) {
462 0 : p = curproc;
463 0 : KASSERT(p != NULL && &p->p_vmspace->vm_map == map);
464 :
465 0 : res = pmap_resident_count(map->pmap);
466 : /* Convert res from pages to kilobytes. */
467 0 : res <<= (PAGE_SHIFT - 10);
468 :
469 0 : if (p->p_ru.ru_maxrss < res)
470 0 : p->p_ru.ru_maxrss = res;
471 : }
472 0 : }
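Editor's note: a worked example of the pages-to-kilobytes conversion above, assuming 4 KB pages (PAGE_SHIFT == 12):

    res <<= (PAGE_SHIFT - 10)   =>   res <<= 2   =>   res *= 4
    e.g. 1000 resident 4 KB pages  ->  4000 KB reported as ru_maxrss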
473 :
474 : /*
475 : * F A U L T - m a i n e n t r y p o i n t
476 : */
477 :
478 : /*
479 : * uvm_fault: page fault handler
480 : *
481 : * => called from MD code to resolve a page fault
482 : * => VM data structures usually should be unlocked. however, it is
483 : * possible to call here with the main map locked if the caller
484 : * gets a write lock, sets it recursive, and then calls us (c.f.
485 : * uvm_map_pageable). this should be avoided because it keeps
486 : * the map locked off during I/O.
487 : */
488 : #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \
489 : ~PROT_WRITE : PROT_MASK)
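Editor's note: a small, self-contained illustration of what MASK() does. The userland stand-ins FAKE_PROT_MASK and FAKE_MASK are hypothetical; PROT_* come from <sys/mman.h>. On a copy-on-write entry the write bit is stripped from the access type before the object's pages are requested or mapped, so an object page is only ever entered read-only and a later write must promote it into an anon (case 2B above); on a non-COW entry the access type passes through unchanged.

    #include <sys/mman.h>
    #include <assert.h>

    #define FAKE_PROT_MASK  (PROT_READ | PROT_WRITE | PROT_EXEC)
    #define FAKE_MASK(cow)  ((cow) ? ~PROT_WRITE : FAKE_PROT_MASK)

    int
    main(void)
    {
            int access_type = PROT_READ | PROT_WRITE;

            /* copy-on-write entry: write permission is masked away */
            assert((access_type & FAKE_MASK(1)) == PROT_READ);
            /* plain shared mapping: access type passes through unchanged */
            assert((access_type & FAKE_MASK(0)) == access_type);
            return (0);
    }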
490 : int
491 0 : uvm_fault(vm_map_t orig_map, vaddr_t vaddr, vm_fault_t fault_type,
492 : vm_prot_t access_type)
493 : {
494 0 : struct uvm_faultinfo ufi;
495 : vm_prot_t enter_prot;
496 : boolean_t wired, narrow, promote, locked, shadowed;
497 0 : int npages, nback, nforw, centeridx, result, lcv, gotpages, ret;
498 : vaddr_t startva, currva;
499 : voff_t uoff;
500 0 : paddr_t pa;
501 : struct vm_amap *amap;
502 : struct uvm_object *uobj;
503 0 : struct vm_anon *anons_store[UVM_MAXRANGE], **anons, *anon, *oanon;
504 0 : struct vm_page *pages[UVM_MAXRANGE], *pg, *uobjpage;
505 :
506 : anon = NULL;
507 : pg = NULL;
508 :
509 76 : uvmexp.faults++; /* XXX: locking? */
510 :
511 : /* init the IN parameters in the ufi */
512 0 : ufi.orig_map = orig_map;
513 0 : ufi.orig_rvaddr = trunc_page(vaddr);
514 0 : ufi.orig_size = PAGE_SIZE; /* can't get any smaller than this */
515 0 : if (fault_type == VM_FAULT_WIRE)
516 0 : narrow = TRUE; /* don't look for neighborhood
517 : * pages on wire */
518 : else
519 : narrow = FALSE; /* normal fault */
520 :
521 : /* "goto ReFault" means restart the page fault from ground zero. */
522 : ReFault:
523 : /* lookup and lock the maps */
524 0 : if (uvmfault_lookup(&ufi, FALSE) == FALSE) {
525 0 : return (EFAULT);
526 : }
527 :
528 : #ifdef DIAGNOSTIC
529 0 : if ((ufi.map->flags & VM_MAP_PAGEABLE) == 0)
530 0 : panic("uvm_fault: fault on non-pageable map (%p, 0x%lx)",
531 : ufi.map, vaddr);
532 : #endif
533 :
534 : /* check protection */
535 0 : if ((ufi.entry->protection & access_type) != access_type) {
536 0 : uvmfault_unlockmaps(&ufi, FALSE);
537 0 : return (EACCES);
538 : }
539 :
540 : /*
541 : * "enter_prot" is the protection we want to enter the page in at.
542 : * for certain pages (e.g. copy-on-write pages) this protection can
543 : * be more strict than ufi.entry->protection. "wired" means either
544 : * the entry is wired or we are fault-wiring the pg.
545 : */
546 :
547 : enter_prot = ufi.entry->protection;
548 0 : wired = VM_MAPENT_ISWIRED(ufi.entry) || (fault_type == VM_FAULT_WIRE);
549 0 : if (wired)
550 0 : access_type = enter_prot; /* full access for wired */
551 :
552 : /* handle "needs_copy" case. */
553 0 : if (UVM_ET_ISNEEDSCOPY(ufi.entry)) {
554 0 : if ((access_type & PROT_WRITE) ||
555 0 : (ufi.entry->object.uvm_obj == NULL)) {
556 : /* need to clear */
557 0 : uvmfault_unlockmaps(&ufi, FALSE);
558 0 : uvmfault_amapcopy(&ufi);
559 0 : uvmexp.fltamcopy++;
560 0 : goto ReFault;
561 : } else {
562 : /*
563 : * ensure that we pmap_enter page R/O since
564 : * needs_copy is still true
565 : */
566 2 : enter_prot &= ~PROT_WRITE;
567 : }
568 0 : }
569 :
570 : /* identify the players */
571 0 : amap = ufi.entry->aref.ar_amap; /* top layer */
572 0 : uobj = ufi.entry->object.uvm_obj; /* bottom layer */
573 :
574 : /*
575 : * check for a case 0 fault. if nothing backing the entry then
576 : * error now.
577 : */
578 0 : if (amap == NULL && uobj == NULL) {
579 0 : uvmfault_unlockmaps(&ufi, FALSE);
580 0 : return (EFAULT);
581 : }
582 :
583 : /*
584 : * establish range of interest based on advice from mapper
585 : * and then clip to fit map entry. note that we only want
586 : * to do this the first time through the fault. if we
587 : * ReFault we will disable this by setting "narrow" to true.
588 : */
589 60 : if (narrow == FALSE) {
590 :
591 : /* wide fault (!narrow) */
592 62 : nback = min(uvmadvice[ufi.entry->advice].nback,
593 0 : (ufi.orig_rvaddr - ufi.entry->start) >> PAGE_SHIFT);
594 0 : startva = ufi.orig_rvaddr - ((vsize_t)nback << PAGE_SHIFT);
595 0 : nforw = min(uvmadvice[ufi.entry->advice].nforw,
596 0 : ((ufi.entry->end - ufi.orig_rvaddr) >>
597 0 : PAGE_SHIFT) - 1);
598 : /*
599 : * note: "-1" because we don't want to count the
600 : * faulting page as forw
601 : */
602 0 : npages = nback + nforw + 1;
603 : centeridx = nback;
604 :
605 : narrow = TRUE; /* ensure only once per-fault */
606 0 : } else {
607 : /* narrow fault! */
608 : nback = nforw = 0;
609 0 : startva = ufi.orig_rvaddr;
610 : npages = 1;
611 : centeridx = 0;
612 : }
613 :
614 : /* if we've got an amap, extract current anons. */
615 2 : if (amap) {
616 0 : anons = anons_store;
617 60 : amap_lookups(&ufi.entry->aref, startva - ufi.entry->start,
618 : anons, npages);
619 0 : } else {
620 : anons = NULL; /* to be safe */
621 : }
622 :
623 : /*
624 : * for MADV_SEQUENTIAL mappings we want to deactivate the back pages
625 : * now and then forget about them (for the rest of the fault).
626 : */
627 62 : if (ufi.entry->advice == MADV_SEQUENTIAL && nback != 0) {
628 : /* flush back-page anons? */
629 0 : if (amap)
630 0 : uvmfault_anonflush(anons, nback);
631 :
632 : /* flush object? */
633 0 : if (uobj) {
634 0 : uoff = (startva - ufi.entry->start) + ufi.entry->offset;
635 0 : (void) uobj->pgops->pgo_flush(uobj, uoff, uoff +
636 0 : ((vsize_t)nback << PAGE_SHIFT), PGO_DEACTIVATE);
637 0 : }
638 :
639 : /* now forget about the backpages */
640 0 : if (amap)
641 0 : anons += nback;
642 0 : startva += ((vsize_t)nback << PAGE_SHIFT);
643 0 : npages -= nback;
644 : centeridx = 0;
645 0 : }
646 :
647 : /*
648 : * map in the backpages and frontpages we found in the amap in hopes
649 : * of preventing future faults. we also init the pages[] array as
650 : * we go.
651 : */
652 : currva = startva;
653 : shadowed = FALSE;
654 60 : for (lcv = 0 ; lcv < npages ; lcv++, currva += PAGE_SIZE) {
655 : /*
656 : * don't play with VAs that are already mapped
657 : * (except for the center page)
658 : */
659 3 : if (lcv != centeridx &&
660 0 : pmap_extract(ufi.orig_map->pmap, currva, &pa)) {
661 2 : pages[lcv] = PGO_DONTCARE;
662 0 : continue;
663 : }
664 :
665 : /* unmapped or center page. check if any anon at this level. */
666 61 : if (amap == NULL || anons[lcv] == NULL) {
667 1 : pages[lcv] = NULL;
668 0 : continue;
669 : }
670 :
671 : /* check for present page and map if possible. re-activate it. */
672 0 : pages[lcv] = PGO_DONTCARE;
673 60 : if (lcv == centeridx) { /* save center for later! */
674 : shadowed = TRUE;
675 0 : continue;
676 : }
677 0 : anon = anons[lcv];
678 0 : if (anon->an_page &&
679 0 : (anon->an_page->pg_flags & (PG_RELEASED|PG_BUSY)) == 0) {
680 0 : uvm_lock_pageq();
681 0 : uvm_pageactivate(anon->an_page); /* reactivate */
682 0 : uvm_unlock_pageq();
683 0 : uvmexp.fltnamap++;
684 :
685 : /*
686 : * Since this isn't the page that's actually faulting,
687 : * ignore pmap_enter() failures; it's not critical
688 : * that we enter these right now.
689 : */
690 0 : (void) pmap_enter(ufi.orig_map->pmap, currva,
691 0 : VM_PAGE_TO_PHYS(anon->an_page),
692 0 : (anon->an_ref > 1) ? (enter_prot & ~PROT_WRITE) :
693 : enter_prot,
694 0 : PMAP_CANFAIL |
695 0 : (VM_MAPENT_ISWIRED(ufi.entry) ? PMAP_WIRED : 0));
696 0 : }
697 : }
698 : if (npages > 1)
699 : pmap_update(ufi.orig_map->pmap);
700 :
701 : /* (shadowed == TRUE) if there is an anon at the faulting address */
702 : /*
703 : * note that if we are really short of RAM we could sleep in the above
704 : * call to pmap_enter. bad?
705 : *
706 : * XXX Actually, that is bad; pmap_enter() should just fail in that
707 : * XXX case. --thorpej
708 : */
709 : /*
710 : * if the desired page is not shadowed by the amap and we have a
711 : * backing object, then we check to see if the backing object would
712 : * prefer to handle the fault itself (rather than letting us do it
713 : * with the usual pgo_get hook). the backing object signals this by
714 : * providing a pgo_fault routine.
715 : */
716 0 : if (uobj && shadowed == FALSE && uobj->pgops->pgo_fault != NULL) {
717 0 : result = uobj->pgops->pgo_fault(&ufi, startva, pages, npages,
718 : centeridx, fault_type, access_type,
719 : PGO_LOCKED);
720 :
721 0 : if (result == VM_PAGER_OK)
722 0 : return (0); /* pgo_fault did pmap enter */
723 0 : else if (result == VM_PAGER_REFAULT)
724 0 : goto ReFault; /* try again! */
725 : else
726 0 : return (EACCES);
727 : }
728 :
729 : /*
730 : * now, if the desired page is not shadowed by the amap and we have
731 : * a backing object that does not have a special fault routine, then
732 : * we ask (with pgo_get) the object for resident pages that we care
733 : * about and attempt to map them in. we do not let pgo_get block
734 : * (PGO_LOCKED).
735 : *
736 : * ("get" has the option of doing a pmap_enter for us)
737 : */
738 0 : if (uobj && shadowed == FALSE) {
739 0 : uvmexp.fltlget++;
740 0 : gotpages = npages;
741 0 : (void) uobj->pgops->pgo_get(uobj, ufi.entry->offset +
742 0 : (startva - ufi.entry->start),
743 0 : pages, &gotpages, centeridx,
744 0 : access_type & MASK(ufi.entry),
745 0 : ufi.entry->advice, PGO_LOCKED);
746 :
747 : /* check for pages to map, if we got any */
748 0 : uobjpage = NULL;
749 0 : if (gotpages) {
750 : currva = startva;
751 0 : for (lcv = 0 ; lcv < npages ;
752 0 : lcv++, currva += PAGE_SIZE) {
753 0 : if (pages[lcv] == NULL ||
754 0 : pages[lcv] == PGO_DONTCARE)
755 : continue;
756 :
757 0 : KASSERT((pages[lcv]->pg_flags & PG_RELEASED) == 0);
758 :
759 : /*
760 : * if center page is resident and not
761 : * PG_BUSY, then pgo_get made it PG_BUSY
762 : * for us and gave us a handle to it.
763 : * remember this page as "uobjpage."
764 : * (for later use).
765 : */
766 0 : if (lcv == centeridx) {
767 0 : uobjpage = pages[lcv];
768 0 : continue;
769 : }
770 :
771 : /*
772 : * note: calling pgo_get with locked data
773 : * structures returns us pages which are
774 : * neither busy nor released, so we don't
775 : * need to check for this. we can just
776 : * directly enter the page (after moving it
777 : * to the head of the active queue [useful?]).
778 : */
779 :
780 0 : uvm_lock_pageq();
781 0 : uvm_pageactivate(pages[lcv]); /* reactivate */
782 0 : uvm_unlock_pageq();
783 0 : uvmexp.fltnomap++;
784 :
785 : /*
786 : * Since this page isn't the page that's
787 : * actually faulting, ignore pmap_enter()
788 : * failures; it's not critical that we
789 : * enter these right now.
790 : */
791 0 : (void) pmap_enter(ufi.orig_map->pmap, currva,
792 0 : VM_PAGE_TO_PHYS(pages[lcv]),
793 0 : enter_prot & MASK(ufi.entry),
794 0 : PMAP_CANFAIL |
795 0 : (wired ? PMAP_WIRED : 0));
796 :
797 : /*
798 : * NOTE: page can't be PG_WANTED because
799 : * we've held the lock the whole time
800 : * we've had the handle.
801 : */
802 0 : atomic_clearbits_int(&pages[lcv]->pg_flags,
803 : PG_BUSY);
804 : UVM_PAGE_OWN(pages[lcv], NULL);
805 0 : } /* for "lcv" loop */
806 : pmap_update(ufi.orig_map->pmap);
807 : } /* "gotpages" != 0 */
808 : /* note: object still _locked_ */
809 : } else {
810 60 : uobjpage = NULL;
811 : }
812 :
813 : /*
814 : * note that at this point we are done with any front or back pages.
815 : * we are now going to focus on the center page (i.e. the one we've
816 : * faulted on). if we have faulted on the top (anon) layer
817 : * [i.e. case 1], then the anon we want is anons[centeridx] (we have
818 : * not touched it yet). if we have faulted on the bottom (uobj)
819 : * layer [i.e. case 2] and the page was both present and available,
820 : * then we've got a pointer to it as "uobjpage" and we've already
821 : * made it BUSY.
822 : */
823 : /*
824 : * there are four possible cases we must address: 1A, 1B, 2A, and 2B
825 : */
826 : /* redirect case 2: if we are not shadowed, go to case 2. */
827 0 : if (shadowed == FALSE)
828 : goto Case2;
829 :
830 : /* handle case 1: fault on an anon in our amap */
831 0 : anon = anons[centeridx];
832 :
833 : /*
834 : * no matter if we have case 1A or case 1B we are going to need to
835 : * have the anon's memory resident. ensure that now.
836 : */
837 : /*
838 : * let uvmfault_anonget do the dirty work.
839 : * also, if it is OK, then the anon's page is on the queues.
840 : */
841 0 : result = uvmfault_anonget(&ufi, amap, anon);
842 0 : switch (result) {
843 : case VM_PAGER_OK:
844 : break;
845 :
846 : case VM_PAGER_REFAULT:
847 0 : goto ReFault;
848 :
849 : case VM_PAGER_ERROR:
850 : /*
851 : * An error occurred while trying to bring in the
852 : * page -- this is the only error we return right
853 : * now.
854 : */
855 0 : return (EACCES); /* XXX */
856 : default:
857 : #ifdef DIAGNOSTIC
858 0 : panic("uvm_fault: uvmfault_anonget -> %d", result);
859 : #else
860 : return (EACCES);
861 : #endif
862 : }
863 :
864 : /*
865 : * if we are case 1B then we will need to allocate a new blank
866 : * anon to transfer the data into. note that we have a lock
867 : * on anon, so no one can busy or release the page until we are done.
868 : * also note that the ref count can't drop to zero here because
869 : * it is > 1 and we are only dropping one ref.
870 : *
871 : * in the (hopefully very rare) case that we are out of RAM we
872 : * will wait for more RAM, and refault.
873 : *
874 : * if we are out of anon VM we wait for RAM to become available.
875 : */
876 :
877 0 : if ((access_type & PROT_WRITE) != 0 && anon->an_ref > 1) {
878 0 : uvmexp.flt_acow++;
879 : oanon = anon; /* oanon = old */
880 0 : anon = uvm_analloc();
881 0 : if (anon) {
882 0 : pg = uvm_pagealloc(NULL, 0, anon, 0);
883 0 : }
884 :
885 : /* check for out of RAM */
886 0 : if (anon == NULL || pg == NULL) {
887 0 : uvmfault_unlockall(&ufi, amap, NULL, oanon);
888 0 : KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
889 0 : if (anon == NULL)
890 0 : uvmexp.fltnoanon++;
891 : else {
892 0 : uvm_anfree(anon);
893 0 : uvmexp.fltnoram++;
894 : }
895 :
896 0 : if (uvmexp.swpgonly == uvmexp.swpages)
897 0 : return (ENOMEM);
898 :
899 : /* out of RAM, wait for more */
900 0 : if (anon == NULL)
901 0 : uvm_anwait();
902 : else
903 0 : uvm_wait("flt_noram3");
904 0 : goto ReFault;
905 : }
906 :
907 : /* got all resources, replace the old anon with the new one */
908 0 : uvm_pagecopy(oanon->an_page, pg); /* pg now !PG_CLEAN */
909 : /* un-busy! new page */
910 0 : atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE);
911 : UVM_PAGE_OWN(pg, NULL);
912 0 : ret = amap_add(&ufi.entry->aref,
913 0 : ufi.orig_rvaddr - ufi.entry->start, anon, 1);
914 0 : KASSERT(ret == 0);
915 :
916 : /* deref: can not drop to zero here by defn! */
917 0 : oanon->an_ref--;
918 :
919 : /*
920 : * note: anon is _not_ locked, but we have the sole reference
921 : * to it from the amap.
922 : * thus, no one can get at it until we are done with it.
923 : */
924 0 : } else {
925 60 : uvmexp.flt_anon++;
926 : oanon = anon;
927 0 : pg = anon->an_page;
928 0 : if (anon->an_ref > 1) /* disallow writes to ref > 1 anons */
929 0 : enter_prot = enter_prot & ~PROT_WRITE;
930 : }
931 :
932 : /*
933 : * now map the page in ...
934 : * XXX: old fault unlocks object before pmap_enter. this seems
935 : * suspect since some other thread could blast the page out from
936 : * under us between the unlock and the pmap_enter.
937 : */
938 0 : if (pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg),
939 0 : enter_prot, access_type | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0))
940 0 : != 0) {
941 : /*
942 : * No need to undo what we did; we can simply think of
943 : * this as the pmap throwing away the mapping information.
944 : *
945 : * We do, however, have to go through the ReFault path,
946 : * as the map may change while we're asleep.
947 : */
948 0 : uvmfault_unlockall(&ufi, amap, NULL, oanon);
949 0 : KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
950 0 : if (uvmexp.swpgonly == uvmexp.swpages) {
951 : /* XXX instrumentation */
952 0 : return (ENOMEM);
953 : }
954 : /* XXX instrumentation */
955 0 : uvm_wait("flt_pmfail1");
956 0 : goto ReFault;
957 : }
958 :
959 : /* ... update the page queues. */
960 0 : uvm_lock_pageq();
961 :
962 0 : if (fault_type == VM_FAULT_WIRE) {
963 0 : uvm_pagewire(pg);
964 : /*
965 : * since the now-wired page cannot be paged out,
966 : * release its swap resources for others to use.
967 : * since an anon with no swap cannot be PG_CLEAN,
968 : * clear its clean flag now.
969 : */
970 0 : atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
971 0 : uvm_anon_dropswap(anon);
972 0 : } else {
973 : /* activate it */
974 60 : uvm_pageactivate(pg);
975 : }
976 :
977 0 : uvm_unlock_pageq();
978 :
979 : /* done case 1! finish up by unlocking everything and returning success */
980 0 : uvmfault_unlockall(&ufi, amap, NULL, oanon);
981 : pmap_update(ufi.orig_map->pmap);
982 0 : return (0);
983 :
984 :
985 : Case2:
986 : /* handle case 2: faulting on backing object or zero fill */
987 : /*
988 : * note that uobjpage can not be PGO_DONTCARE at this point. we now
989 : * set uobjpage to PGO_DONTCARE if we are doing a zero fill. if we
990 : * have a backing object, check and see if we are going to promote
991 : * the data up to an anon during the fault.
992 : */
993 0 : if (uobj == NULL) {
994 0 : uobjpage = PGO_DONTCARE;
995 : promote = TRUE; /* always need anon here */
996 0 : } else {
997 0 : KASSERT(uobjpage != PGO_DONTCARE);
998 0 : promote = (access_type & PROT_WRITE) &&
999 0 : UVM_ET_ISCOPYONWRITE(ufi.entry);
1000 : }
1001 :
1002 : /*
1003 : * if uobjpage is not null then we do not need to do I/O to get the
1004 : * uobjpage.
1005 : *
1006 : * if uobjpage is null, then we need to ask the pager to
1007 : * get the data for us. once we have the data, we need to reverify
1008 : * the state of the world. we are currently not holding any resources.
1009 : */
1010 0 : if (uobjpage) {
1011 : /* update rusage counters */
1012 0 : curproc->p_ru.ru_minflt++;
1013 0 : } else {
1014 : /* update rusage counters */
1015 0 : curproc->p_ru.ru_majflt++;
1016 :
1017 0 : uvmfault_unlockall(&ufi, amap, NULL, NULL);
1018 :
1019 0 : uvmexp.fltget++;
1020 0 : gotpages = 1;
1021 0 : uoff = (ufi.orig_rvaddr - ufi.entry->start) + ufi.entry->offset;
1022 0 : result = uobj->pgops->pgo_get(uobj, uoff, &uobjpage, &gotpages,
1023 0 : 0, access_type & MASK(ufi.entry), ufi.entry->advice,
1024 : PGO_SYNCIO);
1025 :
1026 : /* recover from I/O */
1027 0 : if (result != VM_PAGER_OK) {
1028 0 : KASSERT(result != VM_PAGER_PEND);
1029 :
1030 0 : if (result == VM_PAGER_AGAIN) {
1031 0 : tsleep(&lbolt, PVM, "fltagain2", 0);
1032 0 : goto ReFault;
1033 : }
1034 :
1035 0 : if (!UVM_ET_ISNOFAULT(ufi.entry))
1036 0 : return (EIO);
1037 :
1038 0 : uobjpage = PGO_DONTCARE;
1039 : promote = TRUE;
1040 0 : }
1041 :
1042 : /* re-verify the state of the world. */
1043 0 : locked = uvmfault_relock(&ufi);
1044 :
1045 : /*
1046 : * Re-verify that amap slot is still free. if there is
1047 : * a problem, we clean up.
1048 : */
1049 0 : if (locked && amap && amap_lookup(&ufi.entry->aref,
1050 0 : ufi.orig_rvaddr - ufi.entry->start)) {
1051 0 : if (locked)
1052 0 : uvmfault_unlockall(&ufi, amap, NULL, NULL);
1053 : locked = FALSE;
1054 0 : }
1055 :
1056 : /* didn't get the lock? release the page and retry. */
1057 0 : if (locked == FALSE && uobjpage != PGO_DONTCARE) {
1058 0 : uvm_lock_pageq();
1059 : /* make sure it is in queues */
1060 0 : uvm_pageactivate(uobjpage);
1061 0 : uvm_unlock_pageq();
1062 :
1063 0 : if (uobjpage->pg_flags & PG_WANTED)
1064 : /* still holding object lock */
1065 0 : wakeup(uobjpage);
1066 0 : atomic_clearbits_int(&uobjpage->pg_flags,
1067 : PG_BUSY|PG_WANTED);
1068 : UVM_PAGE_OWN(uobjpage, NULL);
1069 0 : goto ReFault;
1070 : }
1071 :
1072 : /*
1073 : * we have the data in uobjpage which is PG_BUSY
1074 : */
1075 : }
1076 :
1077 : /*
1078 : * notes:
1079 : * - at this point uobjpage can not be NULL
1080 : * - at this point uobjpage could be PG_WANTED (handle later)
1081 : */
1082 0 : if (promote == FALSE) {
1083 : /*
1084 : * we are not promoting. if the mapping is COW ensure that we
1085 : * don't give more access than we should (e.g. when doing a read
1086 : * fault on a COPYONWRITE mapping we want to map the COW page in
1087 : * R/O even though the entry protection could be R/W).
1088 : *
1089 : * set "pg" to the page we want to map in (uobjpage, usually)
1090 : */
1091 0 : uvmexp.flt_obj++;
1092 0 : if (UVM_ET_ISCOPYONWRITE(ufi.entry))
1093 0 : enter_prot &= ~PROT_WRITE;
1094 0 : pg = uobjpage; /* map in the actual object */
1095 :
1096 : /* assert(uobjpage != PGO_DONTCARE) */
1097 :
1098 : /*
1099 : * we are faulting directly on the page.
1100 : */
1101 0 : } else {
1102 : /*
1103 : * if we are going to promote the data to an anon we
1104 : * allocate a blank anon here and plug it into our amap.
1105 : */
1106 : #ifdef DIAGNOSTIC
1107 0 : if (amap == NULL)
1108 0 : panic("uvm_fault: want to promote data, but no anon");
1109 : #endif
1110 :
1111 0 : anon = uvm_analloc();
1112 0 : if (anon) {
1113 : /*
1114 : * In `Fill in data...' below, if
1115 : * uobjpage == PGO_DONTCARE, we want
1116 : * a zero'd, dirty page, so have
1117 : * uvm_pagealloc() do that for us.
1118 : */
1119 0 : pg = uvm_pagealloc(NULL, 0, anon,
1120 0 : (uobjpage == PGO_DONTCARE) ? UVM_PGA_ZERO : 0);
1121 0 : }
1122 :
1123 : /*
1124 : * out of memory resources?
1125 : */
1126 0 : if (anon == NULL || pg == NULL) {
1127 : /* arg! must unbusy our page and fail or sleep. */
1128 0 : if (uobjpage != PGO_DONTCARE) {
1129 0 : uvm_lock_pageq();
1130 0 : uvm_pageactivate(uobjpage);
1131 0 : uvm_unlock_pageq();
1132 :
1133 0 : if (uobjpage->pg_flags & PG_WANTED)
1134 0 : wakeup(uobjpage);
1135 0 : atomic_clearbits_int(&uobjpage->pg_flags,
1136 : PG_BUSY|PG_WANTED);
1137 : UVM_PAGE_OWN(uobjpage, NULL);
1138 0 : }
1139 :
1140 : /* unlock and fail ... */
1141 0 : uvmfault_unlockall(&ufi, amap, uobj, NULL);
1142 0 : KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
1143 0 : if (anon == NULL)
1144 0 : uvmexp.fltnoanon++;
1145 : else {
1146 0 : uvm_anfree(anon);
1147 0 : uvmexp.fltnoram++;
1148 : }
1149 :
1150 0 : if (uvmexp.swpgonly == uvmexp.swpages)
1151 0 : return (ENOMEM);
1152 :
1153 : /* out of RAM, wait for more */
1154 0 : if (anon == NULL)
1155 0 : uvm_anwait();
1156 : else
1157 0 : uvm_wait("flt_noram5");
1158 0 : goto ReFault;
1159 : }
1160 :
1161 : /* fill in the data */
1162 0 : if (uobjpage != PGO_DONTCARE) {
1163 0 : uvmexp.flt_prcopy++;
1164 : /* copy page [pg now dirty] */
1165 0 : uvm_pagecopy(uobjpage, pg);
1166 :
1167 : /*
1168 : * promote to shared amap? make sure all sharing
1169 : * procs see it
1170 : */
1171 0 : if ((amap_flags(amap) & AMAP_SHARED) != 0) {
1172 0 : pmap_page_protect(uobjpage, PROT_NONE);
1173 0 : }
1174 :
1175 : /* dispose of uobjpage. drop handle to uobj as well. */
1176 0 : if (uobjpage->pg_flags & PG_WANTED)
1177 0 : wakeup(uobjpage);
1178 0 : atomic_clearbits_int(&uobjpage->pg_flags,
1179 : PG_BUSY|PG_WANTED);
1180 : UVM_PAGE_OWN(uobjpage, NULL);
1181 0 : uvm_lock_pageq();
1182 0 : uvm_pageactivate(uobjpage);
1183 0 : uvm_unlock_pageq();
1184 : uobj = NULL;
1185 0 : } else {
1186 0 : uvmexp.flt_przero++;
1187 : /*
1188 : * Page is zero'd and marked dirty by uvm_pagealloc()
1189 : * above.
1190 : */
1191 : }
1192 :
1193 0 : if (amap_add(&ufi.entry->aref,
1194 0 : ufi.orig_rvaddr - ufi.entry->start, anon, 0)) {
1195 0 : uvmfault_unlockall(&ufi, amap, NULL, oanon);
1196 0 : KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
1197 0 : uvm_anfree(anon);
1198 0 : uvmexp.fltnoamap++;
1199 :
1200 0 : if (uvmexp.swpgonly == uvmexp.swpages)
1201 0 : return (ENOMEM);
1202 :
1203 0 : amap_populate(&ufi.entry->aref,
1204 0 : ufi.orig_rvaddr - ufi.entry->start);
1205 0 : goto ReFault;
1206 : }
1207 : }
1208 :
1209 : /* note: pg is either the uobjpage or the new page in the new anon */
1210 : /*
1211 : * all resources are present. we can now map it in and free our
1212 : * resources.
1213 : */
1214 0 : if (pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg),
1215 0 : enter_prot, access_type | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0))
1216 0 : != 0) {
1217 : /*
1218 : * No need to undo what we did; we can simply think of
1219 : * this as the pmap throwing away the mapping information.
1220 : *
1221 : * We do, however, have to go through the ReFault path,
1222 : * as the map may change while we're asleep.
1223 : */
1224 0 : if (pg->pg_flags & PG_WANTED)
1225 0 : wakeup(pg);
1226 :
1227 0 : atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED);
1228 : UVM_PAGE_OWN(pg, NULL);
1229 0 : uvmfault_unlockall(&ufi, amap, uobj, NULL);
1230 0 : KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
1231 0 : if (uvmexp.swpgonly == uvmexp.swpages) {
1232 : /* XXX instrumentation */
1233 0 : return (ENOMEM);
1234 : }
1235 : /* XXX instrumentation */
1236 0 : uvm_wait("flt_pmfail2");
1237 0 : goto ReFault;
1238 : }
1239 :
1240 0 : uvm_lock_pageq();
1241 :
1242 0 : if (fault_type == VM_FAULT_WIRE) {
1243 0 : uvm_pagewire(pg);
1244 0 : if (pg->pg_flags & PQ_AOBJ) {
1245 : /*
1246 : * since the now-wired page cannot be paged out,
1247 : * release its swap resources for others to use.
1248 : * since an aobj page with no swap cannot be PG_CLEAN,
1249 : * clear its clean flag now.
1250 : */
1251 0 : atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
1252 0 : uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
1253 0 : }
1254 : } else {
1255 : /* activate it */
1256 0 : uvm_pageactivate(pg);
1257 : }
1258 0 : uvm_unlock_pageq();
1259 :
1260 0 : if (pg->pg_flags & PG_WANTED)
1261 0 : wakeup(pg);
1262 :
1263 0 : atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED);
1264 : UVM_PAGE_OWN(pg, NULL);
1265 0 : uvmfault_unlockall(&ufi, amap, uobj, NULL);
1266 : pmap_update(ufi.orig_map->pmap);
1267 :
1268 0 : return (0);
1269 0 : }
1270 :
1271 :
1272 : /*
1273 : * uvm_fault_wire: wire down a range of virtual addresses in a map.
1274 : *
1275 : * => map may be read-locked by caller, but MUST NOT be write-locked.
1276 : * => if map is read-locked, any operations which may cause map to
1277 : * be write-locked in uvm_fault() must be taken care of by
1278 : * the caller. See uvm_map_pageable().
1279 : */
1280 : int
1281 0 : uvm_fault_wire(vm_map_t map, vaddr_t start, vaddr_t end, vm_prot_t access_type)
1282 : {
1283 : vaddr_t va;
1284 : int rv;
1285 :
1286 : /*
1287 : * now fault it in a page at a time. if the fault fails then we have
1288 : * to undo what we have done. note that in uvm_fault PROT_NONE
1289 : * is replaced with the max protection if fault_type is VM_FAULT_WIRE.
1290 : */
1291 0 : for (va = start ; va < end ; va += PAGE_SIZE) {
1292 0 : rv = uvm_fault(map, va, VM_FAULT_WIRE, access_type);
1293 0 : if (rv) {
1294 0 : if (va != start) {
1295 0 : uvm_fault_unwire(map, start, va);
1296 0 : }
1297 0 : return (rv);
1298 : }
1299 : }
1300 :
1301 0 : return (0);
1302 0 : }
1303 :
1304 : /*
1305 : * uvm_fault_unwire(): unwire range of virtual space.
1306 : */
1307 : void
1308 0 : uvm_fault_unwire(vm_map_t map, vaddr_t start, vaddr_t end)
1309 : {
1310 :
1311 0 : vm_map_lock_read(map);
1312 0 : uvm_fault_unwire_locked(map, start, end);
1313 0 : vm_map_unlock_read(map);
1314 0 : }
1315 :
1316 : /*
1317 : * uvm_fault_unwire_locked(): the guts of uvm_fault_unwire().
1318 : *
1319 : * => map must be at least read-locked.
1320 : */
1321 : void
1322 0 : uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
1323 : {
1324 0 : vm_map_entry_t entry, next;
1325 0 : pmap_t pmap = vm_map_pmap(map);
1326 : vaddr_t va;
1327 0 : paddr_t pa;
1328 : struct vm_page *pg;
1329 :
1330 0 : KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
1331 :
1332 : /*
1333 : * we assume that the area we are unwiring has actually been wired
1334 : * in the first place. this means that we should be able to extract
1335 : * the PAs from the pmap. we also lock out the page daemon so that
1336 : * we can call uvm_pageunwire.
1337 : */
1338 0 : uvm_lock_pageq();
1339 :
1340 : /* find the beginning map entry for the region. */
1341 0 : KASSERT(start >= vm_map_min(map) && end <= vm_map_max(map));
1342 0 : if (uvm_map_lookup_entry(map, start, &entry) == FALSE)
1343 0 : panic("uvm_fault_unwire_locked: address not in map");
1344 :
1345 0 : for (va = start; va < end ; va += PAGE_SIZE) {
1346 0 : if (pmap_extract(pmap, va, &pa) == FALSE)
1347 : continue;
1348 :
1349 : /* find the map entry for the current address. */
1350 0 : KASSERT(va >= entry->start);
1351 0 : while (va >= entry->end) {
1352 0 : next = RBT_NEXT(uvm_map_addr, entry);
1353 0 : KASSERT(next != NULL && next->start <= entry->end);
1354 0 : entry = next;
1355 : }
1356 :
1357 : /* if the entry is no longer wired, tell the pmap. */
1358 0 : if (VM_MAPENT_ISWIRED(entry) == 0)
1359 0 : pmap_unwire(pmap, va);
1360 :
1361 0 : pg = PHYS_TO_VM_PAGE(pa);
1362 0 : if (pg)
1363 0 : uvm_pageunwire(pg);
1364 : }
1365 :
1366 0 : uvm_unlock_pageq();
1367 0 : }
1368 :
1369 : /*
1370 : * uvmfault_unlockmaps: unlock the maps
1371 : */
1372 : void
1373 0 : uvmfault_unlockmaps(struct uvm_faultinfo *ufi, boolean_t write_locked)
1374 : {
1375 : /*
1376 : * ufi can be NULL when this isn't really a fault,
1377 : * but merely paging in anon data.
1378 : */
1379 0 : if (ufi == NULL) {
1380 : return;
1381 : }
1382 :
1383 0 : uvmfault_update_stats(ufi);
1384 0 : if (write_locked) {
1385 0 : vm_map_unlock(ufi->map);
1386 0 : } else {
1387 0 : vm_map_unlock_read(ufi->map);
1388 : }
1389 0 : }
1390 :
1391 : /*
1392 : * uvmfault_unlockall: unlock everything passed in.
1393 : *
1394 : * => maps must be read-locked (not write-locked).
1395 : */
1396 : void
1397 0 : uvmfault_unlockall(struct uvm_faultinfo *ufi, struct vm_amap *amap,
1398 : struct uvm_object *uobj, struct vm_anon *anon)
1399 : {
1400 :
1401 0 : uvmfault_unlockmaps(ufi, FALSE);
1402 0 : }
1403 :
1404 : /*
1405 : * uvmfault_lookup: lookup a virtual address in a map
1406 : *
1407 : * => caller must provide a uvm_faultinfo structure with the IN
1408 : * params properly filled in
1409 : * => we will lookup the map entry (handling submaps) as we go
1410 : * => if the lookup is a success we will return with the maps locked
1411 : * => if "write_lock" is TRUE, we write_lock the map, otherwise we only
1412 : * get a read lock.
1413 : * => note that submaps can only appear in the kernel and they are
1414 : * required to use the same virtual addresses as the map they
1415 : * are referenced by (thus address translation between the main
1416 : * map and the submap is unnecessary).
1417 : */
1418 :
1419 : boolean_t
1420 0 : uvmfault_lookup(struct uvm_faultinfo *ufi, boolean_t write_lock)
1421 : {
1422 : vm_map_t tmpmap;
1423 :
1424 : /* init ufi values for lookup. */
1425 76 : ufi->map = ufi->orig_map;
1426 0 : ufi->size = ufi->orig_size;
1427 :
1428 : /*
1429 : * keep going down levels until we are done. note that there can
1430 : * only be two levels so we won't loop very long.
1431 : */
1432 0 : while (1) {
1433 0 : if (ufi->orig_rvaddr < ufi->map->min_offset ||
1434 0 : ufi->orig_rvaddr >= ufi->map->max_offset)
1435 0 : return(FALSE);
1436 :
1437 : /* lock map */
1438 0 : if (write_lock) {
1439 0 : vm_map_lock(ufi->map);
1440 0 : } else {
1441 75 : vm_map_lock_read(ufi->map);
1442 : }
1443 :
1444 : /* lookup */
1445 0 : if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr,
1446 0 : &ufi->entry)) {
1447 0 : uvmfault_unlockmaps(ufi, write_lock);
1448 0 : return(FALSE);
1449 : }
1450 :
1451 : /* reduce size if necessary */
1452 0 : if (ufi->entry->end - ufi->orig_rvaddr < ufi->size)
1453 0 : ufi->size = ufi->entry->end - ufi->orig_rvaddr;
1454 :
1455 : /*
1456 : * submap? replace map with the submap and lookup again.
1457 : * note: VAs in submaps must match VAs in main map.
1458 : */
1459 63 : if (UVM_ET_ISSUBMAP(ufi->entry)) {
1460 0 : tmpmap = ufi->entry->object.sub_map;
1461 0 : uvmfault_unlockmaps(ufi, write_lock);
1462 0 : ufi->map = tmpmap;
1463 0 : continue;
1464 : }
1465 :
1466 : /* got it! */
1467 62 : ufi->mapv = ufi->map->timestamp;
1468 0 : return(TRUE);
1469 :
1470 : }
1471 : /*NOTREACHED*/
1472 0 : }
1473 :
1474 : /*
1475 : * uvmfault_relock: attempt to relock the same version of the map
1476 : *
1477 : * => fault data structures should be unlocked before calling.
1478 : * => if a success (TRUE) maps will be locked after call.
1479 : */
1480 : boolean_t
1481 0 : uvmfault_relock(struct uvm_faultinfo *ufi)
1482 : {
1483 : /*
1484 : * ufi can be NULL when this isn't really a fault,
1485 : * but merely paging in anon data.
1486 : */
1487 0 : if (ufi == NULL) {
1488 0 : return TRUE;
1489 : }
1490 :
1491 0 : uvmexp.fltrelck++;
1492 :
1493 : /*
1494 : * relock map. fail if version mismatch (in which case nothing
1495 : * gets locked).
1496 : */
1497 0 : vm_map_lock_read(ufi->map);
1498 0 : if (ufi->mapv != ufi->map->timestamp) {
1499 0 : vm_map_unlock_read(ufi->map);
1500 0 : return(FALSE);
1501 : }
1502 :
1503 0 : uvmexp.fltrelckok++;
1504 0 : return(TRUE); /* got it! */
1505 0 : }
|