Line data Source code
1 : /* $OpenBSD: uvm_aobj.c,v 1.85 2017/01/31 17:08:51 dhill Exp $ */
2 : /* $NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $ */
3 :
4 : /*
5 : * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
6 : * Washington University.
7 : * All rights reserved.
8 : *
9 : * Redistribution and use in source and binary forms, with or without
10 : * modification, are permitted provided that the following conditions
11 : * are met:
12 : * 1. Redistributions of source code must retain the above copyright
13 : * notice, this list of conditions and the following disclaimer.
14 : * 2. Redistributions in binary form must reproduce the above copyright
15 : * notice, this list of conditions and the following disclaimer in the
16 : * documentation and/or other materials provided with the distribution.
17 : *
18 : * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 : * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 : * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 : * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 : * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 : * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 : * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 : * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 : * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 : * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 : *
29 : * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
30 : */
31 : /*
32 : * uvm_aobj.c: anonymous memory uvm_object pager
33 : *
34 : * author: Chuck Silvers <chuq@chuq.com>
35 : * started: Jan-1998
36 : *
37 : * - design mostly from Chuck Cranor
38 : */
39 :
40 : #include <sys/param.h>
41 : #include <sys/systm.h>
42 : #include <sys/malloc.h>
43 : #include <sys/kernel.h>
44 : #include <sys/pool.h>
45 : #include <sys/stdint.h>
46 : #include <sys/atomic.h>
47 :
48 : #include <uvm/uvm.h>
49 :
50 : /*
51 : * an aobj manages anonymous-memory backed uvm_objects. in addition
52 : * to keeping the list of resident pages, it also keeps a list of
53 : * allocated swap blocks. depending on the size of the aobj this list
54 : * of allocated swap blocks is either stored in an array (small objects)
55 : * or in a hash table (large objects).
56 : */
57 :
58 : /*
59 : * local structures
60 : */
61 :
62 : /*
63 : * for hash tables, we break the address space of the aobj into blocks
64 : * of UAO_SWHASH_CLUSTER_SIZE pages. we require the cluster size to
65 : * be a power of two.
66 : */
67 : #define UAO_SWHASH_CLUSTER_SHIFT 4
68 : #define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT)
69 :
70 : /* get the "tag" for this page index */
71 : #define UAO_SWHASH_ELT_TAG(PAGEIDX) \
72 : ((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT)
73 :
74 : /* given an ELT and a page index, find the swap slot */
75 : #define UAO_SWHASH_ELT_PAGESLOT_IDX(PAGEIDX) \
76 : ((PAGEIDX) & (UAO_SWHASH_CLUSTER_SIZE - 1))
77 : #define UAO_SWHASH_ELT_PAGESLOT(ELT, PAGEIDX) \
78 : ((ELT)->slots[(PAGEIDX) & (UAO_SWHASH_CLUSTER_SIZE - 1)])
79 :
80 : /* given an ELT, return its pageidx base */
81 : #define UAO_SWHASH_ELT_PAGEIDX_BASE(ELT) \
82 : ((ELT)->tag << UAO_SWHASH_CLUSTER_SHIFT)
83 :
84 : /*
85 : * the swhash hash function
86 : */
87 : #define UAO_SWHASH_HASH(AOBJ, PAGEIDX) \
88 : (&(AOBJ)->u_swhash[(((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT) \
89 : & (AOBJ)->u_swhashmask)])
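As a worked illustration of the macros above, here is a minimal userland sketch (not part of this file; the constant names are local stand-ins) that recomputes the tag, in-cluster slot index and bucket for one example page index, assuming the cluster shift of 4 defined above and an 8-bucket table:

#include <stdio.h>

#define CLUSTER_SHIFT	4			/* stands in for UAO_SWHASH_CLUSTER_SHIFT */
#define CLUSTER_SIZE	(1 << CLUSTER_SHIFT)	/* 16 pages per cluster */

int
main(void)
{
	int pageidx = 37;			/* arbitrary example page index */
	int tag = pageidx >> CLUSTER_SHIFT;	/* UAO_SWHASH_ELT_TAG: 37 / 16 = 2 */
	int idx = pageidx & (CLUSTER_SIZE - 1);	/* UAO_SWHASH_ELT_PAGESLOT_IDX: 37 % 16 = 5 */
	unsigned long mask = 7;			/* assumed 8-bucket table, hashmask 0x7 */
	int bucket = tag & mask;		/* bucket chosen by UAO_SWHASH_HASH: 2 */

	printf("pageidx %d -> tag %d, slot idx %d, bucket %d\n",
	    pageidx, tag, idx, bucket);
	return 0;
}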
90 :
91 : /*
92 : * the swhash threshold determines if we will use an array or a
93 : * hash table to store the list of allocated swap blocks.
94 : */
95 :
96 : #define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4)
97 :
98 : /*
99 : * the number of buckets in a swhash, with an upper bound
100 : */
101 : #define UAO_SWHASH_MAXBUCKETS 256
102 : #define UAO_SWHASH_BUCKETS(pages) \
103 : (min((pages) >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))
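With the values above, UAO_SWHASH_THRESHOLD works out to 16 * 4 = 64 pages (256 KB assuming 4 KB pages), so an aobj of at most 64 pages keeps its swap slots in the flat u_swslots array while a larger one uses the hash. UAO_SWHASH_BUCKETS(pages) is pages / 16 capped at 256: a 1024-page (4 MB) aobj gets min(1024 >> 4, 256) = 64 buckets, and any aobj of 4096 pages or more hits the 256-bucket cap.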
104 :
105 :
106 : /*
107 : * uao_swhash_elt: when a hash table is being used, this structure defines
108 : * the format of an entry in the bucket list.
109 : */
110 : struct uao_swhash_elt {
111 : LIST_ENTRY(uao_swhash_elt) list; /* the hash list */
112 : voff_t tag; /* our 'tag' */
113 : int count; /* our number of active slots */
114 : int slots[UAO_SWHASH_CLUSTER_SIZE]; /* the slots */
115 : };
116 :
117 : /*
118 : * uao_swhash: the swap hash table structure
119 : */
120 : LIST_HEAD(uao_swhash, uao_swhash_elt);
121 :
122 : /*
123 : * uao_swhash_elt_pool: pool of uao_swhash_elt structures
124 : */
125 : struct pool uao_swhash_elt_pool;
126 :
127 : /*
128 : * uvm_aobj: the actual anon-backed uvm_object
129 : *
130 : * => the uvm_object is at the top of the structure, which allows
131 : * (struct uvm_aobj *) == (struct uvm_object *)
132 : * => only one of u_swslots and u_swhash is used in any given aobj
133 : */
134 : struct uvm_aobj {
135 : struct uvm_object u_obj; /* has: pgops, memt, #pages, #refs */
136 : int u_pages; /* number of pages in entire object */
137 : int u_flags; /* the flags (see uvm_aobj.h) */
138 : /*
139 : * Either an array or hashtable (array of bucket heads) of
140 : * offset -> swapslot mappings for the aobj.
141 : */
142 : #define u_swslots u_swap.slot_array
143 : #define u_swhash u_swap.slot_hash
144 : union swslots {
145 : int *slot_array;
146 : struct uao_swhash *slot_hash;
147 : } u_swap;
148 : u_long u_swhashmask; /* mask for hashtable */
149 : LIST_ENTRY(uvm_aobj) u_list; /* global list of aobjs */
150 : };
151 :
152 : /*
153 : * uvm_aobj_pool: pool of uvm_aobj structures
154 : */
155 : struct pool uvm_aobj_pool;
156 :
157 : /*
158 : * local functions
159 : */
160 : static struct uao_swhash_elt *uao_find_swhash_elt(struct uvm_aobj *, int,
161 : boolean_t);
162 : static int uao_find_swslot(struct uvm_aobj *, int);
163 : static boolean_t uao_flush(struct uvm_object *, voff_t,
164 : voff_t, int);
165 : static void uao_free(struct uvm_aobj *);
166 : static int uao_get(struct uvm_object *, voff_t,
167 : vm_page_t *, int *, int, vm_prot_t,
168 : int, int);
169 : static boolean_t uao_pagein(struct uvm_aobj *, int, int);
170 : static boolean_t uao_pagein_page(struct uvm_aobj *, int);
171 :
172 : void uao_dropswap_range(struct uvm_object *, voff_t, voff_t);
173 : void uao_shrink_flush(struct uvm_object *, int, int);
174 : int uao_shrink_hash(struct uvm_object *, int);
175 : int uao_shrink_array(struct uvm_object *, int);
176 : int uao_shrink_convert(struct uvm_object *, int);
177 :
178 : int uao_grow_hash(struct uvm_object *, int);
179 : int uao_grow_array(struct uvm_object *, int);
180 : int uao_grow_convert(struct uvm_object *, int);
181 :
182 : /*
183 : * aobj_pager
184 : *
185 : * note that some functions (e.g. put) are handled elsewhere
186 : */
187 : struct uvm_pagerops aobj_pager = {
188 : NULL, /* init */
189 : uao_reference, /* reference */
190 : uao_detach, /* detach */
191 : NULL, /* fault */
192 : uao_flush, /* flush */
193 : uao_get, /* get */
194 : };
195 :
196 : /*
197 : * uao_list: global list of active aobjs, locked by uao_list_lock
198 : *
199 : * Lock ordering: generally the locking order is object lock, then list lock.
200 : * in the case of swap off we have to iterate over the list, and thus the
201 : * ordering is reversed. In that case we must use trylocking to prevent
202 : * deadlock.
203 : */
204 : static LIST_HEAD(aobjlist, uvm_aobj) uao_list = LIST_HEAD_INITIALIZER(uao_list);
205 : static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_NONE);
206 :
207 :
208 : /*
209 : * functions
210 : */
211 : /*
212 : * hash table/array related functions
213 : */
214 : /*
215 : * uao_find_swhash_elt: find (or create) a hash table entry for a page
216 : * offset.
217 : */
218 : static struct uao_swhash_elt *
219 0 : uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, boolean_t create)
220 : {
221 : struct uao_swhash *swhash;
222 : struct uao_swhash_elt *elt;
223 : voff_t page_tag;
224 :
225 0 : swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */
226 : page_tag = UAO_SWHASH_ELT_TAG(pageidx); /* tag to search for */
227 :
228 : /* now search the bucket for the requested tag */
229 0 : LIST_FOREACH(elt, swhash, list) {
230 0 : if (elt->tag == page_tag)
231 0 : return(elt);
232 : }
233 :
234 : /* fail now if we are not allowed to create a new entry in the bucket */
235 0 : if (!create)
236 0 : return NULL;
237 :
238 : /* allocate a new entry for the bucket and init/insert it in */
239 0 : elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT | PR_ZERO);
240 : /*
241 : * XXX We cannot sleep here as the hash table might disappear
242 : * from under our feet. And we run the risk of deadlocking
243 : * the pagedaemon. In fact this code will only be called by
244 : * the pagedaemon and allocation will only fail if we
245 : * exhausted the pagedaemon reserve. In that case we're
246 : * doomed anyway, so panic.
247 : */
248 0 : if (elt == NULL)
249 0 : panic("%s: can't allocate entry", __func__);
250 0 : LIST_INSERT_HEAD(swhash, elt, list);
251 0 : elt->tag = page_tag;
252 :
253 0 : return(elt);
254 0 : }
255 :
256 : /*
257 : * uao_find_swslot: find the swap slot number for an aobj/pageidx
258 : */
259 : __inline static int
260 0 : uao_find_swslot(struct uvm_aobj *aobj, int pageidx)
261 : {
262 :
263 : /* if noswap flag is set, then we never return a slot */
264 0 : if (aobj->u_flags & UAO_FLAG_NOSWAP)
265 0 : return(0);
266 :
267 : /* if hashing, look in hash table. */
268 0 : if (aobj->u_pages > UAO_SWHASH_THRESHOLD) {
269 : struct uao_swhash_elt *elt =
270 0 : uao_find_swhash_elt(aobj, pageidx, FALSE);
271 :
272 0 : if (elt)
273 0 : return(UAO_SWHASH_ELT_PAGESLOT(elt, pageidx));
274 : else
275 0 : return(0);
276 : }
277 :
278 : /* otherwise, look in the array */
279 0 : return(aobj->u_swslots[pageidx]);
280 0 : }
281 :
282 : /*
283 : * uao_set_swslot: set the swap slot for a page in an aobj.
284 : *
285 : * => setting a slot to zero frees the slot
286 : */
287 : int
288 0 : uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
289 : {
290 0 : struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
291 : int oldslot;
292 :
293 : /* if noswap flag is set, then we can't set a slot */
294 0 : if (aobj->u_flags & UAO_FLAG_NOSWAP) {
295 0 : if (slot == 0)
296 0 : return(0); /* a clear is ok */
297 :
298 : /* but a set is not */
299 0 : printf("uao_set_swslot: uobj = %p\n", uobj);
300 0 : panic("uao_set_swslot: attempt to set a slot"
301 : " on a NOSWAP object");
302 : }
303 :
304 : /* are we using a hash table? if so, add it in the hash. */
305 0 : if (aobj->u_pages > UAO_SWHASH_THRESHOLD) {
306 : /*
307 : * Avoid allocating an entry just to free it again if
308 : * the page had no swap slot in the first place, and
309 : * we are freeing.
310 : */
311 : struct uao_swhash_elt *elt =
312 0 : uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE);
313 0 : if (elt == NULL) {
314 0 : KASSERT(slot == 0);
315 0 : return (0);
316 : }
317 :
318 0 : oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
319 0 : UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
320 :
321 : /*
322 : * now adjust the elt's reference counter and free it if we've
323 : * dropped it to zero.
324 : */
325 : /* an allocation? */
326 0 : if (slot) {
327 0 : if (oldslot == 0)
328 0 : elt->count++;
329 : } else { /* freeing slot ... */
330 0 : if (oldslot) /* to be safe */
331 0 : elt->count--;
332 :
333 0 : if (elt->count == 0) {
334 0 : LIST_REMOVE(elt, list);
335 0 : pool_put(&uao_swhash_elt_pool, elt);
336 0 : }
337 : }
338 0 : } else {
339 : /* we are using an array */
340 0 : oldslot = aobj->u_swslots[pageidx];
341 0 : aobj->u_swslots[pageidx] = slot;
342 : }
343 0 : return (oldslot);
344 0 : }
345 : /*
346 : * end of hash/array functions
347 : */
348 :
349 : /*
350 : * uao_free: free all resources held by an aobj, and then free the aobj
351 : *
352 : * => the aobj should be dead
353 : */
354 : static void
355 0 : uao_free(struct uvm_aobj *aobj)
356 : {
357 :
358 0 : if (aobj->u_pages > UAO_SWHASH_THRESHOLD) {
359 0 : int i, hashbuckets = aobj->u_swhashmask + 1;
360 :
361 : /*
362 : * free the swslots from each hash bucket,
363 : * then the hash bucket, and finally the hash table itself.
364 : */
365 0 : for (i = 0; i < hashbuckets; i++) {
366 : struct uao_swhash_elt *elt, *next;
367 :
368 0 : for (elt = LIST_FIRST(&aobj->u_swhash[i]);
369 0 : elt != NULL;
370 : elt = next) {
371 : int j;
372 :
373 0 : for (j = 0; j < UAO_SWHASH_CLUSTER_SIZE; j++) {
374 0 : int slot = elt->slots[j];
375 :
376 0 : if (slot == 0) {
377 0 : continue;
378 : }
379 0 : uvm_swap_free(slot, 1);
380 : /*
381 : * this page is no longer
382 : * only in swap.
383 : */
384 0 : uvmexp.swpgonly--;
385 0 : }
386 :
387 0 : next = LIST_NEXT(elt, list);
388 0 : pool_put(&uao_swhash_elt_pool, elt);
389 : }
390 : }
391 :
392 0 : hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
393 0 : } else {
394 : int i;
395 :
396 : /* free the array */
397 0 : for (i = 0; i < aobj->u_pages; i++) {
398 0 : int slot = aobj->u_swslots[i];
399 :
400 0 : if (slot) {
401 0 : uvm_swap_free(slot, 1);
402 : /* this page is no longer only in swap. */
403 0 : uvmexp.swpgonly--;
404 0 : }
405 : }
406 0 : free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
407 : }
408 :
409 : /* finally free the aobj itself */
410 0 : pool_put(&uvm_aobj_pool, aobj);
411 0 : }
412 :
413 : /*
414 : * pager functions
415 : */
416 :
417 : /*
418 : * Shrink an aobj to a given number of pages. The procedure is always the same:
419 : * assess the necessity of data structure conversion (hash to array), secure
420 : * resources, flush pages and drop swap slots.
421 : *
422 : */
423 :
424 : void
425 0 : uao_shrink_flush(struct uvm_object *uobj, int startpg, int endpg)
426 : {
427 0 : KASSERT(startpg < endpg);
428 0 : KASSERT(uobj->uo_refs == 1);
429 0 : uao_flush(uobj, (voff_t)startpg << PAGE_SHIFT,
430 0 : (voff_t)endpg << PAGE_SHIFT, PGO_FREE);
431 0 : uao_dropswap_range(uobj, startpg, endpg);
432 0 : }
433 :
434 : int
435 0 : uao_shrink_hash(struct uvm_object *uobj, int pages)
436 : {
437 0 : struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
438 : struct uao_swhash *new_swhash;
439 : struct uao_swhash_elt *elt;
440 0 : unsigned long new_hashmask;
441 : int i;
442 :
443 0 : KASSERT(aobj->u_pages > UAO_SWHASH_THRESHOLD);
444 :
445 : /*
446 : * If the size of the hash table doesn't change, all we need to do is
447 : * to adjust the page count.
448 : */
449 0 : if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
450 0 : uao_shrink_flush(uobj, pages, aobj->u_pages);
451 0 : aobj->u_pages = pages;
452 0 : return 0;
453 : }
454 :
455 0 : new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
456 : M_WAITOK | M_CANFAIL, &new_hashmask);
457 0 : if (new_swhash == NULL)
458 0 : return ENOMEM;
459 :
460 0 : uao_shrink_flush(uobj, pages, aobj->u_pages);
461 :
462 : /*
463 : * Even though the hash table size is changing, the hash of the buckets
464 : * we are interested in copying should not change.
465 : */
466 0 : for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
467 0 : while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
468 : elt = LIST_FIRST(&aobj->u_swhash[i]);
469 0 : LIST_REMOVE(elt, list);
470 0 : LIST_INSERT_HEAD(&new_swhash[i], elt, list);
471 : }
472 : }
473 :
474 0 : hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
475 :
476 0 : aobj->u_swhash = new_swhash;
477 0 : aobj->u_pages = pages;
478 0 : aobj->u_swhashmask = new_hashmask;
479 :
480 0 : return 0;
481 0 : }
482 :
483 : int
484 0 : uao_shrink_convert(struct uvm_object *uobj, int pages)
485 : {
486 0 : struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
487 : struct uao_swhash_elt *elt;
488 : int i, *new_swslots;
489 :
490 0 : new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
491 : M_WAITOK | M_CANFAIL | M_ZERO);
492 0 : if (new_swslots == NULL)
493 0 : return ENOMEM;
494 :
495 0 : uao_shrink_flush(uobj, pages, aobj->u_pages);
496 :
497 : /* Convert swap slots from hash to array. */
498 0 : for (i = 0; i < pages; i++) {
499 0 : elt = uao_find_swhash_elt(aobj, i, FALSE);
500 0 : if (elt != NULL) {
501 0 : new_swslots[i] = UAO_SWHASH_ELT_PAGESLOT(elt, i);
502 0 : if (new_swslots[i] != 0)
503 0 : elt->count--;
504 0 : if (elt->count == 0) {
505 0 : LIST_REMOVE(elt, list);
506 0 : pool_put(&uao_swhash_elt_pool, elt);
507 0 : }
508 : }
509 : }
510 :
511 0 : hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
512 :
513 0 : aobj->u_swslots = new_swslots;
514 0 : aobj->u_pages = pages;
515 :
516 0 : return 0;
517 0 : }
518 :
519 : int
520 0 : uao_shrink_array(struct uvm_object *uobj, int pages)
521 : {
522 0 : struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
523 : int i, *new_swslots;
524 :
525 0 : new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
526 : M_WAITOK | M_CANFAIL | M_ZERO);
527 0 : if (new_swslots == NULL)
528 0 : return ENOMEM;
529 :
530 0 : uao_shrink_flush(uobj, pages, aobj->u_pages);
531 :
532 0 : for (i = 0; i < pages; i++)
533 0 : new_swslots[i] = aobj->u_swslots[i];
534 :
535 0 : free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
536 :
537 0 : aobj->u_swslots = new_swslots;
538 0 : aobj->u_pages = pages;
539 :
540 0 : return 0;
541 0 : }
542 :
543 : int
544 0 : uao_shrink(struct uvm_object *uobj, int pages)
545 : {
546 0 : struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
547 :
548 0 : KASSERT(pages < aobj->u_pages);
549 :
550 : /*
551 : * Distinguish between three possible cases:
552 : * 1. aobj uses hash and must be converted to array.
553 : * 2. aobj uses array and array size needs to be adjusted.
554 : * 3. aobj uses hash and hash size needs to be adjusted.
555 : */
556 0 : if (pages > UAO_SWHASH_THRESHOLD)
557 0 : return uao_shrink_hash(uobj, pages); /* case 3 */
558 0 : else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
559 0 : return uao_shrink_convert(uobj, pages); /* case 1 */
560 : else
561 0 : return uao_shrink_array(uobj, pages); /* case 2 */
562 0 : }
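Concretely, with the 64-page UAO_SWHASH_THRESHOLD worked out earlier: shrinking a 100-page aobj to 80 pages stays above the threshold, so only the hash is resized (case 3); shrinking it to 20 pages crosses the threshold and converts the hash to an array (case 1); and shrinking a 50-page aobj to 20 pages stays below the threshold and merely shrinks the array (case 2).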
563 :
564 : /*
565 : * Grow an aobj to a given number of pages. Right now we only adjust the swap
566 : * slots. We could additionally handle page allocation directly, so that they
567 : * don't happen through uvm_fault(). That would allow us to use another
568 : * mechanism for the swap slots other than malloc(). It is thus mandatory that
569 : * the caller of these functions does not allow faults to happen in case of
570 : * growth error.
571 : */
572 : int
573 0 : uao_grow_array(struct uvm_object *uobj, int pages)
574 : {
575 0 : struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
576 : int i, *new_swslots;
577 :
578 0 : KASSERT(aobj->u_pages <= UAO_SWHASH_THRESHOLD);
579 :
580 0 : new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
581 : M_WAITOK | M_CANFAIL | M_ZERO);
582 0 : if (new_swslots == NULL)
583 0 : return ENOMEM;
584 :
585 0 : for (i = 0; i < aobj->u_pages; i++)
586 0 : new_swslots[i] = aobj->u_swslots[i];
587 :
588 0 : free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
589 :
590 0 : aobj->u_swslots = new_swslots;
591 0 : aobj->u_pages = pages;
592 :
593 0 : return 0;
594 0 : }
595 :
596 : int
597 0 : uao_grow_hash(struct uvm_object *uobj, int pages)
598 : {
599 0 : struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
600 : struct uao_swhash *new_swhash;
601 : struct uao_swhash_elt *elt;
602 0 : unsigned long new_hashmask;
603 : int i;
604 :
605 0 : KASSERT(pages > UAO_SWHASH_THRESHOLD);
606 :
607 : /*
608 : * If the size of the hash table doesn't change, all we need to do is
609 : * to adjust the page count.
610 : */
611 0 : if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
612 0 : aobj->u_pages = pages;
613 0 : return 0;
614 : }
615 :
616 0 : KASSERT(UAO_SWHASH_BUCKETS(aobj->u_pages) < UAO_SWHASH_BUCKETS(pages));
617 :
618 0 : new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
619 : M_WAITOK | M_CANFAIL, &new_hashmask);
620 0 : if (new_swhash == NULL)
621 0 : return ENOMEM;
622 :
623 0 : for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
624 0 : while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
625 : elt = LIST_FIRST(&aobj->u_swhash[i]);
626 0 : LIST_REMOVE(elt, list);
627 0 : LIST_INSERT_HEAD(&new_swhash[i], elt, list);
628 : }
629 : }
630 :
631 0 : hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
632 :
633 0 : aobj->u_swhash = new_swhash;
634 0 : aobj->u_pages = pages;
635 0 : aobj->u_swhashmask = new_hashmask;
636 :
637 0 : return 0;
638 0 : }
639 :
640 : int
641 0 : uao_grow_convert(struct uvm_object *uobj, int pages)
642 : {
643 0 : struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
644 : struct uao_swhash *new_swhash;
645 : struct uao_swhash_elt *elt;
646 0 : unsigned long new_hashmask;
647 : int i, *old_swslots;
648 :
649 0 : new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
650 : M_WAITOK | M_CANFAIL, &new_hashmask);
651 0 : if (new_swhash == NULL)
652 0 : return ENOMEM;
653 :
654 : /* Set these now, so we can use uao_find_swhash_elt(). */
655 0 : old_swslots = aobj->u_swslots;
656 0 : aobj->u_swhash = new_swhash;
657 0 : aobj->u_swhashmask = new_hashmask;
658 :
659 0 : for (i = 0; i < aobj->u_pages; i++) {
660 0 : if (old_swslots[i] != 0) {
661 0 : elt = uao_find_swhash_elt(aobj, i, TRUE);
662 0 : elt->count++;
663 0 : UAO_SWHASH_ELT_PAGESLOT(elt, i) = old_swslots[i];
664 0 : }
665 : }
666 :
667 0 : free(old_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
668 0 : aobj->u_pages = pages;
669 :
670 0 : return 0;
671 0 : }
672 :
673 : int
674 0 : uao_grow(struct uvm_object *uobj, int pages)
675 : {
676 0 : struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
677 :
678 0 : KASSERT(pages > aobj->u_pages);
679 :
680 : /*
681 : * Distinguish between three possible cases:
682 : * 1. aobj uses hash and hash size needs to be adjusted.
683 : * 2. aobj uses array and array size needs to be adjusted.
684 : * 3. aobj uses array and must be converted to hash.
685 : */
686 0 : if (pages <= UAO_SWHASH_THRESHOLD)
687 0 : return uao_grow_array(uobj, pages); /* case 2 */
688 0 : else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
689 0 : return uao_grow_hash(uobj, pages); /* case 1 */
690 : else
691 0 : return uao_grow_convert(uobj, pages); /* case 3 */
692 0 : }
693 :
694 : /*
695 : * uao_create: create an aobj of the given size and return its uvm_object.
696 : *
697 : * => for normal use, flags are zero or UAO_FLAG_CANFAIL.
698 : * => for the kernel object, the flags are:
699 : * UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
700 : * UAO_FLAG_KERNSWAP - enable swapping of kernel object (" ")
701 : */
702 : struct uvm_object *
703 0 : uao_create(vsize_t size, int flags)
704 : {
705 : static struct uvm_aobj kernel_object_store; /* home of kernel_object */
706 : static int kobj_alloced = 0; /* not allocated yet */
707 0 : int pages = round_page(size) >> PAGE_SHIFT;
708 : int refs = UVM_OBJ_KERN;
709 : int mflags;
710 : struct uvm_aobj *aobj;
711 :
712 : /* malloc a new aobj unless we are asked for the kernel object */
713 0 : if (flags & UAO_FLAG_KERNOBJ) { /* want kernel object? */
714 0 : if (kobj_alloced)
715 0 : panic("uao_create: kernel object already allocated");
716 :
717 : aobj = &kernel_object_store;
718 0 : aobj->u_pages = pages;
719 0 : aobj->u_flags = UAO_FLAG_NOSWAP; /* no swap to start */
720 : /* we are special, we never die */
721 0 : kobj_alloced = UAO_FLAG_KERNOBJ;
722 0 : } else if (flags & UAO_FLAG_KERNSWAP) {
723 : aobj = &kernel_object_store;
724 0 : if (kobj_alloced != UAO_FLAG_KERNOBJ)
725 0 : panic("uao_create: asked to enable swap on kernel object");
726 0 : kobj_alloced = UAO_FLAG_KERNSWAP;
727 0 : } else { /* normal object */
728 0 : aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
729 0 : aobj->u_pages = pages;
730 0 : aobj->u_flags = 0; /* normal object */
731 : refs = 1; /* normal object so 1 ref */
732 : }
733 :
734 : /* allocate hash/array if necessary */
735 0 : if (flags == 0 || (flags & (UAO_FLAG_KERNSWAP | UAO_FLAG_CANFAIL))) {
736 0 : if (flags)
737 0 : mflags = M_NOWAIT;
738 : else
739 : mflags = M_WAITOK;
740 :
741 : /* allocate hash table or array depending on object size */
742 0 : if (aobj->u_pages > UAO_SWHASH_THRESHOLD) {
743 0 : aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(pages),
744 0 : M_UVMAOBJ, mflags, &aobj->u_swhashmask);
745 0 : if (aobj->u_swhash == NULL) {
746 0 : if (flags & UAO_FLAG_CANFAIL) {
747 0 : pool_put(&uvm_aobj_pool, aobj);
748 0 : return (NULL);
749 : }
750 0 : panic("uao_create: hashinit swhash failed");
751 : }
752 : } else {
753 0 : aobj->u_swslots = mallocarray(pages, sizeof(int),
754 0 : M_UVMAOBJ, mflags|M_ZERO);
755 0 : if (aobj->u_swslots == NULL) {
756 0 : if (flags & UAO_FLAG_CANFAIL) {
757 0 : pool_put(&uvm_aobj_pool, aobj);
758 0 : return (NULL);
759 : }
760 0 : panic("uao_create: malloc swslots failed");
761 : }
762 : }
763 :
764 0 : if (flags & UAO_FLAG_KERNSWAP) {
765 0 : aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
766 0 : return(&aobj->u_obj);
767 : /* done! */
768 : }
769 : }
770 :
771 0 : uvm_objinit(&aobj->u_obj, &aobj_pager, refs);
772 :
773 : /* now that aobj is ready, add it to the global list */
774 0 : mtx_enter(&uao_list_lock);
775 0 : LIST_INSERT_HEAD(&uao_list, aobj, u_list);
776 0 : mtx_leave(&uao_list_lock);
777 :
778 0 : return(&aobj->u_obj);
779 0 : }
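The snippet below is a minimal, hypothetical lifecycle sketch (not from this file; the function name is invented) showing how a caller might use the interfaces declared here: create a small anonymous object with UAO_FLAG_CANFAIL, take and drop an extra reference, and let the final detach release the pages and swap slots via uao_free().

/*
 * Hypothetical example only: exercises uao_create()/uao_reference()/
 * uao_detach() as documented above.
 */
static int
example_aobj_lifecycle(void)
{
	struct uvm_object *uobj;

	/* 4-page anonymous object; CANFAIL turns allocation failure into NULL */
	uobj = uao_create(4 * PAGE_SIZE, UAO_FLAG_CANFAIL);
	if (uobj == NULL)
		return ENOMEM;

	uao_reference(uobj);	/* take an extra reference */
	uao_detach(uobj);	/* drop it again */
	uao_detach(uobj);	/* last reference gone: pages and swap freed */

	return 0;
}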
780 :
781 :
782 :
783 : /*
784 : * uao_init: set up aobj pager subsystem
785 : *
786 : * => called at boot time from uvm_pager_init()
787 : */
788 : void
789 0 : uao_init(void)
790 : {
791 : static int uao_initialized;
792 :
793 0 : if (uao_initialized)
794 : return;
795 0 : uao_initialized = TRUE;
796 :
797 : /*
798 : * NOTE: Pages for this pool must not come from a pageable
799 : * kernel map!
800 : */
801 0 : pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0,
802 : IPL_NONE, PR_WAITOK, "uaoeltpl", NULL);
803 0 : pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0,
804 : IPL_NONE, PR_WAITOK, "aobjpl", NULL);
805 0 : }
806 :
807 : /*
808 : * uao_reference: add a ref to an aobj
809 : */
810 : void
811 0 : uao_reference(struct uvm_object *uobj)
812 : {
813 0 : uao_reference_locked(uobj);
814 0 : }
815 :
816 : /*
817 : * uao_reference_locked: add a ref to an aobj
818 : */
819 : void
820 0 : uao_reference_locked(struct uvm_object *uobj)
821 : {
822 :
823 : /* kernel_object already has plenty of references, leave it alone. */
824 0 : if (UVM_OBJ_IS_KERN_OBJECT(uobj))
825 : return;
826 :
827 0 : uobj->uo_refs++; /* bump! */
828 0 : }
829 :
830 :
831 : /*
832 : * uao_detach: drop a reference to an aobj
833 : */
834 : void
835 0 : uao_detach(struct uvm_object *uobj)
836 : {
837 0 : uao_detach_locked(uobj);
838 0 : }
839 :
840 :
841 : /*
842 : * uao_detach_locked: drop a reference to an aobj
843 : *
844 : * => aobj may be freed upon return.
845 : */
846 : void
847 0 : uao_detach_locked(struct uvm_object *uobj)
848 : {
849 0 : struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
850 : struct vm_page *pg;
851 :
852 : /* detaching from kernel_object is a noop. */
853 0 : if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
854 0 : return;
855 : }
856 :
857 0 : uobj->uo_refs--; /* drop ref! */
858 0 : if (uobj->uo_refs) { /* still more refs? */
859 0 : return;
860 : }
861 :
862 : /* remove the aobj from the global list. */
863 0 : mtx_enter(&uao_list_lock);
864 0 : LIST_REMOVE(aobj, u_list);
865 0 : mtx_leave(&uao_list_lock);
866 :
867 : /*
868 : * Free all pages left in the object. If they're busy, wait
869 : * for them to become available before we kill it.
870 : * Release swap resources then free the page.
871 : */
872 0 : uvm_lock_pageq();
873 0 : while((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) {
874 0 : if (pg->pg_flags & PG_BUSY) {
875 0 : atomic_setbits_int(&pg->pg_flags, PG_WANTED);
876 0 : uvm_unlock_pageq();
877 0 : UVM_WAIT(pg, 0, "uao_det", 0);
878 0 : uvm_lock_pageq();
879 0 : continue;
880 : }
881 0 : pmap_page_protect(pg, PROT_NONE);
882 0 : uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
883 0 : uvm_pagefree(pg);
884 : }
885 0 : uvm_unlock_pageq();
886 :
887 : /* finally, free the rest. */
888 0 : uao_free(aobj);
889 0 : }
890 :
891 : /*
892 : * uao_flush: "flush" pages out of a uvm object
893 : *
894 : * => if PGO_CLEANIT is not set, then we will not block.
895 : * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
896 : * for flushing.
897 : * => NOTE: we are allowed to lock the page queues, so the caller
898 : * must not be holding the lock on them [e.g. pagedaemon had
899 : * better not call us with the queues locked]
900 : * => we return TRUE unless we encountered some sort of I/O error
901 : * XXXJRT currently never happens, as we never directly initiate
902 : * XXXJRT I/O
903 : */
904 : boolean_t
905 0 : uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
906 : {
907 0 : struct uvm_aobj *aobj = (struct uvm_aobj *) uobj;
908 : struct vm_page *pp;
909 : voff_t curoff;
910 :
911 0 : if (flags & PGO_ALLPAGES) {
912 : start = 0;
913 0 : stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
914 0 : } else {
915 0 : start = trunc_page(start);
916 0 : stop = round_page(stop);
917 0 : if (stop > ((voff_t)aobj->u_pages << PAGE_SHIFT)) {
918 0 : printf("uao_flush: strange, got an out of range "
919 : "flush (fixed)\n");
920 0 : stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
921 0 : }
922 : }
923 :
924 : /*
925 : * Don't need to do any work here if we're not freeing
926 : * or deactivating pages.
927 : */
928 0 : if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
929 0 : return (TRUE);
930 :
931 : curoff = start;
932 0 : for (;;) {
933 0 : if (curoff < stop) {
934 0 : pp = uvm_pagelookup(uobj, curoff);
935 0 : curoff += PAGE_SIZE;
936 0 : if (pp == NULL)
937 0 : continue;
938 : } else {
939 : break;
940 : }
941 :
942 : /* Make sure page is unbusy, else wait for it. */
943 0 : if (pp->pg_flags & PG_BUSY) {
944 0 : atomic_setbits_int(&pp->pg_flags, PG_WANTED);
945 0 : UVM_WAIT(pp, 0, "uaoflsh", 0);
946 : curoff -= PAGE_SIZE;
947 0 : continue;
948 : }
949 :
950 0 : switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
951 : /*
952 : * XXX In these first 3 cases, we always just
953 : * XXX deactivate the page. We may want to
954 : * XXX handle the different cases more specifically
955 : * XXX in the future.
956 : */
957 : case PGO_CLEANIT|PGO_FREE:
958 : /* FALLTHROUGH */
959 : case PGO_CLEANIT|PGO_DEACTIVATE:
960 : /* FALLTHROUGH */
961 : case PGO_DEACTIVATE:
962 : deactivate_it:
963 : /* skip the page if it's wired */
964 0 : if (pp->wire_count != 0)
965 0 : continue;
966 :
967 0 : uvm_lock_pageq();
968 : /* zap all mappings for the page. */
969 0 : pmap_page_protect(pp, PROT_NONE);
970 :
971 : /* ...and deactivate the page. */
972 0 : uvm_pagedeactivate(pp);
973 0 : uvm_unlock_pageq();
974 :
975 0 : continue;
976 : case PGO_FREE:
977 : /*
978 : * If there are multiple references to
979 : * the object, just deactivate the page.
980 : */
981 0 : if (uobj->uo_refs > 1)
982 : goto deactivate_it;
983 :
984 : /* XXX skip the page if it's wired */
985 0 : if (pp->wire_count != 0)
986 0 : continue;
987 :
988 : /* zap all mappings for the page. */
989 0 : pmap_page_protect(pp, PROT_NONE);
990 :
991 0 : uao_dropswap(uobj, pp->offset >> PAGE_SHIFT);
992 0 : uvm_lock_pageq();
993 0 : uvm_pagefree(pp);
994 0 : uvm_unlock_pageq();
995 :
996 0 : continue;
997 : default:
998 0 : panic("uao_flush: weird flags");
999 : }
1000 : }
1001 :
1002 0 : return (TRUE);
1003 0 : }
1004 :
1005 : /*
1006 : * uao_get: fetch me a page
1007 : *
1008 : * we have three cases:
1009 : * 1: page is resident -> just return the page.
1010 : * 2: page is zero-fill -> allocate a new page and zero it.
1011 : * 3: page is swapped out -> fetch the page from swap.
1012 : *
1013 : * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot.
1014 : * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
1015 : * then we will need to return VM_PAGER_UNLOCK.
1016 : *
1017 : * => flags: PGO_ALLPAGES: get all of the pages
1018 : * PGO_LOCKED: fault data structures are locked
1019 : * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
1020 : * => NOTE: caller must check for released pages!!
1021 : */
1022 : static int
1023 0 : uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
1024 : int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
1025 : {
1026 0 : struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
1027 : voff_t current_offset;
1028 : vm_page_t ptmp;
1029 : int lcv, gotpages, maxpages, swslot, rv, pageidx;
1030 : boolean_t done;
1031 :
1032 : /* get number of pages */
1033 0 : maxpages = *npagesp;
1034 :
1035 : /* step 1: handle the case where fault data structures are locked. */
1036 0 : if (flags & PGO_LOCKED) {
1037 : /* step 1a: get pages that are already resident. */
1038 :
1039 : done = TRUE; /* be optimistic */
1040 : gotpages = 0; /* # of pages we got so far */
1041 :
1042 0 : for (lcv = 0, current_offset = offset ; lcv < maxpages ;
1043 0 : lcv++, current_offset += PAGE_SIZE) {
1044 : /* do we care about this page? if not, skip it */
1045 0 : if (pps[lcv] == PGO_DONTCARE)
1046 : continue;
1047 :
1048 0 : ptmp = uvm_pagelookup(uobj, current_offset);
1049 :
1050 : /*
1051 : * if page is new, attempt to allocate the page,
1052 : * zero-fill'd.
1053 : */
1054 0 : if (ptmp == NULL && uao_find_swslot(aobj,
1055 0 : current_offset >> PAGE_SHIFT) == 0) {
1056 0 : ptmp = uvm_pagealloc(uobj, current_offset,
1057 : NULL, UVM_PGA_ZERO);
1058 0 : if (ptmp) {
1059 : /* new page */
1060 0 : atomic_clearbits_int(&ptmp->pg_flags,
1061 : PG_BUSY|PG_FAKE);
1062 0 : atomic_setbits_int(&ptmp->pg_flags,
1063 : PQ_AOBJ);
1064 : UVM_PAGE_OWN(ptmp, NULL);
1065 0 : }
1066 : }
1067 :
1068 : /* to be useful must get a non-busy page */
1069 0 : if (ptmp == NULL ||
1070 0 : (ptmp->pg_flags & PG_BUSY) != 0) {
1071 0 : if (lcv == centeridx ||
1072 0 : (flags & PGO_ALLPAGES) != 0)
1073 : /* need to do a wait or I/O! */
1074 0 : done = FALSE;
1075 : continue;
1076 : }
1077 :
1078 : /*
1079 : * useful page: busy it and plug it in our
1080 : * result array
1081 : */
1082 : /* caller must un-busy this page */
1083 0 : atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
1084 : UVM_PAGE_OWN(ptmp, "uao_get1");
1085 0 : pps[lcv] = ptmp;
1086 0 : gotpages++;
1087 :
1088 0 : }
1089 :
1090 : /*
1091 : * step 1b: now we've either done everything needed or we need
1092 : * to unlock and do some waiting or I/O.
1093 : */
1094 0 : *npagesp = gotpages;
1095 0 : if (done)
1096 : /* bingo! */
1097 0 : return(VM_PAGER_OK);
1098 : else
1099 : /* EEK! Need to unlock and I/O */
1100 0 : return(VM_PAGER_UNLOCK);
1101 : }
1102 :
1103 : /*
1104 : * step 2: get non-resident or busy pages.
1105 : * data structures are unlocked.
1106 : */
1107 0 : for (lcv = 0, current_offset = offset ; lcv < maxpages ;
1108 0 : lcv++, current_offset += PAGE_SIZE) {
1109 : /*
1110 : * - skip over pages we've already gotten or don't want
1111 : * - skip over pages we don't _have_ to get
1112 : */
1113 0 : if (pps[lcv] != NULL ||
1114 0 : (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
1115 : continue;
1116 :
1117 0 : pageidx = current_offset >> PAGE_SHIFT;
1118 :
1119 : /*
1120 : * we have yet to locate the current page (pps[lcv]). we
1121 : * first look for a page that is already at the current offset.
1122 : * if we find a page, we check to see if it is busy or
1123 : * released. if that is the case, then we sleep on the page
1124 : * until it is no longer busy or released and repeat the lookup.
1125 : * if the page we found is neither busy nor released, then we
1126 : * busy it (so we own it) and plug it into pps[lcv]. this
1127 : * 'break's the following while loop and indicates we are
1128 : * ready to move on to the next page in the "lcv" loop above.
1129 : *
1130 : * if we exit the while loop with pps[lcv] still set to NULL,
1131 : * then it means that we allocated a new busy/fake/clean page
1132 : * ptmp in the object and we need to do I/O to fill in the data.
1133 : */
1134 :
1135 : /* top of "pps" while loop */
1136 0 : while (pps[lcv] == NULL) {
1137 : /* look for a resident page */
1138 0 : ptmp = uvm_pagelookup(uobj, current_offset);
1139 :
1140 : /* not resident? allocate one now (if we can) */
1141 0 : if (ptmp == NULL) {
1142 :
1143 0 : ptmp = uvm_pagealloc(uobj, current_offset,
1144 : NULL, 0);
1145 :
1146 : /* out of RAM? */
1147 0 : if (ptmp == NULL) {
1148 0 : uvm_wait("uao_getpage");
1149 : /* goto top of pps while loop */
1150 0 : continue;
1151 : }
1152 :
1153 : /*
1154 : * safe with PQ's unlocked: because we just
1155 : * alloc'd the page
1156 : */
1157 0 : atomic_setbits_int(&ptmp->pg_flags, PQ_AOBJ);
1158 :
1159 : /*
1160 : * got new page ready for I/O. break pps while
1161 : * loop. pps[lcv] is still NULL.
1162 : */
1163 0 : break;
1164 : }
1165 :
1166 : /* page is there, see if we need to wait on it */
1167 0 : if ((ptmp->pg_flags & PG_BUSY) != 0) {
1168 0 : atomic_setbits_int(&ptmp->pg_flags, PG_WANTED);
1169 0 : UVM_WAIT(ptmp, FALSE, "uao_get", 0);
1170 0 : continue; /* goto top of pps while loop */
1171 : }
1172 :
1173 : /*
1174 : * if we get here then the page has become resident and
1175 : * unbusy between steps 1 and 2. we busy it now (so we
1176 : * own it) and set pps[lcv] (so that we exit the while
1177 : * loop).
1178 : */
1179 : /* we own it, caller must un-busy */
1180 0 : atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
1181 : UVM_PAGE_OWN(ptmp, "uao_get2");
1182 0 : pps[lcv] = ptmp;
1183 : }
1184 :
1185 : /*
1186 : * if we own the valid page at the correct offset, pps[lcv] will
1187 : * point to it. nothing more to do except go to the next page.
1188 : */
1189 0 : if (pps[lcv])
1190 : continue; /* next lcv */
1191 :
1192 : /*
1193 : * we have a "fake/busy/clean" page that we just allocated.
1194 : * do the needed "i/o", either reading from swap or zeroing.
1195 : */
1196 0 : swslot = uao_find_swslot(aobj, pageidx);
1197 :
1198 : /* just zero the page if there's nothing in swap. */
1199 0 : if (swslot == 0) {
1200 : /* page hasn't existed before, just zero it. */
1201 0 : uvm_pagezero(ptmp);
1202 0 : } else {
1203 : /* page in the swapped-out page. */
1204 0 : rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
1205 :
1206 : /* I/O done. check for errors. */
1207 0 : if (rv != VM_PAGER_OK) {
1208 : /*
1209 : * remove the swap slot from the aobj
1210 : * and mark the aobj as having no real slot.
1211 : * don't free the swap slot, thus preventing
1212 : * it from being used again.
1213 : */
1214 0 : swslot = uao_set_swslot(&aobj->u_obj, pageidx,
1215 : SWSLOT_BAD);
1216 0 : uvm_swap_markbad(swslot, 1);
1217 :
1218 0 : if (ptmp->pg_flags & PG_WANTED)
1219 0 : wakeup(ptmp);
1220 0 : atomic_clearbits_int(&ptmp->pg_flags,
1221 : PG_WANTED|PG_BUSY);
1222 : UVM_PAGE_OWN(ptmp, NULL);
1223 0 : uvm_lock_pageq();
1224 0 : uvm_pagefree(ptmp);
1225 0 : uvm_unlock_pageq();
1226 :
1227 0 : return (rv);
1228 : }
1229 : }
1230 :
1231 : /*
1232 : * we got the page! clear the fake flag (indicates valid
1233 : * data now in page) and plug into our result array. note
1234 : * that page is still busy.
1235 : *
1236 : * it is the callers job to:
1237 : * => check if the page is released
1238 : * => unbusy the page
1239 : * => activate the page
1240 : */
1241 :
1242 : /* data is valid ... */
1243 0 : atomic_clearbits_int(&ptmp->pg_flags, PG_FAKE);
1244 0 : pmap_clear_modify(ptmp); /* ... and clean */
1245 0 : pps[lcv] = ptmp;
1246 :
1247 0 : } /* lcv loop */
1248 :
1249 0 : return(VM_PAGER_OK);
1250 0 : }
1251 :
1252 : /*
1253 : * uao_dropswap: release any swap resources from this aobj page.
1254 : */
1255 : int
1256 0 : uao_dropswap(struct uvm_object *uobj, int pageidx)
1257 : {
1258 : int slot;
1259 :
1260 0 : slot = uao_set_swslot(uobj, pageidx, 0);
1261 0 : if (slot) {
1262 0 : uvm_swap_free(slot, 1);
1263 0 : }
1264 0 : return (slot);
1265 : }
1266 :
1267 : /*
1268 : * page in every page in every aobj that is paged-out to a range of swslots.
1269 : *
1270 : * => returns TRUE if pagein was aborted due to lack of memory.
1271 : */
1272 : boolean_t
1273 0 : uao_swap_off(int startslot, int endslot)
1274 : {
1275 : struct uvm_aobj *aobj, *nextaobj, *prevaobj = NULL;
1276 :
1277 : /* walk the list of all aobjs. */
1278 0 : mtx_enter(&uao_list_lock);
1279 :
1280 0 : for (aobj = LIST_FIRST(&uao_list);
1281 0 : aobj != NULL;
1282 : aobj = nextaobj) {
1283 : boolean_t rv;
1284 :
1285 : /*
1286 : * add a ref to the aobj so it doesn't disappear
1287 : * while we're working.
1288 : */
1289 0 : uao_reference_locked(&aobj->u_obj);
1290 :
1291 : /*
1292 : * now it's safe to unlock the uao list.
1293 : * note that lock interleaving is alright with IPL_NONE mutexes.
1294 : */
1295 0 : mtx_leave(&uao_list_lock);
1296 :
1297 0 : if (prevaobj) {
1298 0 : uao_detach_locked(&prevaobj->u_obj);
1299 : prevaobj = NULL;
1300 0 : }
1301 :
1302 : /*
1303 : * page in any pages in the swslot range.
1304 : * if there's an error, abort and return the error.
1305 : */
1306 0 : rv = uao_pagein(aobj, startslot, endslot);
1307 0 : if (rv) {
1308 0 : uao_detach_locked(&aobj->u_obj);
1309 0 : return rv;
1310 : }
1311 :
1312 : /*
1313 : * we're done with this aobj.
1314 : * relock the list and drop our ref on the aobj.
1315 : */
1316 0 : mtx_enter(&uao_list_lock);
1317 0 : nextaobj = LIST_NEXT(aobj, u_list);
1318 : /*
1319 : * prevaobj means that we have an object that we need
1320 : * to drop a reference for. We can't just drop it now with
1321 : * the list locked since that could cause lock recursion in
1322 : * the case where we reduce the refcount to 0. It will be
1323 : * released the next time we drop the list lock.
1324 : */
1325 : prevaobj = aobj;
1326 0 : }
1327 :
1328 : /* done with traversal, unlock the list */
1329 0 : mtx_leave(&uao_list_lock);
1330 0 : if (prevaobj) {
1331 0 : uao_detach_locked(&prevaobj->u_obj);
1332 0 : }
1333 0 : return FALSE;
1334 0 : }
1335 :
1336 : /*
1337 : * page in any pages from aobj in the given range.
1338 : *
1339 : * => returns TRUE if pagein was aborted due to lack of memory.
1340 : */
1341 : static boolean_t
1342 0 : uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
1343 : {
1344 : boolean_t rv;
1345 :
1346 0 : if (aobj->u_pages > UAO_SWHASH_THRESHOLD) {
1347 : struct uao_swhash_elt *elt;
1348 0 : int bucket;
1349 :
1350 : restart:
1351 0 : for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) {
1352 0 : for (elt = LIST_FIRST(&aobj->u_swhash[bucket]);
1353 0 : elt != NULL;
1354 0 : elt = LIST_NEXT(elt, list)) {
1355 : int i;
1356 :
1357 0 : for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
1358 0 : int slot = elt->slots[i];
1359 :
1360 : /* if slot isn't in range, skip it. */
1361 0 : if (slot < startslot ||
1362 0 : slot >= endslot) {
1363 0 : continue;
1364 : }
1365 :
1366 : /*
1367 : * process the page,
1368 : * then start over on this object
1369 : * since the swhash elt
1370 : * may have been freed.
1371 : */
1372 0 : rv = uao_pagein_page(aobj,
1373 0 : UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
1374 0 : if (rv) {
1375 0 : return rv;
1376 : }
1377 0 : goto restart;
1378 : }
1379 0 : }
1380 : }
1381 0 : } else {
1382 : int i;
1383 :
1384 0 : for (i = 0; i < aobj->u_pages; i++) {
1385 0 : int slot = aobj->u_swslots[i];
1386 :
1387 : /* if the slot isn't in range, skip it */
1388 0 : if (slot < startslot || slot >= endslot) {
1389 0 : continue;
1390 : }
1391 :
1392 : /* process the page. */
1393 0 : rv = uao_pagein_page(aobj, i);
1394 0 : if (rv) {
1395 0 : return rv;
1396 : }
1397 0 : }
1398 0 : }
1399 :
1400 0 : return FALSE;
1401 0 : }
1402 :
1403 : /*
1404 : * page in a page from an aobj. used for swap_off.
1405 : * returns TRUE if pagein was aborted due to lack of memory.
1406 : */
1407 : static boolean_t
1408 0 : uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
1409 : {
1410 0 : struct vm_page *pg;
1411 0 : int rv, slot, npages;
1412 :
1413 0 : pg = NULL;
1414 0 : npages = 1;
1415 0 : rv = uao_get(&aobj->u_obj, (voff_t)pageidx << PAGE_SHIFT,
1416 : &pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0);
1417 :
1418 0 : switch (rv) {
1419 : case VM_PAGER_OK:
1420 : break;
1421 :
1422 : case VM_PAGER_ERROR:
1423 : case VM_PAGER_REFAULT:
1424 : /*
1425 : * nothing more to do on errors.
1426 : * VM_PAGER_REFAULT can only mean that the anon was freed,
1427 : * so again there's nothing to do.
1428 : */
1429 0 : return FALSE;
1430 : }
1431 :
1432 : /*
1433 : * ok, we've got the page now.
1434 : * mark it as dirty, clear its swslot and un-busy it.
1435 : */
1436 0 : slot = uao_set_swslot(&aobj->u_obj, pageidx, 0);
1437 0 : uvm_swap_free(slot, 1);
1438 0 : atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_CLEAN|PG_FAKE);
1439 : UVM_PAGE_OWN(pg, NULL);
1440 :
1441 : /* deactivate the page (to put it on a page queue). */
1442 0 : pmap_clear_reference(pg);
1443 0 : uvm_lock_pageq();
1444 0 : uvm_pagedeactivate(pg);
1445 0 : uvm_unlock_pageq();
1446 :
1447 0 : return FALSE;
1448 0 : }
1449 :
1450 : /*
1451 : * XXX pedro: Once we are comfortable enough with this function, we can adapt
1452 : * uao_free() to use it.
1453 : *
1454 : * uao_dropswap_range: drop swapslots in the range.
1455 : *
1456 : * => aobj must be locked and is returned locked.
1457 : * => start is inclusive. end is exclusive.
1458 : */
1459 : void
1460 0 : uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
1461 : {
1462 0 : struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
1463 : int swpgonlydelta = 0;
1464 :
1465 : /* KASSERT(mutex_owned(uobj->vmobjlock)); */
1466 :
1467 0 : if (end == 0) {
1468 : end = INT64_MAX;
1469 0 : }
1470 :
1471 0 : if (aobj->u_pages > UAO_SWHASH_THRESHOLD) {
1472 0 : int i, hashbuckets = aobj->u_swhashmask + 1;
1473 : voff_t taghi;
1474 : voff_t taglo;
1475 :
1476 0 : taglo = UAO_SWHASH_ELT_TAG(start);
1477 0 : taghi = UAO_SWHASH_ELT_TAG(end);
1478 :
1479 0 : for (i = 0; i < hashbuckets; i++) {
1480 : struct uao_swhash_elt *elt, *next;
1481 :
1482 0 : for (elt = LIST_FIRST(&aobj->u_swhash[i]);
1483 0 : elt != NULL;
1484 : elt = next) {
1485 : int startidx, endidx;
1486 : int j;
1487 :
1488 0 : next = LIST_NEXT(elt, list);
1489 :
1490 0 : if (elt->tag < taglo || taghi < elt->tag) {
1491 0 : continue;
1492 : }
1493 :
1494 0 : if (elt->tag == taglo) {
1495 : startidx =
1496 0 : UAO_SWHASH_ELT_PAGESLOT_IDX(start);
1497 0 : } else {
1498 : startidx = 0;
1499 : }
1500 :
1501 0 : if (elt->tag == taghi) {
1502 : endidx =
1503 0 : UAO_SWHASH_ELT_PAGESLOT_IDX(end);
1504 0 : } else {
1505 : endidx = UAO_SWHASH_CLUSTER_SIZE;
1506 : }
1507 :
1508 0 : for (j = startidx; j < endidx; j++) {
1509 0 : int slot = elt->slots[j];
1510 :
1511 0 : KASSERT(uvm_pagelookup(&aobj->u_obj,
1512 : (voff_t)(UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
1513 : + j) << PAGE_SHIFT) == NULL);
1514 :
1515 0 : if (slot > 0) {
1516 0 : uvm_swap_free(slot, 1);
1517 0 : swpgonlydelta++;
1518 0 : KASSERT(elt->count > 0);
1519 0 : elt->slots[j] = 0;
1520 0 : elt->count--;
1521 0 : }
1522 : }
1523 :
1524 0 : if (elt->count == 0) {
1525 0 : LIST_REMOVE(elt, list);
1526 0 : pool_put(&uao_swhash_elt_pool, elt);
1527 0 : }
1528 0 : }
1529 : }
1530 0 : } else {
1531 : int i;
1532 :
1533 0 : if (aobj->u_pages < end) {
1534 : end = aobj->u_pages;
1535 0 : }
1536 0 : for (i = start; i < end; i++) {
1537 0 : int slot = aobj->u_swslots[i];
1538 :
1539 0 : if (slot > 0) {
1540 0 : uvm_swap_free(slot, 1);
1541 0 : swpgonlydelta++;
1542 0 : }
1543 : }
1544 : }
1545 :
1546 : /*
1547 : * adjust the counter of pages only in swap for all
1548 : * the swap slots we've freed.
1549 : */
1550 0 : if (swpgonlydelta > 0) {
1551 0 : KASSERT(uvmexp.swpgonly >= swpgonlydelta);
1552 0 : uvmexp.swpgonly -= swpgonlydelta;
1553 0 : }
1554 0 : }
|