Line data Source code
1 : /* $OpenBSD: uvm_km.c,v 1.130 2017/05/11 06:55:47 dlg Exp $ */
2 : /* $NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $ */
3 :
4 : /*
5 : * Copyright (c) 1997 Charles D. Cranor and Washington University.
6 : * Copyright (c) 1991, 1993, The Regents of the University of California.
7 : *
8 : * All rights reserved.
9 : *
10 : * This code is derived from software contributed to Berkeley by
11 : * The Mach Operating System project at Carnegie-Mellon University.
12 : *
13 : * Redistribution and use in source and binary forms, with or without
14 : * modification, are permitted provided that the following conditions
15 : * are met:
16 : * 1. Redistributions of source code must retain the above copyright
17 : * notice, this list of conditions and the following disclaimer.
18 : * 2. Redistributions in binary form must reproduce the above copyright
19 : * notice, this list of conditions and the following disclaimer in the
20 : * documentation and/or other materials provided with the distribution.
21 : * 3. Neither the name of the University nor the names of its contributors
22 : * may be used to endorse or promote products derived from this software
23 : * without specific prior written permission.
24 : *
25 : * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 : * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 : * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 : * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 : * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 : * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 : * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 : * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 : * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 : * SUCH DAMAGE.
36 : *
37 : * @(#)vm_kern.c 8.3 (Berkeley) 1/12/94
38 : * from: Id: uvm_km.c,v 1.1.2.14 1998/02/06 05:19:27 chs Exp
39 : *
40 : *
41 : * Copyright (c) 1987, 1990 Carnegie-Mellon University.
42 : * All rights reserved.
43 : *
44 : * Permission to use, copy, modify and distribute this software and
45 : * its documentation is hereby granted, provided that both the copyright
46 : * notice and this permission notice appear in all copies of the
47 : * software, derivative works or modified versions, and any portions
48 : * thereof, and that both notices appear in supporting documentation.
49 : *
50 : * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 : * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 : * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 : *
54 : * Carnegie Mellon requests users of this software to return to
55 : *
56 : * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
57 : * School of Computer Science
58 : * Carnegie Mellon University
59 : * Pittsburgh PA 15213-3890
60 : *
61 : * any improvements or extensions that they make and grant Carnegie the
62 : * rights to redistribute these changes.
63 : */
64 :
65 : /*
66 : * uvm_km.c: handle kernel memory allocation and management
67 : */
68 :
69 : /*
70 : * overview of kernel memory management:
71 : *
72 : * the kernel virtual address space is mapped by "kernel_map." kernel_map
73 : * starts at a machine-dependent address and is VM_KERNEL_SPACE_SIZE bytes
74 : * large.
75 : *
76 : * the kernel_map has several "submaps." submaps can only appear in
77 : * the kernel_map (user processes can't use them). submaps "take over"
78 : * the management of a sub-range of the kernel's address space. submaps
79 : * are typically allocated at boot time and are never released. kernel
80 : * virtual address space that is mapped by a submap is locked by the
81 : * submap's lock -- not the kernel_map's lock.
82 : *
83 : * thus, the useful feature of submaps is that they allow us to break
84 : * up the locking and protection of the kernel address space into smaller
85 : * chunks.
86 : *
87 : * The VM system has several standard kernel submaps:
88 : * kmem_map: Contains only wired kernel memory for malloc(9).
89 : * Note: All access to this map must be protected by splvm as
90 : * calls to malloc(9) are allowed in interrupt handlers.
91 : * exec_map: Memory to hold arguments to system calls is allocated from
92 : * this map.
93 : * XXX: This is primarily used to artificially limit the number
94 : * of concurrent processes doing an exec.
95 : * phys_map: Buffers for vmapbuf (physio) are allocated from this map.
96 : *
97 : * the kernel allocates its private memory out of special uvm_objects whose
98 : * reference count is set to UVM_OBJ_KERN (thus indicating that the objects
99 : * are "special" and never die). all kernel objects should be thought of
100 : * as large, fixed-size, sparsely populated uvm_objects. each kernel
101 : * object is the size of the kernel virtual address space (i.e.
102 : * VM_KERNEL_SPACE_SIZE).
103 : *
104 : * most kernel private memory lives in kernel_object. the only exception
105 : * to this is for memory that belongs to submaps that must be protected
106 : * by splvm(). each of these submaps manages its own pages.
107 : *
108 : * note that just because a kernel object spans the entire kernel virtual
109 : * address space doesn't mean that it has to be mapped into the entire space.
110 : * large chunks of a kernel object's space go unused either because
111 : * that area of kernel VM is unmapped, or there is some other type of
112 : * object mapped into that range (e.g. a vnode). for a submap's kernel
113 : * object, the only offsets that can ever be populated are those that
114 : * are managed by the submap.
115 : *
116 : * note that the "offset" in a kernel object is always the kernel virtual
117 : * address minus the vm_map_min(kernel_map).
118 : * example:
119 : * suppose kernel_map starts at 0xf8000000 and the kernel does a
120 : * uvm_km_alloc(kernel_map, PAGE_SIZE) [allocate 1 wired down page in the
121 : * kernel map]. if uvm_km_alloc returns virtual address 0xf8235000,
122 : * then that means that the page at offset 0x235000 in kernel_object is
123 : * mapped at 0xf8235000.
124 : *
125 : * kernel objects have one other special property: when the kernel virtual
126 : * memory mapping them is unmapped, the backing memory in the object is
127 : * freed right away. this is done with the uvm_km_pgremove() function.
128 : * this has to be done because there is no backing store for kernel pages
129 : * and no need to save them after they are no longer referenced.
130 : */
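/*
 * illustrative sketch (not compiled here): given a kernel virtual
 * address "kva" backed by kernel_object, its object offset is simply
 *
 *	voff_t off = kva - vm_map_min(kernel_map);
 *
 * which, with the 0xf8000000 example above, puts the page mapped at
 * 0xf8235000 at offset 0x235000 in kernel_object.
 */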
131 :
132 : #include <sys/param.h>
133 : #include <sys/systm.h>
134 : #include <sys/kthread.h>
135 : #include <uvm/uvm.h>
136 :
137 : /*
138 : * global data structures
139 : */
140 :
141 : struct vm_map *kernel_map = NULL;
142 :
143 : /* Unconstrained range. */
144 : struct uvm_constraint_range no_constraint = { 0x0, (paddr_t)-1 };
145 :
146 : /*
147 : * local data structures
148 : */
149 : static struct vm_map kernel_map_store;
150 :
151 : /*
152 : * uvm_km_init: init kernel maps and objects to reflect reality (i.e.
153 : * KVM already allocated for text, data, bss, and static data structures).
154 : *
155 : * => KVM is defined by [base .. base + VM_KERNEL_SPACE_SIZE].
156 : * we assume that [base -> start] has already been allocated and that
157 : * "end" is the end of the kernel image span.
158 : */
159 : void
160 0 : uvm_km_init(vaddr_t base, vaddr_t start, vaddr_t end)
161 : {
162 : /* kernel_object: for pageable anonymous kernel memory */
163 0 : uao_init();
164 0 : uvm.kernel_object = uao_create(VM_KERNEL_SPACE_SIZE, UAO_FLAG_KERNOBJ);
165 :
166 : /*
167 : * init the map and reserve already allocated kernel space
168 : * before installing.
169 : */
170 :
171 0 : uvm_map_setup(&kernel_map_store, base, end,
172 : #ifdef KVA_GUARDPAGES
173 : VM_MAP_PAGEABLE | VM_MAP_GUARDPAGES
174 : #else
175 : VM_MAP_PAGEABLE
176 : #endif
177 : );
178 0 : kernel_map_store.pmap = pmap_kernel();
179 0 : if (base != start && uvm_map(&kernel_map_store, &base, start - base,
180 : NULL, UVM_UNKNOWN_OFFSET, 0,
181 : UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
182 0 : MAP_INHERIT_NONE, MADV_RANDOM, UVM_FLAG_FIXED)) != 0)
183 0 : panic("uvm_km_init: could not reserve space for kernel");
184 :
185 0 : kernel_map = &kernel_map_store;
186 0 : }
187 :
188 : /*
189 : * uvm_km_suballoc: allocate a submap in the kernel map. once a submap
190 : * is allocated all references to that area of VM must go through it. this
191 : * allows the locking of VAs in kernel_map to be broken up into regions.
192 : *
193 : * => if `fixed' is true, *min specifies where the region described
194 : * by the submap must start
195 : * => if submap is non-NULL we use that as the submap, otherwise we
196 : * allocate a new map
197 : */
198 : struct vm_map *
199 0 : uvm_km_suballoc(struct vm_map *map, vaddr_t *min, vaddr_t *max, vsize_t size,
200 : int flags, boolean_t fixed, struct vm_map *submap)
201 : {
202 0 : int mapflags = UVM_FLAG_NOMERGE | (fixed ? UVM_FLAG_FIXED : 0);
203 :
204 0 : size = round_page(size); /* round up to pagesize */
205 :
206 : /* first allocate a blank spot in the parent map */
207 0 : if (uvm_map(map, min, size, NULL, UVM_UNKNOWN_OFFSET, 0,
208 0 : UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
209 0 : MAP_INHERIT_NONE, MADV_RANDOM, mapflags)) != 0) {
210 0 : panic("uvm_km_suballoc: unable to allocate space in parent map");
211 : }
212 :
213 : /* set VM bounds (min is filled in by uvm_map) */
214 0 : *max = *min + size;
215 :
216 : /* add references to pmap and create or init the submap */
217 0 : pmap_reference(vm_map_pmap(map));
218 0 : if (submap == NULL) {
219 0 : submap = uvm_map_create(vm_map_pmap(map), *min, *max, flags);
220 0 : if (submap == NULL)
221 0 : panic("uvm_km_suballoc: unable to create submap");
222 : } else {
223 0 : uvm_map_setup(submap, *min, *max, flags);
224 0 : submap->pmap = vm_map_pmap(map);
225 : }
226 :
227 : /* now let uvm_map_submap plug it in... */
228 0 : if (uvm_map_submap(map, *min, *max, submap) != 0)
229 0 : panic("uvm_km_suballoc: submap allocation failed");
230 :
231 0 : return(submap);
232 : }
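/*
 * usage sketch (hedged): roughly how kmeminit() in kern_malloc.c carves
 * kmem_map out of kernel_map; names such as kmem_map_store and
 * nkmempages belong to that caller and are shown only for illustration.
 *
 *	vaddr_t base, limit;
 *
 *	kmem_map = uvm_km_suballoc(kernel_map, &base, &limit,
 *	    (vsize_t)nkmempages << PAGE_SHIFT, VM_MAP_INTRSAFE,
 *	    FALSE, &kmem_map_store);
 *
 * after this, all VA management in [base, limit) goes through kmem_map
 * and its own lock rather than kernel_map's.
 */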
233 :
234 : /*
235 : * uvm_km_pgremove: remove pages from a kernel uvm_object.
236 : *
237 : * => when you unmap a part of anonymous kernel memory you want to toss
238 : * the pages right away. (this gets called from uvm_unmap_...).
239 : */
240 : void
241 0 : uvm_km_pgremove(struct uvm_object *uobj, vaddr_t start, vaddr_t end)
242 : {
243 : struct vm_page *pp;
244 : voff_t curoff;
245 : int slot;
246 :
247 0 : KASSERT(uobj->pgops == &aobj_pager);
248 :
249 0 : for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) {
250 0 : pp = uvm_pagelookup(uobj, curoff);
251 0 : if (pp && pp->pg_flags & PG_BUSY) {
252 0 : atomic_setbits_int(&pp->pg_flags, PG_WANTED);
253 0 : UVM_WAIT(pp, 0, "km_pgrm", 0);
254 0 : curoff -= PAGE_SIZE; /* loop back to us */
255 0 : continue;
256 : }
257 :
258 : /* free the swap slot, then the page */
259 0 : slot = uao_dropswap(uobj, curoff >> PAGE_SHIFT);
260 :
261 0 : if (pp != NULL) {
262 0 : uvm_lock_pageq();
263 0 : uvm_pagefree(pp);
264 0 : uvm_unlock_pageq();
265 0 : } else if (slot != 0) {
266 0 : uvmexp.swpgonly--;
267 0 : }
268 : }
269 0 : }
270 :
271 :
272 : /*
273 : * uvm_km_pgremove_intrsafe: like uvm_km_pgremove(), but for "intrsafe"
274 : * objects
275 : *
276 : * => when you unmap a part of anonymous kernel memory you want to toss
277 : * the pages right away. (this gets called from uvm_unmap_...).
278 : * => none of the pages will ever be busy, and none of them will ever
279 : * be on the active or inactive queues (because these objects are
280 : * never allowed to "page").
281 : */
282 : void
283 0 : uvm_km_pgremove_intrsafe(vaddr_t start, vaddr_t end)
284 : {
285 : struct vm_page *pg;
286 : vaddr_t va;
287 0 : paddr_t pa;
288 :
289 0 : for (va = start; va < end; va += PAGE_SIZE) {
290 0 : if (!pmap_extract(pmap_kernel(), va, &pa))
291 : continue;
292 0 : pg = PHYS_TO_VM_PAGE(pa);
293 0 : if (pg == NULL)
294 0 : panic("uvm_km_pgremove_intrsafe: no page");
295 0 : uvm_pagefree(pg);
296 0 : }
297 0 : }
298 :
299 : /*
300 : * uvm_km_kmemalloc: lower level kernel memory allocator for malloc()
301 : *
302 : * => we map wired memory into the specified map using the obj passed in
303 : * => NOTE: we can fail (return 0) even if we can wait if there is not
304 : * enough free VM space in the map... the caller should be prepared
305 : * to handle this case.
306 : * => we return KVA of memory allocated
307 : * => flags: NOWAIT, VALLOC - just allocate VA, TRYLOCK - fail if we can't
308 : * lock the map
309 : * => low, high, alignment, boundary, nsegs are the corresponding parameters
310 : * to uvm_pglistalloc
311 : * => flags: ZERO - correspond to uvm_pglistalloc flags
312 : */
313 : vaddr_t
314 0 : uvm_km_kmemalloc_pla(struct vm_map *map, struct uvm_object *obj, vsize_t size,
315 : vsize_t valign, int flags, paddr_t low, paddr_t high, paddr_t alignment,
316 : paddr_t boundary, int nsegs)
317 : {
318 0 : vaddr_t kva, loopva;
319 : voff_t offset;
320 : struct vm_page *pg;
321 0 : struct pglist pgl;
322 : int pla_flags;
323 :
324 0 : KASSERT(vm_map_pmap(map) == pmap_kernel());
325 : /* UVM_KMF_VALLOC => !UVM_KMF_ZERO */
326 0 : KASSERT(!(flags & UVM_KMF_VALLOC) ||
327 : !(flags & UVM_KMF_ZERO));
328 :
329 : /* setup for call */
330 0 : size = round_page(size);
331 0 : kva = vm_map_min(map); /* hint */
332 0 : if (nsegs == 0)
333 0 : nsegs = atop(size);
334 :
335 : /* allocate some virtual space */
336 0 : if (__predict_false(uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET,
337 : valign, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
338 : MAP_INHERIT_NONE, MADV_RANDOM, (flags & UVM_KMF_TRYLOCK))) != 0)) {
339 0 : return(0);
340 : }
341 :
342 : /* if all we wanted was VA, return now */
343 0 : if (flags & UVM_KMF_VALLOC) {
344 0 : return(kva);
345 : }
346 :
347 : /* recover object offset from virtual address */
348 0 : if (obj != NULL)
349 0 : offset = kva - vm_map_min(kernel_map);
350 : else
351 : offset = 0;
352 :
353 : /*
354 : * now allocate and map in the memory... note that we are the only ones
355 : * who should ever get a handle on this area of VM.
356 : */
357 0 : TAILQ_INIT(&pgl);
358 : pla_flags = 0;
359 0 : KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
360 0 : if ((flags & UVM_KMF_NOWAIT) ||
361 0 : ((flags & UVM_KMF_CANFAIL) &&
362 0 : uvmexp.swpages - uvmexp.swpgonly <= atop(size)))
363 0 : pla_flags |= UVM_PLA_NOWAIT;
364 : else
365 : pla_flags |= UVM_PLA_WAITOK;
366 0 : if (flags & UVM_KMF_ZERO)
367 0 : pla_flags |= UVM_PLA_ZERO;
368 0 : if (uvm_pglistalloc(size, low, high, alignment, boundary, &pgl, nsegs,
369 0 : pla_flags) != 0) {
370 : /* Failed. */
371 0 : uvm_unmap(map, kva, kva + size);
372 0 : return (0);
373 : }
374 :
375 0 : loopva = kva;
376 0 : while (loopva != kva + size) {
377 : pg = TAILQ_FIRST(&pgl);
378 0 : TAILQ_REMOVE(&pgl, pg, pageq);
379 0 : uvm_pagealloc_pg(pg, obj, offset, NULL);
380 0 : atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
381 : UVM_PAGE_OWN(pg, NULL);
382 :
383 : /*
384 : * map it in: note that we call pmap_enter with the map and
385 : * object unlocked in case we are kmem_map.
386 : */
387 0 : if (obj == NULL) {
388 0 : pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg),
389 : PROT_READ | PROT_WRITE);
390 0 : } else {
391 0 : pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
392 : PROT_READ | PROT_WRITE,
393 : PROT_READ | PROT_WRITE | PMAP_WIRED);
394 : }
395 0 : loopva += PAGE_SIZE;
396 0 : offset += PAGE_SIZE;
397 : }
398 0 : KASSERT(TAILQ_EMPTY(&pgl));
399 : pmap_update(pmap_kernel());
400 :
401 0 : return(kva);
402 0 : }
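/*
 * usage sketch (hedged): a malloc(9)-style caller allocating wired
 * pages with no particular physical constraint might look roughly like
 * this ("waitok" is a hypothetical flag of the caller):
 *
 *	va = uvm_km_kmemalloc_pla(kmem_map, NULL, round_page(sz), 0,
 *	    waitok ? 0 : UVM_KMF_NOWAIT,
 *	    no_constraint.ucr_low, no_constraint.ucr_high, 0, 0, 0);
 *	if (va == 0)
 *		... handle allocation failure ...
 *
 * with obj == NULL the pages are entered with pmap_kenter_pa() above,
 * and the region is later torn down with uvm_km_free()/uvm_unmap().
 */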
403 :
404 : /*
405 : * uvm_km_free: free an area of kernel memory
406 : */
407 : void
408 0 : uvm_km_free(struct vm_map *map, vaddr_t addr, vsize_t size)
409 : {
410 0 : uvm_unmap(map, trunc_page(addr), round_page(addr+size));
411 0 : }
412 :
413 : /*
414 : * uvm_km_free_wakeup: free an area of kernel memory and wake up
415 : * anyone waiting for vm space.
416 : *
417 : * => XXX: "wanted" bit + unlock&wait on other end?
418 : */
419 : void
420 0 : uvm_km_free_wakeup(struct vm_map *map, vaddr_t addr, vsize_t size)
421 : {
422 0 : struct uvm_map_deadq dead_entries;
423 :
424 0 : vm_map_lock(map);
425 0 : TAILQ_INIT(&dead_entries);
426 0 : uvm_unmap_remove(map, trunc_page(addr), round_page(addr+size),
427 : &dead_entries, FALSE, TRUE);
428 0 : wakeup(map);
429 0 : vm_map_unlock(map);
430 :
431 0 : uvm_unmap_detach(&dead_entries, 0);
432 0 : }
433 :
434 : /*
435 : * uvm_km_alloc1: allocate wired down memory in the kernel map.
436 : *
437 : * => we can sleep if needed
438 : */
439 : vaddr_t
440 0 : uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align, boolean_t zeroit)
441 : {
442 0 : vaddr_t kva, loopva;
443 : voff_t offset;
444 : struct vm_page *pg;
445 :
446 0 : KASSERT(vm_map_pmap(map) == pmap_kernel());
447 :
448 0 : size = round_page(size);
449 0 : kva = vm_map_min(map); /* hint */
450 :
451 : /* allocate some virtual space */
452 0 : if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object,
453 : UVM_UNKNOWN_OFFSET, align,
454 : UVM_MAPFLAG(PROT_READ | PROT_WRITE,
455 : PROT_READ | PROT_WRITE | PROT_EXEC,
456 : MAP_INHERIT_NONE, MADV_RANDOM, 0)) != 0)) {
457 0 : return(0);
458 : }
459 :
460 : /* recover object offset from virtual address */
461 0 : offset = kva - vm_map_min(kernel_map);
462 :
463 : /* now allocate the memory. we must be careful about released pages. */
464 : loopva = kva;
465 0 : while (size) {
466 : /* allocate ram */
467 0 : pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0);
468 0 : if (pg) {
469 0 : atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
470 : UVM_PAGE_OWN(pg, NULL);
471 0 : }
472 0 : if (__predict_false(pg == NULL)) {
473 0 : if (curproc == uvm.pagedaemon_proc) {
474 : /*
475 : * It is unfeasible for the page daemon to
476 : * sleep for memory, so free what we have
477 : * allocated and fail.
478 : */
479 0 : uvm_unmap(map, kva, loopva - kva);
480 0 : return (0);
481 : } else {
482 0 : uvm_wait("km_alloc1w"); /* wait for memory */
483 0 : continue;
484 : }
485 : }
486 :
487 : /*
488 : * map it in; note we're never called with an intrsafe
489 : * object, so we always use regular old pmap_enter().
490 : */
491 0 : pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
492 : PROT_READ | PROT_WRITE,
493 : PROT_READ | PROT_WRITE | PMAP_WIRED);
494 :
495 0 : loopva += PAGE_SIZE;
496 0 : offset += PAGE_SIZE;
497 0 : size -= PAGE_SIZE;
498 : }
499 : pmap_update(map->pmap);
500 :
501 : /*
502 : * zero on request (note that "size" is now zero due to the above loop
503 : * so we need to subtract kva from loopva to reconstruct the size).
504 : */
505 0 : if (zeroit)
506 0 : memset((caddr_t)kva, 0, loopva - kva);
507 :
508 0 : return(kva);
509 0 : }
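/*
 * usage sketch (hypothetical caller): zeroed, wired kernel memory,
 * sleeping until pages become available, freed with uvm_km_free():
 *
 *	vaddr_t kva = uvm_km_alloc1(kernel_map, round_page(len), 0, TRUE);
 *	...
 *	uvm_km_free(kernel_map, kva, round_page(len));
 */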
510 :
511 : /*
512 : * uvm_km_valloc: allocate zero-fill memory in the kernel's address space
513 : *
514 : * => memory is not allocated until fault time
515 : */
516 :
517 : vaddr_t
518 0 : uvm_km_valloc(struct vm_map *map, vsize_t size)
519 : {
520 0 : return(uvm_km_valloc_align(map, size, 0, 0));
521 : }
522 :
523 : vaddr_t
524 0 : uvm_km_valloc_try(struct vm_map *map, vsize_t size)
525 : {
526 0 : return(uvm_km_valloc_align(map, size, 0, UVM_FLAG_TRYLOCK));
527 : }
528 :
529 : vaddr_t
530 0 : uvm_km_valloc_align(struct vm_map *map, vsize_t size, vsize_t align, int flags)
531 : {
532 0 : vaddr_t kva;
533 :
534 0 : KASSERT(vm_map_pmap(map) == pmap_kernel());
535 :
536 0 : size = round_page(size);
537 0 : kva = vm_map_min(map); /* hint */
538 :
539 : /* allocate some virtual space, demand filled by kernel_object. */
540 :
541 0 : if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object,
542 : UVM_UNKNOWN_OFFSET, align,
543 : UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
544 : MAP_INHERIT_NONE, MADV_RANDOM, flags)) != 0)) {
545 0 : return(0);
546 : }
547 :
548 0 : return(kva);
549 0 : }
550 :
551 : /*
552 : * uvm_km_valloc_prefer_wait: allocate zero-fill memory in the kernel's address space
553 : *
554 : * => memory is not allocated until fault time
555 : * => if no room in map, wait for space to free, unless requested size
556 : * is larger than map (in which case we return 0)
557 : */
558 : vaddr_t
559 0 : uvm_km_valloc_prefer_wait(struct vm_map *map, vsize_t size, voff_t prefer)
560 : {
561 0 : vaddr_t kva;
562 :
563 0 : KASSERT(vm_map_pmap(map) == pmap_kernel());
564 :
565 0 : size = round_page(size);
566 0 : if (size > vm_map_max(map) - vm_map_min(map))
567 0 : return(0);
568 :
569 0 : while (1) {
570 0 : kva = vm_map_min(map); /* hint */
571 :
572 : /*
573 : * allocate some virtual space. will be demand filled
574 : * by kernel_object.
575 : */
576 0 : if (__predict_true(uvm_map(map, &kva, size, uvm.kernel_object,
577 : prefer, 0,
578 : UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
579 : MAP_INHERIT_NONE, MADV_RANDOM, 0)) == 0)) {
580 0 : return(kva);
581 : }
582 :
583 : /* failed. sleep for a while (on map) */
584 0 : tsleep(map, PVM, "vallocwait", 0);
585 : }
586 : /*NOTREACHED*/
587 0 : }
588 :
589 : vaddr_t
590 0 : uvm_km_valloc_wait(struct vm_map *map, vsize_t size)
591 : {
592 0 : return uvm_km_valloc_prefer_wait(map, size, UVM_UNKNOWN_OFFSET);
593 : }
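/*
 * usage sketch (hypothetical caller): reserving pageable space from a
 * small submap such as exec_map, sleeping until room is available and
 * waking other waiters when the space is returned:
 *
 *	vaddr_t argp = uvm_km_valloc_wait(exec_map, NCARGS);
 *	...
 *	uvm_km_free_wakeup(exec_map, argp, NCARGS);
 */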
594 :
595 : #if defined(__HAVE_PMAP_DIRECT)
596 : /*
597 : * uvm_km_page allocator, __HAVE_PMAP_DIRECT arch
598 : * On architectures with machine memory direct mapped into a portion
599 : * of KVM, we have very little work to do. Just get a physical page,
600 : * and find and return its VA.
601 : */
602 : void
603 0 : uvm_km_page_init(void)
604 : {
605 : /* nothing */
606 0 : }
607 :
608 : void
609 0 : uvm_km_page_lateinit(void)
610 : {
611 : /* nothing */
612 0 : }
613 :
614 : #else
615 : /*
616 : * uvm_km_page allocator, non __HAVE_PMAP_DIRECT archs
617 : * This is a special allocator that uses a reserve of free pages
618 : * to fulfill requests. It is fast and interrupt safe, but can only
619 : * return page sized regions. Its primary use is as a backend for pool.
620 : *
621 : * The memory returned is allocated from the larger kernel_map, sparing
622 : * pressure on the small interrupt-safe kmem_map. It is wired, but
623 : * not zero filled.
624 : */
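/*
 * a page from this reserve is normally obtained through km_alloc()
 * with the kv_page mode defined at the end of this file; a hedged
 * sketch of what a pool(9)-style backend does:
 *
 *	void *va = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, &kd_waitok);
 *	...
 *	km_free(va, PAGE_SIZE, &kv_page, &kp_dirty);
 */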
625 :
626 : struct uvm_km_pages uvm_km_pages;
627 :
628 : void uvm_km_createthread(void *);
629 : void uvm_km_thread(void *);
630 : struct uvm_km_free_page *uvm_km_doputpage(struct uvm_km_free_page *);
631 :
632 : /*
633 : * Allocate the initial reserve, and create the thread which will
634 : * keep the reserve full. For bootstrapping, we allocate more than
635 : * the lowat amount, because it may be a while before the thread is
636 : * running.
637 : */
638 : void
639 : uvm_km_page_init(void)
640 : {
641 : int lowat_min;
642 : int i;
643 : int len, bulk;
644 : vaddr_t addr;
645 :
646 : mtx_init(&uvm_km_pages.mtx, IPL_VM);
647 : if (!uvm_km_pages.lowat) {
648 : /* based on physmem, calculate a good value here */
649 : uvm_km_pages.lowat = physmem / 256;
650 : lowat_min = physmem < atop(16 * 1024 * 1024) ? 32 : 128;
651 : if (uvm_km_pages.lowat < lowat_min)
652 : uvm_km_pages.lowat = lowat_min;
653 : }
654 : if (uvm_km_pages.lowat > UVM_KM_PAGES_LOWAT_MAX)
655 : uvm_km_pages.lowat = UVM_KM_PAGES_LOWAT_MAX;
656 : uvm_km_pages.hiwat = 4 * uvm_km_pages.lowat;
657 : if (uvm_km_pages.hiwat > UVM_KM_PAGES_HIWAT_MAX)
658 : uvm_km_pages.hiwat = UVM_KM_PAGES_HIWAT_MAX;
659 :
660 : /* Allocate all pages in as few allocations as possible. */
661 : len = 0;
662 : bulk = uvm_km_pages.hiwat;
663 : while (len < uvm_km_pages.hiwat && bulk > 0) {
664 : bulk = MIN(bulk, uvm_km_pages.hiwat - len);
665 : addr = vm_map_min(kernel_map);
666 : if (uvm_map(kernel_map, &addr, (vsize_t)bulk << PAGE_SHIFT,
667 : NULL, UVM_UNKNOWN_OFFSET, 0,
668 : UVM_MAPFLAG(PROT_READ | PROT_WRITE,
669 : PROT_READ | PROT_WRITE, MAP_INHERIT_NONE,
670 : MADV_RANDOM, UVM_KMF_TRYLOCK)) != 0) {
671 : bulk /= 2;
672 : continue;
673 : }
674 :
675 : for (i = len; i < len + bulk; i++, addr += PAGE_SIZE)
676 : uvm_km_pages.page[i] = addr;
677 : len += bulk;
678 : }
679 :
680 : uvm_km_pages.free = len;
681 : for (i = len; i < UVM_KM_PAGES_HIWAT_MAX; i++)
682 : uvm_km_pages.page[i] = 0;
683 :
684 : /* tone down if really high */
685 : if (uvm_km_pages.lowat > 512)
686 : uvm_km_pages.lowat = 512;
687 : }
688 :
689 : void
690 : uvm_km_page_lateinit(void)
691 : {
692 : kthread_create_deferred(uvm_km_createthread, NULL);
693 : }
694 :
695 : void
696 : uvm_km_createthread(void *arg)
697 : {
698 : kthread_create(uvm_km_thread, NULL, &uvm_km_pages.km_proc, "kmthread");
699 : }
700 :
701 : /*
702 : * Endless loop. We grab pages in increments of 16 pages, then
703 : * quickly swap them into the list. At some point we can consider
704 : * returning memory to the system if we have too many free pages,
705 : * but that's not implemented yet.
706 : */
707 : void
708 : uvm_km_thread(void *arg)
709 : {
710 : vaddr_t pg[16];
711 : int i;
712 : int allocmore = 0;
713 : int flags;
714 : struct uvm_km_free_page *fp = NULL;
715 :
716 : KERNEL_UNLOCK();
717 :
718 : for (;;) {
719 : mtx_enter(&uvm_km_pages.mtx);
720 : if (uvm_km_pages.free >= uvm_km_pages.lowat &&
721 : uvm_km_pages.freelist == NULL) {
722 : msleep(&uvm_km_pages.km_proc, &uvm_km_pages.mtx,
723 : PVM, "kmalloc", 0);
724 : }
725 : allocmore = uvm_km_pages.free < uvm_km_pages.lowat;
726 : fp = uvm_km_pages.freelist;
727 : uvm_km_pages.freelist = NULL;
728 : uvm_km_pages.freelistlen = 0;
729 : mtx_leave(&uvm_km_pages.mtx);
730 :
731 : if (allocmore) {
732 : /*
733 : * If there was nothing on the freelist, then we
734 : * must obtain at least one page to make progress.
735 : * So, only use UVM_KMF_TRYLOCK for the first page
736 : * if fp != NULL
737 : */
738 : flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
739 : PROT_READ | PROT_WRITE, MAP_INHERIT_NONE,
740 : MADV_RANDOM, fp != NULL ? UVM_KMF_TRYLOCK : 0);
741 : memset(pg, 0, sizeof(pg));
742 : for (i = 0; i < nitems(pg); i++) {
743 : pg[i] = vm_map_min(kernel_map);
744 : if (uvm_map(kernel_map, &pg[i], PAGE_SIZE,
745 : NULL, UVM_UNKNOWN_OFFSET, 0, flags) != 0) {
746 : pg[i] = 0;
747 : break;
748 : }
749 :
750 : /* made progress, so don't sleep for more */
751 : flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
752 : PROT_READ | PROT_WRITE, MAP_INHERIT_NONE,
753 : MADV_RANDOM, UVM_KMF_TRYLOCK);
754 : }
755 :
756 : mtx_enter(&uvm_km_pages.mtx);
757 : for (i = 0; i < nitems(pg); i++) {
758 : if (uvm_km_pages.free ==
759 : nitems(uvm_km_pages.page))
760 : break;
761 : else if (pg[i] != 0)
762 : uvm_km_pages.page[uvm_km_pages.free++]
763 : = pg[i];
764 : }
765 : wakeup(&uvm_km_pages.free);
766 : mtx_leave(&uvm_km_pages.mtx);
767 :
768 : /* Cleanup left-over pages (if any). */
769 : for (; i < nitems(pg); i++) {
770 : if (pg[i] != 0) {
771 : uvm_unmap(kernel_map,
772 : pg[i], pg[i] + PAGE_SIZE);
773 : }
774 : }
775 : }
776 : while (fp) {
777 : fp = uvm_km_doputpage(fp);
778 : }
779 : }
780 : }
781 :
782 : struct uvm_km_free_page *
783 : uvm_km_doputpage(struct uvm_km_free_page *fp)
784 : {
785 : vaddr_t va = (vaddr_t)fp;
786 : struct vm_page *pg;
787 : int freeva = 1;
788 : struct uvm_km_free_page *nextfp = fp->next;
789 :
790 : pg = uvm_atopg(va);
791 :
792 : pmap_kremove(va, PAGE_SIZE);
793 : pmap_update(kernel_map->pmap);
794 :
795 : mtx_enter(&uvm_km_pages.mtx);
796 : if (uvm_km_pages.free < uvm_km_pages.hiwat) {
797 : uvm_km_pages.page[uvm_km_pages.free++] = va;
798 : freeva = 0;
799 : }
800 : mtx_leave(&uvm_km_pages.mtx);
801 :
802 : if (freeva)
803 : uvm_unmap(kernel_map, va, va + PAGE_SIZE);
804 :
805 : uvm_pagefree(pg);
806 : return (nextfp);
807 : }
808 : #endif /* !__HAVE_PMAP_DIRECT */
809 :
810 : void *
811 0 : km_alloc(size_t sz, const struct kmem_va_mode *kv,
812 : const struct kmem_pa_mode *kp, const struct kmem_dyn_mode *kd)
813 : {
814 : struct vm_map *map;
815 : struct vm_page *pg;
816 0 : struct pglist pgl;
817 : int mapflags = 0;
818 : vm_prot_t prot;
819 : paddr_t pla_align;
820 : int pla_flags;
821 : int pla_maxseg;
822 0 : vaddr_t va, sva;
823 :
824 0 : KASSERT(sz == round_page(sz));
825 :
826 0 : TAILQ_INIT(&pgl);
827 :
828 0 : if (kp->kp_nomem || kp->kp_pageable)
829 : goto alloc_va;
830 :
831 0 : pla_flags = kd->kd_waitok ? UVM_PLA_WAITOK : UVM_PLA_NOWAIT;
832 0 : pla_flags |= UVM_PLA_TRYCONTIG;
833 0 : if (kp->kp_zero)
834 0 : pla_flags |= UVM_PLA_ZERO;
835 :
836 0 : pla_align = kp->kp_align;
837 : #ifdef __HAVE_PMAP_DIRECT
838 0 : if (pla_align < kv->kv_align)
839 0 : pla_align = kv->kv_align;
840 : #endif
841 0 : pla_maxseg = kp->kp_maxseg;
842 0 : if (pla_maxseg == 0)
843 0 : pla_maxseg = sz / PAGE_SIZE;
844 :
845 0 : if (uvm_pglistalloc(sz, kp->kp_constraint->ucr_low,
846 0 : kp->kp_constraint->ucr_high, pla_align, kp->kp_boundary,
847 : &pgl, pla_maxseg, pla_flags)) {
848 0 : return (NULL);
849 : }
850 :
851 : #ifdef __HAVE_PMAP_DIRECT
852 : /*
853 : * Only use direct mappings for single page or single segment
854 : * allocations.
855 : */
856 0 : if (kv->kv_singlepage || kp->kp_maxseg == 1) {
857 0 : TAILQ_FOREACH(pg, &pgl, pageq) {
858 0 : va = pmap_map_direct(pg);
859 0 : if (pg == TAILQ_FIRST(&pgl))
860 0 : sva = va;
861 : }
862 0 : return ((void *)sva);
863 : }
864 : #endif
865 : alloc_va:
866 : prot = PROT_READ | PROT_WRITE;
867 :
868 0 : if (kp->kp_pageable) {
869 0 : KASSERT(kp->kp_object);
870 0 : KASSERT(!kv->kv_singlepage);
871 : } else {
872 0 : KASSERT(kp->kp_object == NULL);
873 : }
874 :
875 0 : if (kv->kv_singlepage) {
876 0 : KASSERT(sz == PAGE_SIZE);
877 : #ifdef __HAVE_PMAP_DIRECT
878 0 : panic("km_alloc: DIRECT single page");
879 : #else
880 : mtx_enter(&uvm_km_pages.mtx);
881 : while (uvm_km_pages.free == 0) {
882 : if (kd->kd_waitok == 0) {
883 : mtx_leave(&uvm_km_pages.mtx);
884 : uvm_pglistfree(&pgl);
885 : return NULL;
886 : }
887 : msleep(&uvm_km_pages.free, &uvm_km_pages.mtx, PVM,
888 : "getpage", 0);
889 : }
890 : va = uvm_km_pages.page[--uvm_km_pages.free];
891 : if (uvm_km_pages.free < uvm_km_pages.lowat &&
892 : curproc != uvm_km_pages.km_proc) {
893 : if (kd->kd_slowdown)
894 : *kd->kd_slowdown = 1;
895 : wakeup(&uvm_km_pages.km_proc);
896 : }
897 : mtx_leave(&uvm_km_pages.mtx);
898 : #endif
899 : } else {
900 : struct uvm_object *uobj = NULL;
901 :
902 0 : if (kd->kd_trylock)
903 0 : mapflags |= UVM_KMF_TRYLOCK;
904 :
905 0 : if (kp->kp_object)
906 0 : uobj = *kp->kp_object;
907 : try_map:
908 0 : map = *kv->kv_map;
909 0 : va = vm_map_min(map);
910 0 : if (uvm_map(map, &va, sz, uobj, kd->kd_prefer,
911 0 : kv->kv_align, UVM_MAPFLAG(prot, prot, MAP_INHERIT_NONE,
912 : MADV_RANDOM, mapflags))) {
913 0 : if (kv->kv_wait && kd->kd_waitok) {
914 0 : tsleep(map, PVM, "km_allocva", 0);
915 0 : goto try_map;
916 : }
917 0 : uvm_pglistfree(&pgl);
918 0 : return (NULL);
919 : }
920 0 : }
921 0 : sva = va;
922 0 : TAILQ_FOREACH(pg, &pgl, pageq) {
923 0 : if (kp->kp_pageable)
924 0 : pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(pg),
925 : prot, prot | PMAP_WIRED);
926 : else
927 0 : pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), prot);
928 0 : va += PAGE_SIZE;
929 : }
930 : pmap_update(pmap_kernel());
931 0 : return ((void *)sva);
932 0 : }
933 :
934 : void
935 0 : km_free(void *v, size_t sz, const struct kmem_va_mode *kv,
936 : const struct kmem_pa_mode *kp)
937 : {
938 : vaddr_t sva, eva, va;
939 : struct vm_page *pg;
940 0 : struct pglist pgl;
941 :
942 0 : sva = (vaddr_t)v;
943 0 : eva = sva + sz;
944 :
945 0 : if (kp->kp_nomem)
946 : goto free_va;
947 :
948 : #ifdef __HAVE_PMAP_DIRECT
949 0 : if (kv->kv_singlepage || kp->kp_maxseg == 1) {
950 0 : TAILQ_INIT(&pgl);
951 0 : for (va = sva; va < eva; va += PAGE_SIZE) {
952 0 : pg = pmap_unmap_direct(va);
953 0 : TAILQ_INSERT_TAIL(&pgl, pg, pageq);
954 : }
955 0 : uvm_pglistfree(&pgl);
956 0 : return;
957 : }
958 : #else
959 : if (kv->kv_singlepage) {
960 : struct uvm_km_free_page *fp = v;
961 :
962 : mtx_enter(&uvm_km_pages.mtx);
963 : fp->next = uvm_km_pages.freelist;
964 : uvm_km_pages.freelist = fp;
965 : if (uvm_km_pages.freelistlen++ > 16)
966 : wakeup(&uvm_km_pages.km_proc);
967 : mtx_leave(&uvm_km_pages.mtx);
968 : return;
969 : }
970 : #endif
971 :
972 0 : if (kp->kp_pageable) {
973 0 : pmap_remove(pmap_kernel(), sva, eva);
974 : pmap_update(pmap_kernel());
975 0 : } else {
976 0 : TAILQ_INIT(&pgl);
977 0 : for (va = sva; va < eva; va += PAGE_SIZE) {
978 0 : paddr_t pa;
979 :
980 0 : if (!pmap_extract(pmap_kernel(), va, &pa))
981 0 : continue;
982 :
983 0 : pg = PHYS_TO_VM_PAGE(pa);
984 0 : if (pg == NULL) {
985 0 : panic("km_free: unmanaged page 0x%lx\n", pa);
986 : }
987 0 : TAILQ_INSERT_TAIL(&pgl, pg, pageq);
988 0 : }
989 0 : pmap_kremove(sva, sz);
990 : pmap_update(pmap_kernel());
991 0 : uvm_pglistfree(&pgl);
992 : }
993 : free_va:
994 0 : uvm_unmap(*kv->kv_map, sva, eva);
995 0 : if (kv->kv_wait)
996 0 : wakeup(*kv->kv_map);
997 0 : }
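/*
 * usage sketch (hedged; see also km_alloc(9)): allocating and freeing a
 * zeroed, DMA-reachable buffer with the constant modes defined below;
 * "len" is a hypothetical caller variable and the size must be a
 * multiple of PAGE_SIZE.
 *
 *	void *buf;
 *
 *	buf = km_alloc(round_page(len), &kv_any, &kp_dma_zero, &kd_waitok);
 *	if (buf == NULL)
 *		return (ENOMEM);
 *	...
 *	km_free(buf, round_page(len), &kv_any, &kp_dma_zero);
 */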
998 :
999 : const struct kmem_va_mode kv_any = {
1000 : .kv_map = &kernel_map,
1001 : };
1002 :
1003 : const struct kmem_va_mode kv_intrsafe = {
1004 : .kv_map = &kmem_map,
1005 : };
1006 :
1007 : const struct kmem_va_mode kv_page = {
1008 : .kv_singlepage = 1
1009 : };
1010 :
1011 : const struct kmem_pa_mode kp_dirty = {
1012 : .kp_constraint = &no_constraint
1013 : };
1014 :
1015 : const struct kmem_pa_mode kp_dma = {
1016 : .kp_constraint = &dma_constraint
1017 : };
1018 :
1019 : const struct kmem_pa_mode kp_dma_contig = {
1020 : .kp_constraint = &dma_constraint,
1021 : .kp_maxseg = 1
1022 : };
1023 :
1024 : const struct kmem_pa_mode kp_dma_zero = {
1025 : .kp_constraint = &dma_constraint,
1026 : .kp_zero = 1
1027 : };
1028 :
1029 : const struct kmem_pa_mode kp_zero = {
1030 : .kp_constraint = &no_constraint,
1031 : .kp_zero = 1
1032 : };
1033 :
1034 : const struct kmem_pa_mode kp_pageable = {
1035 : .kp_object = &uvm.kernel_object,
1036 : .kp_pageable = 1
1037 : /* XXX - kp_nomem, maybe, but we'll need to fix km_free. */
1038 : };
1039 :
1040 : const struct kmem_pa_mode kp_none = {
1041 : .kp_nomem = 1
1042 : };
1043 :
1044 : const struct kmem_dyn_mode kd_waitok = {
1045 : .kd_waitok = 1,
1046 : .kd_prefer = UVM_UNKNOWN_OFFSET
1047 : };
1048 :
1049 : const struct kmem_dyn_mode kd_nowait = {
1050 : .kd_prefer = UVM_UNKNOWN_OFFSET
1051 : };
1052 :
1053 : const struct kmem_dyn_mode kd_trylock = {
1054 : .kd_trylock = 1,
1055 : .kd_prefer = UVM_UNKNOWN_OFFSET
1056 : };