Line data Source code
1 : /* $OpenBSD: uvm_map.c,v 1.238 2018/07/22 14:33:44 kettenis Exp $ */
2 : /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */
3 :
4 : /*
5 : * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
6 : *
7 : * Permission to use, copy, modify, and distribute this software for any
8 : * purpose with or without fee is hereby granted, provided that the above
9 : * copyright notice and this permission notice appear in all copies.
10 : *
11 : * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 : * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 : * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 : * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 : * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 : * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 : * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 : *
19 : *
20 : * Copyright (c) 1997 Charles D. Cranor and Washington University.
21 : * Copyright (c) 1991, 1993, The Regents of the University of California.
22 : *
23 : * All rights reserved.
24 : *
25 : * This code is derived from software contributed to Berkeley by
26 : * The Mach Operating System project at Carnegie-Mellon University.
27 : *
28 : * Redistribution and use in source and binary forms, with or without
29 : * modification, are permitted provided that the following conditions
30 : * are met:
31 : * 1. Redistributions of source code must retain the above copyright
32 : * notice, this list of conditions and the following disclaimer.
33 : * 2. Redistributions in binary form must reproduce the above copyright
34 : * notice, this list of conditions and the following disclaimer in the
35 : * documentation and/or other materials provided with the distribution.
36 : * 3. Neither the name of the University nor the names of its contributors
37 : * may be used to endorse or promote products derived from this software
38 : * without specific prior written permission.
39 : *
40 : * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41 : * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 : * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44 : * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 : * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 : * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 : * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 : * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 : * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 : * SUCH DAMAGE.
51 : *
52 : * @(#)vm_map.c 8.3 (Berkeley) 1/12/94
53 : * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
54 : *
55 : *
56 : * Copyright (c) 1987, 1990 Carnegie-Mellon University.
57 : * All rights reserved.
58 : *
59 : * Permission to use, copy, modify and distribute this software and
60 : * its documentation is hereby granted, provided that both the copyright
61 : * notice and this permission notice appear in all copies of the
62 : * software, derivative works or modified versions, and any portions
63 : * thereof, and that both notices appear in supporting documentation.
64 : *
65 : * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
66 : * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
67 : * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
68 : *
69 : * Carnegie Mellon requests users of this software to return to
70 : *
71 : * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
72 : * School of Computer Science
73 : * Carnegie Mellon University
74 : * Pittsburgh PA 15213-3890
75 : *
76 : * any improvements or extensions that they make and grant Carnegie the
77 : * rights to redistribute these changes.
78 : */
79 :
80 : /*
81 : * uvm_map.c: uvm map operations
82 : */
83 :
84 : /* #define DEBUG */
85 : /* #define VMMAP_DEBUG */
86 :
87 : #include <sys/param.h>
88 : #include <sys/systm.h>
89 : #include <sys/mman.h>
90 : #include <sys/proc.h>
91 : #include <sys/malloc.h>
92 : #include <sys/pool.h>
93 : #include <sys/sysctl.h>
94 : #include <sys/syslog.h>
95 :
96 : #ifdef SYSVSHM
97 : #include <sys/shm.h>
98 : #endif
99 :
100 : #include <uvm/uvm.h>
101 :
102 : #ifdef DDB
103 : #include <uvm/uvm_ddb.h>
104 : #endif
105 :
106 : #include <uvm/uvm_addr.h>
107 :
108 :
109 : vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
110 : int uvm_mapent_isjoinable(struct vm_map*,
111 : struct vm_map_entry*, struct vm_map_entry*);
112 : struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
113 : struct vm_map_entry*, struct uvm_map_deadq*);
114 : struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*,
115 : struct vm_map_entry*, struct uvm_map_deadq*);
116 : struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
117 : struct vm_map_entry*, vaddr_t, vsize_t, int,
118 : struct uvm_map_deadq*, struct vm_map_entry*);
119 : struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int);
120 : void uvm_mapent_free(struct vm_map_entry*);
121 : void uvm_unmap_kill_entry(struct vm_map*,
122 : struct vm_map_entry*);
123 : void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *);
124 : void uvm_mapent_mkfree(struct vm_map*,
125 : struct vm_map_entry*, struct vm_map_entry**,
126 : struct uvm_map_deadq*, boolean_t);
127 : void uvm_map_pageable_pgon(struct vm_map*,
128 : struct vm_map_entry*, struct vm_map_entry*,
129 : vaddr_t, vaddr_t);
130 : int uvm_map_pageable_wire(struct vm_map*,
131 : struct vm_map_entry*, struct vm_map_entry*,
132 : vaddr_t, vaddr_t, int);
133 : void uvm_map_setup_entries(struct vm_map*);
134 : void uvm_map_setup_md(struct vm_map*);
135 : void uvm_map_teardown(struct vm_map*);
136 : void uvm_map_vmspace_update(struct vm_map*,
137 : struct uvm_map_deadq*, int);
138 : void uvm_map_kmem_grow(struct vm_map*,
139 : struct uvm_map_deadq*, vsize_t, int);
140 : void uvm_map_freelist_update_clear(struct vm_map*,
141 : struct uvm_map_deadq*);
142 : void uvm_map_freelist_update_refill(struct vm_map *, int);
143 : void uvm_map_freelist_update(struct vm_map*,
144 : struct uvm_map_deadq*, vaddr_t, vaddr_t,
145 : vaddr_t, vaddr_t, int);
146 : struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
147 : vaddr_t, vaddr_t, int);
148 : int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int,
149 : struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t,
150 : int);
151 : int uvm_map_findspace(struct vm_map*,
152 : struct vm_map_entry**, struct vm_map_entry**,
153 : vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
154 : vaddr_t);
155 : vsize_t uvm_map_addr_augment_get(struct vm_map_entry*);
156 : void uvm_map_addr_augment(struct vm_map_entry*);
157 :
158 : /*
159 : * Tree management functions.
160 : */
161 :
162 : static __inline void uvm_mapent_copy(struct vm_map_entry*,
163 : struct vm_map_entry*);
164 : static inline int uvm_mapentry_addrcmp(const struct vm_map_entry*,
165 : const struct vm_map_entry*);
166 : void uvm_mapent_free_insert(struct vm_map*,
167 : struct uvm_addr_state*, struct vm_map_entry*);
168 : void uvm_mapent_free_remove(struct vm_map*,
169 : struct uvm_addr_state*, struct vm_map_entry*);
170 : void uvm_mapent_addr_insert(struct vm_map*,
171 : struct vm_map_entry*);
172 : void uvm_mapent_addr_remove(struct vm_map*,
173 : struct vm_map_entry*);
174 : void uvm_map_splitentry(struct vm_map*,
175 : struct vm_map_entry*, struct vm_map_entry*,
176 : vaddr_t);
177 : vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
178 : int uvm_mapent_bias(struct vm_map*, struct vm_map_entry*);
179 :
180 : /*
181 : * uvm_vmspace_fork helper functions.
182 : */
183 : struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
184 : vsize_t, vm_prot_t, vm_prot_t,
185 : struct vm_map_entry*, struct uvm_map_deadq*, int,
186 : int);
187 : struct vm_map_entry *uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
188 : vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
189 : struct vm_map_entry*, struct uvm_map_deadq*);
190 : struct vm_map_entry *uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
191 : struct vm_map*, struct vm_map_entry*,
192 : struct uvm_map_deadq*);
193 : struct vm_map_entry *uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
194 : struct vm_map*, struct vm_map_entry*,
195 : struct uvm_map_deadq*);
196 : struct vm_map_entry *uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
197 : struct vm_map*, struct vm_map_entry*,
198 : struct uvm_map_deadq*);
199 :
200 : /*
201 : * Tree validation.
202 : */
203 : #ifdef VMMAP_DEBUG
204 : void uvm_tree_assert(struct vm_map*, int, char*,
205 : char*, int);
206 : #define UVM_ASSERT(map, cond, file, line) \
207 : uvm_tree_assert((map), (cond), #cond, (file), (line))
208 : void uvm_tree_sanity(struct vm_map*, char*, int);
209 : void uvm_tree_size_chk(struct vm_map*, char*, int);
210 : void vmspace_validate(struct vm_map*);
211 : #else
212 : #define uvm_tree_sanity(_map, _file, _line) do {} while (0)
213 : #define uvm_tree_size_chk(_map, _file, _line) do {} while (0)
214 : #define vmspace_validate(_map) do {} while (0)
215 : #endif
216 :
217 : /*
218 : * All architectures will have pmap_prefer.
219 : */
220 : #ifndef PMAP_PREFER
221 : #define PMAP_PREFER_ALIGN() (vaddr_t)PAGE_SIZE
222 : #define PMAP_PREFER_OFFSET(off) 0
223 : #define PMAP_PREFER(addr, off) (addr)
224 : #endif
225 :
226 :
227 : /*
228 : * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
229 : * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
230 : *
231 : * We attempt to grow by VM_MAP_KSIZE_ALLOCMUL times the allocation size
232 : * each time.
233 : */
234 : #define VM_MAP_KSIZE_INIT (512 * (vaddr_t)PAGE_SIZE)
235 : #define VM_MAP_KSIZE_DELTA (256 * (vaddr_t)PAGE_SIZE)
236 : #define VM_MAP_KSIZE_ALLOCMUL 4
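/*
 * Illustrative arithmetic (added, assuming 4 KB pages): the initial kernel
 * map covers 512 pages = 2 MB and VM_MAP_KSIZE_DELTA is 256 pages = 1 MB.
 * Per the comment above, an allocation of e.g. 768 KB that no longer fits
 * grows the map by roughly VM_MAP_KSIZE_ALLOCMUL * 768 KB = 3 MB, and never
 * by less than the 1 MB delta.
 */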
237 : /*
238 : * When selecting a random free-space block, look at most FSPACE_DELTA blocks
239 : * ahead.
240 : */
241 : #define FSPACE_DELTA 8
242 : /*
243 : * Put allocations adjacent to previous allocations when the free-space tree
244 : * is larger than FSPACE_COMPACT entries.
245 : *
246 : * Alignment and PMAP_PREFER may still cause the entry to not be fully
247 : * adjacent. Note that this strategy reduces memory fragmentation (by leaving
248 : * a large space before or after the allocation).
249 : */
250 : #define FSPACE_COMPACT 128
251 : /*
252 : * Make the address selection skip at most this many bytes from the start of
253 : * the free space in which the allocation takes place.
254 : *
255 : * The main idea behind a randomized address space is that an attacker cannot
256 : * know where to target his attack. Therefore, the location of objects must be
257 : * as random as possible. However, the goal is not to create the most sparse
258 : * map that is possible.
259 : * FSPACE_MAXOFF pushes the considered range in bytes down to less insane
260 : * sizes, thereby reducing the sparseness. The biggest randomization comes
261 : * from fragmentation, i.e. FSPACE_COMPACT.
262 : */
263 : #define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024)
264 : /*
265 : * Allow for small gaps in the overflow areas.
266 : * Gap size is in bytes and does not have to be a multiple of page-size.
267 : */
268 : #define FSPACE_BIASGAP ((vaddr_t)32 * 1024)
269 :
270 : /* auto-allocate address lower bound */
271 : #define VMMAP_MIN_ADDR PAGE_SIZE
272 :
273 :
274 : #ifdef DEADBEEF0
275 : #define UVMMAP_DEADBEEF ((unsigned long)DEADBEEF0)
276 : #else
277 : #define UVMMAP_DEADBEEF ((unsigned long)0xdeadd0d0)
278 : #endif
279 :
280 : #ifdef DEBUG
281 : int uvm_map_printlocks = 0;
282 :
283 : #define LPRINTF(_args) \
284 : do { \
285 : if (uvm_map_printlocks) \
286 : printf _args; \
287 : } while (0)
288 : #else
289 : #define LPRINTF(_args) do {} while (0)
290 : #endif
291 :
292 : static struct mutex uvm_kmapent_mtx;
293 : static struct timeval uvm_kmapent_last_warn_time;
294 : static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
295 :
296 : const char vmmapbsy[] = "vmmapbsy";
297 :
298 : /*
299 : * pool for vmspace structures.
300 : */
301 : struct pool uvm_vmspace_pool;
302 :
303 : /*
304 : * pool for dynamically-allocated map entries.
305 : */
306 : struct pool uvm_map_entry_pool;
307 : struct pool uvm_map_entry_kmem_pool;
308 :
309 : /*
310 : * This global represents the end of the kernel virtual address
311 : * space. If we want to exceed this, we must grow the kernel
312 : * virtual address space dynamically.
313 : *
314 : * Note, this variable is locked by kernel_map's lock.
315 : */
316 : vaddr_t uvm_maxkaddr;
317 :
318 : /*
319 : * Locking predicate.
320 : */
321 : #define UVM_MAP_REQ_WRITE(_map) \
322 : do { \
323 : if ((_map)->ref_count > 0) { \
324 : if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \
325 : rw_assert_wrlock(&(_map)->lock); \
326 : else \
327 : MUTEX_ASSERT_LOCKED(&(_map)->mtx); \
328 : } \
329 : } while (0)
330 :
331 : /*
332 : * Tree describing entries by address.
333 : *
334 : * Addresses are unique.
335 : * Entries with start == end may only exist if they are the first entry
336 : * (sorted by address) within a free-memory tree.
337 : */
338 :
339 : static inline int
340 0 : uvm_mapentry_addrcmp(const struct vm_map_entry *e1,
341 : const struct vm_map_entry *e2)
342 : {
343 300 : return e1->start < e2->start ? -1 : e1->start > e2->start;
344 : }
345 :
346 : /*
347 : * Copy mapentry.
348 : */
349 : static __inline void
350 0 : uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
351 : {
352 : caddr_t csrc, cdst;
353 : size_t sz;
354 :
355 0 : csrc = (caddr_t)src;
356 0 : cdst = (caddr_t)dst;
357 0 : csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
358 0 : cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
359 :
360 : sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
361 : offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
362 0 : memcpy(cdst, csrc, sz);
363 0 : }
364 :
365 : /*
366 : * Handle free-list insertion.
367 : */
368 : void
369 0 : uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
370 : struct vm_map_entry *entry)
371 : {
372 : const struct uvm_addr_functions *fun;
373 : #ifdef VMMAP_DEBUG
374 : vaddr_t min, max, bound;
375 : #endif
376 :
377 : #ifdef VMMAP_DEBUG
378 : /*
379 : * Boundary check.
380 : * Boundaries are folded if they go on the same free list.
381 : */
382 : min = VMMAP_FREE_START(entry);
383 : max = VMMAP_FREE_END(entry);
384 :
385 : while (min < max) {
386 : bound = uvm_map_boundary(map, min, max);
387 : KASSERT(uvm_map_uaddr(map, min) == uaddr);
388 : min = bound;
389 : }
390 : #endif
391 : KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
392 180 : KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);
393 :
394 180 : UVM_MAP_REQ_WRITE(map);
395 :
396 : /* Actual insert: forward to uaddr pointer. */
397 0 : if (uaddr != NULL) {
398 0 : fun = uaddr->uaddr_functions;
399 : KDASSERT(fun != NULL);
400 0 : if (fun->uaddr_free_insert != NULL)
401 180 : (*fun->uaddr_free_insert)(map, uaddr, entry);
402 0 : entry->etype |= UVM_ET_FREEMAPPED;
403 0 : }
404 :
405 : /* Update fspace augmentation. */
406 0 : uvm_map_addr_augment(entry);
407 180 : }
408 :
409 : /*
410 : * Handle free-list removal.
411 : */
412 : void
413 0 : uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
414 : struct vm_map_entry *entry)
415 : {
416 : const struct uvm_addr_functions *fun;
417 :
418 180 : KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL);
419 0 : KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
420 180 : UVM_MAP_REQ_WRITE(map);
421 :
422 0 : if (uaddr != NULL) {
423 0 : fun = uaddr->uaddr_functions;
424 0 : if (fun->uaddr_free_remove != NULL)
425 180 : (*fun->uaddr_free_remove)(map, uaddr, entry);
426 0 : entry->etype &= ~UVM_ET_FREEMAPPED;
427 0 : }
428 0 : }
429 :
430 : /*
431 : * Handle address tree insertion.
432 : */
433 : void
434 0 : uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
435 : {
436 : struct vm_map_entry *res;
437 :
438 0 : if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF))
439 0 : panic("uvm_mapent_addr_insert: entry still in addr list");
440 : KDASSERT(entry->start <= entry->end);
441 : KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
442 : (entry->end & (vaddr_t)PAGE_MASK) == 0);
443 :
444 60 : UVM_MAP_REQ_WRITE(map);
445 0 : res = RBT_INSERT(uvm_map_addr, &map->addr, entry);
446 0 : if (res != NULL) {
447 0 : panic("uvm_mapent_addr_insert: map %p entry %p "
448 : "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
449 : "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
450 : map, entry,
451 0 : entry->start, entry->end, entry->guard, entry->fspace,
452 0 : res, res->start, res->end, res->guard, res->fspace);
453 : }
454 60 : }
455 :
456 : /*
457 : * Handle address tree removal.
458 : */
459 : void
460 0 : uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
461 : {
462 : struct vm_map_entry *res;
463 :
464 120 : UVM_MAP_REQ_WRITE(map);
465 0 : res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
466 0 : if (res != entry)
467 0 : panic("uvm_mapent_addr_remove");
468 0 : RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF);
469 0 : }
470 :
471 : /*
472 : * uvm_map_reference: add reference to a map
473 : *
474 : * XXX check map reference counter lock
475 : */
476 : #define uvm_map_reference(_map) \
477 : do { \
478 : map->ref_count++; \
479 : } while (0)
480 :
481 : /*
482 : * Calculate the dused delta.
483 : */
484 : vsize_t
485 0 : uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
486 : {
487 : struct vmspace *vm;
488 : vsize_t sz;
489 : vaddr_t lmax;
490 : vaddr_t stack_begin, stack_end; /* Position of stack. */
491 :
492 0 : KASSERT(map->flags & VM_MAP_ISVMSPACE);
493 0 : vm = (struct vmspace *)map;
494 0 : stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
495 0 : stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
496 :
497 : sz = 0;
498 0 : while (min != max) {
499 : lmax = max;
500 0 : if (min < stack_begin && lmax > stack_begin)
501 0 : lmax = stack_begin;
502 0 : else if (min < stack_end && lmax > stack_end)
503 0 : lmax = stack_end;
504 :
505 0 : if (min >= stack_begin && min < stack_end) {
506 : /* nothing */
507 : } else
508 0 : sz += lmax - min;
509 : min = lmax;
510 : }
511 :
512 0 : return sz >> PAGE_SHIFT;
513 : }
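/*
 * Worked example (added, assuming 4 KB pages): with the stack occupying
 * [0x7000, 0x9000) and a request for [0x6000, 0xa000), the loop above counts
 * [0x6000, 0x7000) and [0x9000, 0xa000) but skips the stack window, so the
 * function returns 2 (pages) instead of 4.
 */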
514 :
515 : /*
516 : * Find the entry describing the given address.
517 : */
518 : struct vm_map_entry*
519 0 : uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
520 : {
521 : struct vm_map_entry *iter;
522 :
523 0 : iter = RBT_ROOT(uvm_map_addr, atree);
524 0 : while (iter != NULL) {
525 1022 : if (iter->start > addr)
526 0 : iter = RBT_LEFT(uvm_map_addr, iter);
527 0 : else if (VMMAP_FREE_END(iter) <= addr)
528 0 : iter = RBT_RIGHT(uvm_map_addr, iter);
529 : else
530 0 : return iter;
531 : }
532 0 : return NULL;
533 0 : }
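/*
 * Note (added): the descent above treats each entry as covering
 * [start, VMMAP_FREE_END(entry)), i.e. the mapped range plus the guard and
 * free space that follow it.  An address that falls in the gap after a
 * mapping therefore resolves to that mapping's entry rather than to NULL,
 * which is what uvm_map_isavail() below relies on to find the entries
 * describing a free range.
 */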
534 :
535 : /*
536 : * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
537 : *
538 : * Push dead entries into a linked list.
539 : * Since the linked list abuses the address tree for storage, the entry
540 : * may not be linked in a map.
541 : *
542 : * *head must be initialized to NULL before the first call to this macro.
543 : * uvm_unmap_detach(*head, 0) will remove dead entries.
544 : */
545 : static __inline void
546 0 : dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
547 : {
548 0 : TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
549 0 : }
550 : #define DEAD_ENTRY_PUSH(_headptr, _entry) \
551 : dead_entry_push((_headptr), (_entry))
552 :
553 : /*
554 : * Helper function for uvm_map_findspace_tree.
555 : *
556 : * Given allocation constraints and pmap constraints, finds the
557 : * lowest and highest address in a range that can be used for the
558 : * allocation.
559 : *
560 : * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs.
561 : *
562 : *
563 : * Big chunk of math with a seasoning of dragons.
564 : */
565 : int
566 0 : uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg,
567 : struct vm_map_entry *sel, vaddr_t align,
568 : vaddr_t pmap_align, vaddr_t pmap_off, int bias)
569 : {
570 : vaddr_t sel_min, sel_max;
571 : #ifdef PMAP_PREFER
572 : vaddr_t pmap_min, pmap_max;
573 : #endif /* PMAP_PREFER */
574 : #ifdef DIAGNOSTIC
575 : int bad;
576 : #endif /* DIAGNOSTIC */
577 :
578 0 : sel_min = VMMAP_FREE_START(sel);
579 0 : sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0);
580 :
581 : #ifdef PMAP_PREFER
582 :
583 : /*
584 : * There are two special cases, in which we can satisfy the align
585 : * requirement and the pmap_prefer requirement.
586 : * - when pmap_off == 0, we always select the largest of the two
587 : * - when pmap_off % align == 0 and pmap_align > align, we simply
588 : * satisfy the pmap_align requirement and automatically
589 : * satisfy the align requirement.
590 : */
591 0 : if (align > PAGE_SIZE &&
592 0 : !(pmap_align > align && (pmap_off & (align - 1)) == 0)) {
593 : /*
594 : * Simple case: only use align.
595 : */
596 0 : sel_min = roundup(sel_min, align);
597 0 : sel_max &= ~(align - 1);
598 :
599 0 : if (sel_min > sel_max)
600 0 : return ENOMEM;
601 :
602 : /* Correct for bias. */
603 0 : if (sel_max - sel_min > FSPACE_BIASGAP) {
604 0 : if (bias > 0) {
605 0 : sel_min = sel_max - FSPACE_BIASGAP;
606 0 : sel_min = roundup(sel_min, align);
607 0 : } else if (bias < 0) {
608 0 : sel_max = sel_min + FSPACE_BIASGAP;
609 0 : sel_max &= ~(align - 1);
610 0 : }
611 : }
612 0 : } else if (pmap_align != 0) {
613 : /*
614 : * Special case: satisfy both pmap_prefer and
615 : * align argument.
616 : */
617 0 : pmap_max = sel_max & ~(pmap_align - 1);
618 : pmap_min = sel_min;
619 0 : if (pmap_max < sel_min)
620 0 : return ENOMEM;
621 :
622 : /* Adjust pmap_min for BIASGAP for top-addr bias. */
623 0 : if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP)
624 0 : pmap_min = pmap_max - FSPACE_BIASGAP;
625 : /* Align pmap_min. */
626 0 : pmap_min &= ~(pmap_align - 1);
627 0 : if (pmap_min < sel_min)
628 0 : pmap_min += pmap_align;
629 0 : if (pmap_min > pmap_max)
630 0 : return ENOMEM;
631 :
632 : /* Adjust pmap_max for BIASGAP for bottom-addr bias. */
633 0 : if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) {
634 0 : pmap_max = (pmap_min + FSPACE_BIASGAP) &
635 : ~(pmap_align - 1);
636 0 : }
637 0 : if (pmap_min > pmap_max)
638 0 : return ENOMEM;
639 :
640 : /* Apply pmap prefer offset. */
641 0 : pmap_max |= pmap_off;
642 0 : if (pmap_max > sel_max)
643 0 : pmap_max -= pmap_align;
644 0 : pmap_min |= pmap_off;
645 0 : if (pmap_min < sel_min)
646 0 : pmap_min += pmap_align;
647 :
648 : /*
649 : * Fixup: it's possible that pmap_min and pmap_max
650 : * cross each other. In this case, try to find one
651 : * address that is allowed.
652 : * (This usually happens in biased case.)
653 : */
654 0 : if (pmap_min > pmap_max) {
655 0 : if (pmap_min < sel_max)
656 0 : pmap_max = pmap_min;
657 0 : else if (pmap_max > sel_min)
658 : pmap_min = pmap_max;
659 : else
660 0 : return ENOMEM;
661 : }
662 :
663 : /* Internal validation. */
664 : KDASSERT(pmap_min <= pmap_max);
665 :
666 : sel_min = pmap_min;
667 : sel_max = pmap_max;
668 0 : } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
669 0 : sel_min = sel_max - FSPACE_BIASGAP;
670 0 : else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
671 0 : sel_max = sel_min + FSPACE_BIASGAP;
672 :
673 : #else
674 :
675 : if (align > PAGE_SIZE) {
676 : sel_min = roundup(sel_min, align);
677 : sel_max &= ~(align - 1);
678 : if (sel_min > sel_max)
679 : return ENOMEM;
680 :
681 : if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) {
682 : if (bias > 0) {
683 : sel_min = roundup(sel_max - FSPACE_BIASGAP,
684 : align);
685 : } else {
686 : sel_max = (sel_min + FSPACE_BIASGAP) &
687 : ~(align - 1);
688 : }
689 : }
690 : } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
691 : sel_min = sel_max - FSPACE_BIASGAP;
692 : else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
693 : sel_max = sel_min + FSPACE_BIASGAP;
694 :
695 : #endif
696 :
697 0 : if (sel_min > sel_max)
698 0 : return ENOMEM;
699 :
700 : #ifdef DIAGNOSTIC
701 : bad = 0;
702 : /* Lower boundary check. */
703 0 : if (sel_min < VMMAP_FREE_START(sel)) {
704 0 : printf("sel_min: 0x%lx, but should be at least 0x%lx\n",
705 : sel_min, VMMAP_FREE_START(sel));
706 : bad++;
707 0 : }
708 : /* Upper boundary check. */
709 0 : if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) {
710 0 : printf("sel_max: 0x%lx, but should be at most 0x%lx\n",
711 : sel_max,
712 : VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0));
713 0 : bad++;
714 0 : }
715 : /* Lower boundary alignment. */
716 0 : if (align != 0 && (sel_min & (align - 1)) != 0) {
717 0 : printf("sel_min: 0x%lx, not aligned to 0x%lx\n",
718 : sel_min, align);
719 0 : bad++;
720 0 : }
721 : /* Upper boundary alignment. */
722 0 : if (align != 0 && (sel_max & (align - 1)) != 0) {
723 0 : printf("sel_max: 0x%lx, not aligned to 0x%lx\n",
724 : sel_max, align);
725 0 : bad++;
726 0 : }
727 : /* Lower boundary PMAP_PREFER check. */
728 0 : if (pmap_align != 0 && align == 0 &&
729 0 : (sel_min & (pmap_align - 1)) != pmap_off) {
730 0 : printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
731 : sel_min, sel_min & (pmap_align - 1), pmap_off);
732 0 : bad++;
733 0 : }
734 : /* Upper boundary PMAP_PREFER check. */
735 0 : if (pmap_align != 0 && align == 0 &&
736 0 : (sel_max & (pmap_align - 1)) != pmap_off) {
737 0 : printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
738 : sel_max, sel_max & (pmap_align - 1), pmap_off);
739 0 : bad++;
740 0 : }
741 :
742 0 : if (bad) {
743 0 : panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, "
744 : "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, "
745 : "bias = %d, "
746 : "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)",
747 0 : sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off,
748 0 : bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel));
749 : }
750 : #endif /* DIAGNOSTIC */
751 :
752 0 : *min = sel_min;
753 0 : *max = sel_max;
754 0 : return 0;
755 0 : }
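/*
 * Minimal sketch (added, not part of UVM): the align-only branch above boils
 * down to clamping a [lo, hi] window to aligned bounds.  Assumes "align" is a
 * power of two; a nonzero return mirrors the ENOMEM cases.
 */
static inline int
align_window_sketch(unsigned long *lo, unsigned long *hi, unsigned long align)
{
	*lo = (*lo + align - 1) & ~(align - 1);	/* roundup(sel_min, align) */
	*hi &= ~(align - 1);			/* sel_max &= ~(align - 1) */
	return *lo > *hi;			/* window became empty */
}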
756 :
757 : /*
758 : * Test if memory starting at addr with sz bytes is free.
759 : *
760 : * Fills in *start_ptr and *end_ptr to be the first and last entry describing
761 : * the space.
762 : * If called with prefilled *start_ptr and *end_ptr, they must already be correct.
763 : */
764 : int
765 0 : uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
766 : struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
767 : vaddr_t addr, vsize_t sz)
768 120 : {
769 : struct uvm_addr_state *free;
770 : struct uvm_map_addr *atree;
771 : struct vm_map_entry *i, *i_end;
772 :
773 0 : if (addr + sz < addr)
774 0 : return 0;
775 :
776 : /*
777 : * Kernel memory above uvm_maxkaddr is considered unavailable.
778 : */
779 0 : if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
780 0 : if (addr + sz > uvm_maxkaddr)
781 0 : return 0;
782 : }
783 :
784 0 : atree = &map->addr;
785 :
786 : /*
787 : * Fill in first, last, so they point at the entries containing the
788 : * first and last address of the range.
789 : * Note that if they are not NULL, we don't perform the lookup.
790 : */
791 : KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
792 120 : if (*start_ptr == NULL) {
793 0 : *start_ptr = uvm_map_entrybyaddr(atree, addr);
794 0 : if (*start_ptr == NULL)
795 0 : return 0;
796 : } else
797 60 : KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
798 0 : if (*end_ptr == NULL) {
799 0 : if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
800 0 : *end_ptr = *start_ptr;
801 : else {
802 0 : *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
803 0 : if (*end_ptr == NULL)
804 0 : return 0;
805 : }
806 : } else
807 0 : KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));
808 :
809 : /* Validation. */
810 : KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
811 : KDASSERT((*start_ptr)->start <= addr &&
812 : VMMAP_FREE_END(*start_ptr) > addr &&
813 : (*end_ptr)->start < addr + sz &&
814 : VMMAP_FREE_END(*end_ptr) >= addr + sz);
815 :
816 : /*
817 : * Check that none of the entries intersect with <addr, addr+sz>.
818 : * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is
819 : * considered unavailable unless called by those allocators.
820 : */
821 0 : i = *start_ptr;
822 0 : i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
823 0 : for (; i != i_end;
824 0 : i = RBT_NEXT(uvm_map_addr, i)) {
825 0 : if (i->start != i->end && i->end > addr)
826 0 : return 0;
827 :
828 : /*
829 : * uaddr_exe and uaddr_brk_stack may only be used
830 : * by these allocators and the NULL uaddr (i.e. no
831 : * uaddr).
832 : * Reject if this requirement is not met.
833 : */
834 60 : if (uaddr != NULL) {
835 0 : free = uvm_map_uaddr_e(map, i);
836 :
837 0 : if (uaddr != free && free != NULL &&
838 0 : (free == map->uaddr_exe ||
839 0 : free == map->uaddr_brk_stack))
840 0 : return 0;
841 : }
842 : }
843 :
844 0 : return -1;
845 0 : }
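/*
 * Note (added): the return value is a truth value, not an errno: -1 (true)
 * when the whole range [addr, addr+sz) is free and usable by the calling
 * allocator, 0 (false) otherwise.  Callers test it with a plain if(), as in
 * uvm_mapanon() and uvm_map() below.
 */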
846 :
847 : /*
848 : * Invoke each address selector until an address is found.
849 : * Will not invoke uaddr_exe.
850 : */
851 : int
852 0 : uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first,
853 : struct vm_map_entry**last, vaddr_t *addr, vsize_t sz,
854 : vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
855 : {
856 : struct uvm_addr_state *uaddr;
857 : int i;
858 :
859 : /*
860 : * Allocation for sz bytes at any address,
861 : * using the addr selectors in order.
862 : */
863 0 : for (i = 0; i < nitems(map->uaddr_any); i++) {
864 60 : uaddr = map->uaddr_any[i];
865 :
866 0 : if (uvm_addr_invoke(map, uaddr, first, last,
867 0 : addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
868 0 : return 0;
869 : }
870 :
871 : /* Fall back to brk() and stack() address selectors. */
872 0 : uaddr = map->uaddr_brk_stack;
873 0 : if (uvm_addr_invoke(map, uaddr, first, last,
874 0 : addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
875 0 : return 0;
876 :
877 0 : return ENOMEM;
878 0 : }
879 :
880 : /* Calculate entry augmentation value. */
881 : vsize_t
882 0 : uvm_map_addr_augment_get(struct vm_map_entry *entry)
883 : {
884 : vsize_t augment;
885 : struct vm_map_entry *left, *right;
886 :
887 356 : augment = entry->fspace;
888 0 : if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL)
889 536 : augment = MAX(augment, left->fspace_augment);
890 0 : if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
891 476 : augment = MAX(augment, right->fspace_augment);
892 0 : return augment;
893 : }
894 :
895 : /*
896 : * Update augmentation data in entry.
897 : */
898 : void
899 0 : uvm_map_addr_augment(struct vm_map_entry *entry)
900 : {
901 : vsize_t augment;
902 :
903 120 : while (entry != NULL) {
904 : /* Calculate value for augmentation. */
905 0 : augment = uvm_map_addr_augment_get(entry);
906 :
907 : /*
908 : * Descend update.
909 : * Once we find an entry that already has the correct value,
910 : * stop, since it means all its parents will use the correct
911 : * value too.
912 : */
913 180 : if (entry->fspace_augment == augment)
914 0 : return;
915 0 : entry->fspace_augment = augment;
916 0 : entry = RBT_PARENT(uvm_map_addr, entry);
917 : }
918 0 : }
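/*
 * Illustrative sketch (added, hypothetical types; not UVM code): storing the
 * maximum free-space value of a subtree in each node, as fspace_augment does
 * above, lets a search for a gap of at least "sz" bytes prune every subtree
 * whose augment is too small.
 */
struct gap_node {
	struct gap_node	*left, *right;
	unsigned long	 gap;		/* free space held by this node */
	unsigned long	 gap_augment;	/* max gap anywhere in this subtree */
};

static struct gap_node *
gap_find_sketch(struct gap_node *n, unsigned long sz)
{
	while (n != NULL && n->gap_augment >= sz) {
		if (n->gap >= sz)
			return n;	/* this node's own gap suffices */
		if (n->left != NULL && n->left->gap_augment >= sz)
			n = n->left;	/* a big enough gap exists on the left */
		else
			n = n->right;	/* otherwise it must be on the right */
	}
	return NULL;			/* no gap of sz bytes in this tree */
}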
919 :
920 : /*
921 : * uvm_mapanon: establish a valid mapping in map for an anon
922 : *
923 : * => *addr and sz must be a multiple of PAGE_SIZE.
924 : * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
925 : * => map must be unlocked.
926 : *
927 : * => align: align vaddr, must be a power-of-2.
928 : * Align is only a hint and will be ignored if the alignment fails.
929 : */
930 : int
931 0 : uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz,
932 : vsize_t align, unsigned int flags)
933 : {
934 0 : struct vm_map_entry *first, *last, *entry, *new;
935 0 : struct uvm_map_deadq dead;
936 : vm_prot_t prot;
937 : vm_prot_t maxprot;
938 : vm_inherit_t inherit;
939 : int advice;
940 : int error;
941 : vaddr_t pmap_align, pmap_offset;
942 : vaddr_t hint;
943 :
944 0 : KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE);
945 0 : KASSERT(map != kernel_map);
946 0 : KASSERT((map->flags & UVM_FLAG_HOLE) == 0);
947 :
948 0 : KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
949 0 : splassert(IPL_NONE);
950 :
951 : /*
952 : * We use pmap_align and pmap_offset as alignment and offset variables.
953 : *
954 : * Because the align parameter takes precedence over pmap prefer,
955 : * the pmap_align will need to be set to align, with pmap_offset = 0,
956 : * if pmap_prefer will not align.
957 : */
958 0 : pmap_align = MAX(align, PAGE_SIZE);
959 : pmap_offset = 0;
960 :
961 : /* Decode parameters. */
962 0 : prot = UVM_PROTECTION(flags);
963 0 : maxprot = UVM_MAXPROTECTION(flags);
964 0 : advice = UVM_ADVICE(flags);
965 0 : inherit = UVM_INHERIT(flags);
966 : error = 0;
967 0 : hint = trunc_page(*addr);
968 0 : TAILQ_INIT(&dead);
969 0 : KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
970 0 : KASSERT((align & (align - 1)) == 0);
971 :
972 : /* Check protection. */
973 0 : if ((prot & maxprot) != prot)
974 0 : return EACCES;
975 :
976 : /*
977 : * Before grabbing the lock, allocate a map entry for later
978 : * use to ensure we don't wait for memory while holding the
979 : * vm_map_lock.
980 : */
981 0 : new = uvm_mapent_alloc(map, flags);
982 0 : if (new == NULL)
983 0 : return(ENOMEM);
984 :
985 0 : if (flags & UVM_FLAG_TRYLOCK) {
986 0 : if (vm_map_lock_try(map) == FALSE) {
987 : error = EFAULT;
988 0 : goto out;
989 : }
990 : } else
991 0 : vm_map_lock(map);
992 :
993 0 : first = last = NULL;
994 0 : if (flags & UVM_FLAG_FIXED) {
995 : /*
996 : * Fixed location.
997 : *
998 : * Note: we ignore align, pmap_prefer.
999 : * Fill in first, last and *addr.
1000 : */
1001 0 : KASSERT((*addr & PAGE_MASK) == 0);
1002 :
1003 : /* Check that the space is available. */
1004 0 : if (flags & UVM_FLAG_UNMAP) {
1005 0 : if ((flags & UVM_FLAG_STACK) &&
1006 0 : !uvm_map_is_stack_remappable(map, *addr, sz)) {
1007 : error = EINVAL;
1008 0 : goto unlock;
1009 : }
1010 0 : uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE);
1011 0 : }
1012 0 : if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1013 : error = ENOMEM;
1014 0 : goto unlock;
1015 : }
1016 0 : } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1017 0 : (align == 0 || (*addr & (align - 1)) == 0) &&
1018 0 : uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1019 : /*
1020 : * Address used as hint.
1021 : *
1022 : * Note: we enforce the alignment restriction,
1023 : * but ignore pmap_prefer.
1024 : */
1025 0 : } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1026 : /* Run selection algorithm for executables. */
1027 0 : error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1028 : addr, sz, pmap_align, pmap_offset, prot, hint);
1029 :
1030 0 : if (error != 0)
1031 : goto unlock;
1032 : } else {
1033 : /* Update freelists from vmspace. */
1034 0 : uvm_map_vmspace_update(map, &dead, flags);
1035 :
1036 0 : error = uvm_map_findspace(map, &first, &last, addr, sz,
1037 : pmap_align, pmap_offset, prot, hint);
1038 :
1039 0 : if (error != 0)
1040 : goto unlock;
1041 : }
1042 :
1043 : /* Double-check if selected address doesn't cause overflow. */
1044 0 : if (*addr + sz < *addr) {
1045 : error = ENOMEM;
1046 0 : goto unlock;
1047 : }
1048 :
1049 : /* If we only want a query, return now. */
1050 0 : if (flags & UVM_FLAG_QUERY) {
1051 : error = 0;
1052 0 : goto unlock;
1053 : }
1054 :
1055 : /*
1056 : * Create new entry.
1057 : * first and last may be invalidated after this call.
1058 : */
1059 0 : entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1060 : new);
1061 0 : if (entry == NULL) {
1062 : error = ENOMEM;
1063 0 : goto unlock;
1064 : }
1065 : new = NULL;
1066 : KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1067 0 : entry->object.uvm_obj = NULL;
1068 0 : entry->offset = 0;
1069 0 : entry->protection = prot;
1070 0 : entry->max_protection = maxprot;
1071 0 : entry->inheritance = inherit;
1072 0 : entry->wired_count = 0;
1073 0 : entry->advice = advice;
1074 0 : if (flags & UVM_FLAG_STACK) {
1075 0 : entry->etype |= UVM_ET_STACK;
1076 0 : if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP))
1077 0 : map->serial++;
1078 : }
1079 0 : if (flags & UVM_FLAG_COPYONW) {
1080 0 : entry->etype |= UVM_ET_COPYONWRITE;
1081 0 : if ((flags & UVM_FLAG_OVERLAY) == 0)
1082 0 : entry->etype |= UVM_ET_NEEDSCOPY;
1083 : }
1084 0 : if (flags & UVM_FLAG_OVERLAY) {
1085 0 : KERNEL_LOCK();
1086 0 : entry->aref.ar_pageoff = 0;
1087 0 : entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1088 0 : KERNEL_UNLOCK();
1089 0 : }
1090 :
1091 : /* Update map and process statistics. */
1092 0 : map->size += sz;
1093 0 : ((struct vmspace *)map)->vm_dused += uvmspace_dused(map, *addr, *addr + sz);
1094 :
1095 : unlock:
1096 0 : vm_map_unlock(map);
1097 :
1098 : /*
1099 : * Remove dead entries.
1100 : *
1101 : * Dead entries may be the result of merging.
1102 : * uvm_map_mkentry may also create dead entries, when it attempts to
1103 : * destroy free-space entries.
1104 : */
1105 0 : uvm_unmap_detach(&dead, 0);
1106 : out:
1107 0 : if (new)
1108 0 : uvm_mapent_free(new);
1109 0 : return error;
1110 0 : }
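/*
 * Hypothetical usage sketch (added): an anonymous, copy-on-write mapping of
 * one page at an address chosen by the selectors.  UVM_MAPFLAG() and the
 * inherit/advice constants are assumed to be the usual uvm_extern.h and
 * mman.h definitions; this is not code from this file.
 */
static int
mapanon_sketch(struct vm_map *map, vaddr_t *addr)
{
	*addr = 0;	/* no hint; without UVM_FLAG_FIXED the selectors pick */
	return uvm_mapanon(map, addr, PAGE_SIZE, 0,
	    UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
	    MAP_INHERIT_COPY, MADV_NORMAL, UVM_FLAG_COPYONW));
}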
1111 :
1112 : /*
1113 : * uvm_map: establish a valid mapping in map
1114 : *
1115 : * => *addr and sz must be a multiple of PAGE_SIZE.
1116 : * => map must be unlocked.
1117 : * => <uobj,uoffset> value meanings (4 cases):
1118 : * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER
1119 : * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER
1120 : * [3] <uobj,uoffset> == normal mapping
1121 : * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA
1122 : *
1123 : * case [4] is for kernel mappings where we don't know the offset until
1124 : * we've found a virtual address. note that kernel object offsets are
1125 : * always relative to vm_map_min(kernel_map).
1126 : *
1127 : * => align: align vaddr, must be a power-of-2.
1128 : * Align is only a hint and will be ignored if the alignment fails.
1129 : */
1130 : int
1131 0 : uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
1132 : struct uvm_object *uobj, voff_t uoffset,
1133 : vsize_t align, unsigned int flags)
1134 : {
1135 0 : struct vm_map_entry *first, *last, *entry, *new;
1136 0 : struct uvm_map_deadq dead;
1137 : vm_prot_t prot;
1138 : vm_prot_t maxprot;
1139 : vm_inherit_t inherit;
1140 : int advice;
1141 : int error;
1142 : vaddr_t pmap_align, pmap_offset;
1143 : vaddr_t hint;
1144 :
1145 0 : if ((map->flags & VM_MAP_INTRSAFE) == 0)
1146 0 : splassert(IPL_NONE);
1147 : else
1148 0 : splassert(IPL_VM);
1149 :
1150 : /*
1151 : * We use pmap_align and pmap_offset as alignment and offset variables.
1152 : *
1153 : * Because the align parameter takes precedence over pmap prefer,
1154 : * the pmap_align will need to be set to align, with pmap_offset = 0,
1155 : * if pmap_prefer will not align.
1156 : */
1157 0 : if (uoffset == UVM_UNKNOWN_OFFSET) {
1158 0 : pmap_align = MAX(align, PAGE_SIZE);
1159 : pmap_offset = 0;
1160 0 : } else {
1161 : pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
1162 : pmap_offset = PMAP_PREFER_OFFSET(uoffset);
1163 :
1164 0 : if (align == 0 ||
1165 0 : (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
1166 : /* pmap_offset satisfies align, no change. */
1167 : } else {
1168 : /* Align takes precedence over pmap prefer. */
1169 : pmap_align = align;
1170 : pmap_offset = 0;
1171 : }
1172 : }
1173 :
1174 : /* Decode parameters. */
1175 0 : prot = UVM_PROTECTION(flags);
1176 0 : maxprot = UVM_MAXPROTECTION(flags);
1177 0 : advice = UVM_ADVICE(flags);
1178 0 : inherit = UVM_INHERIT(flags);
1179 : error = 0;
1180 0 : hint = trunc_page(*addr);
1181 0 : TAILQ_INIT(&dead);
1182 0 : KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
1183 0 : KASSERT((align & (align - 1)) == 0);
1184 :
1185 : /* Holes are incompatible with other types of mappings. */
1186 0 : if (flags & UVM_FLAG_HOLE) {
1187 0 : KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
1188 : (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
1189 : }
1190 :
1191 : /* Unset hint for kernel_map non-fixed allocations. */
1192 0 : if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
1193 0 : hint = 0;
1194 :
1195 : /* Check protection. */
1196 0 : if ((prot & maxprot) != prot)
1197 0 : return EACCES;
1198 :
1199 0 : if (map == kernel_map &&
1200 0 : (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
1201 0 : panic("uvm_map: kernel map W^X violation requested");
1202 :
1203 : /*
1204 : * Before grabbing the lock, allocate a map entry for later
1205 : * use to ensure we don't wait for memory while holding the
1206 : * vm_map_lock.
1207 : */
1208 0 : new = uvm_mapent_alloc(map, flags);
1209 0 : if (new == NULL)
1210 0 : return(ENOMEM);
1211 :
1212 0 : if (flags & UVM_FLAG_TRYLOCK) {
1213 0 : if (vm_map_lock_try(map) == FALSE) {
1214 : error = EFAULT;
1215 0 : goto out;
1216 : }
1217 : } else {
1218 0 : vm_map_lock(map);
1219 : }
1220 :
1221 0 : first = last = NULL;
1222 0 : if (flags & UVM_FLAG_FIXED) {
1223 : /*
1224 : * Fixed location.
1225 : *
1226 : * Note: we ignore align, pmap_prefer.
1227 : * Fill in first, last and *addr.
1228 : */
1229 0 : KASSERT((*addr & PAGE_MASK) == 0);
1230 :
1231 : /*
1232 : * Grow pmap to include allocated address.
1233 : * If the growth fails, the allocation will fail too.
1234 : */
1235 0 : if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
1236 0 : uvm_maxkaddr < (*addr + sz)) {
1237 0 : uvm_map_kmem_grow(map, &dead,
1238 0 : *addr + sz - uvm_maxkaddr, flags);
1239 0 : }
1240 :
1241 : /* Check that the space is available. */
1242 0 : if (flags & UVM_FLAG_UNMAP)
1243 0 : uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE);
1244 0 : if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1245 : error = ENOMEM;
1246 0 : goto unlock;
1247 : }
1248 0 : } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1249 0 : (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
1250 0 : (align == 0 || (*addr & (align - 1)) == 0) &&
1251 0 : uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1252 : /*
1253 : * Address used as hint.
1254 : *
1255 : * Note: we enforce the alignment restriction,
1256 : * but ignore pmap_prefer.
1257 : */
1258 0 : } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1259 : /* Run selection algorithm for executables. */
1260 0 : error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1261 : addr, sz, pmap_align, pmap_offset, prot, hint);
1262 :
1263 : /* Grow kernel memory and try again. */
1264 0 : if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1265 0 : uvm_map_kmem_grow(map, &dead, sz, flags);
1266 :
1267 0 : error = uvm_addr_invoke(map, map->uaddr_exe,
1268 : &first, &last, addr, sz,
1269 : pmap_align, pmap_offset, prot, hint);
1270 0 : }
1271 :
1272 0 : if (error != 0)
1273 : goto unlock;
1274 : } else {
1275 : /* Update freelists from vmspace. */
1276 0 : if (map->flags & VM_MAP_ISVMSPACE)
1277 0 : uvm_map_vmspace_update(map, &dead, flags);
1278 :
1279 0 : error = uvm_map_findspace(map, &first, &last, addr, sz,
1280 : pmap_align, pmap_offset, prot, hint);
1281 :
1282 : /* Grow kernel memory and try again. */
1283 0 : if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1284 0 : uvm_map_kmem_grow(map, &dead, sz, flags);
1285 :
1286 0 : error = uvm_map_findspace(map, &first, &last, addr, sz,
1287 : pmap_align, pmap_offset, prot, hint);
1288 0 : }
1289 :
1290 0 : if (error != 0)
1291 : goto unlock;
1292 : }
1293 :
1294 : /* Double-check if selected address doesn't cause overflow. */
1295 0 : if (*addr + sz < *addr) {
1296 : error = ENOMEM;
1297 0 : goto unlock;
1298 : }
1299 :
1300 0 : KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
1301 : uvm_maxkaddr >= *addr + sz);
1302 :
1303 : /* If we only want a query, return now. */
1304 0 : if (flags & UVM_FLAG_QUERY) {
1305 : error = 0;
1306 0 : goto unlock;
1307 : }
1308 :
1309 0 : if (uobj == NULL)
1310 0 : uoffset = 0;
1311 0 : else if (uoffset == UVM_UNKNOWN_OFFSET) {
1312 0 : KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
1313 0 : uoffset = *addr - vm_map_min(kernel_map);
1314 0 : }
1315 :
1316 : /*
1317 : * Create new entry.
1318 : * first and last may be invalidated after this call.
1319 : */
1320 0 : entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1321 : new);
1322 0 : if (entry == NULL) {
1323 : error = ENOMEM;
1324 0 : goto unlock;
1325 : }
1326 : new = NULL;
1327 : KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1328 0 : entry->object.uvm_obj = uobj;
1329 0 : entry->offset = uoffset;
1330 0 : entry->protection = prot;
1331 0 : entry->max_protection = maxprot;
1332 0 : entry->inheritance = inherit;
1333 0 : entry->wired_count = 0;
1334 0 : entry->advice = advice;
1335 0 : if (flags & UVM_FLAG_STACK) {
1336 0 : entry->etype |= UVM_ET_STACK;
1337 0 : if (flags & UVM_FLAG_UNMAP)
1338 0 : map->serial++;
1339 : }
1340 0 : if (uobj)
1341 0 : entry->etype |= UVM_ET_OBJ;
1342 0 : else if (flags & UVM_FLAG_HOLE)
1343 0 : entry->etype |= UVM_ET_HOLE;
1344 0 : if (flags & UVM_FLAG_NOFAULT)
1345 0 : entry->etype |= UVM_ET_NOFAULT;
1346 0 : if (flags & UVM_FLAG_COPYONW) {
1347 0 : entry->etype |= UVM_ET_COPYONWRITE;
1348 0 : if ((flags & UVM_FLAG_OVERLAY) == 0)
1349 0 : entry->etype |= UVM_ET_NEEDSCOPY;
1350 : }
1351 0 : if (flags & UVM_FLAG_OVERLAY) {
1352 0 : entry->aref.ar_pageoff = 0;
1353 0 : entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1354 0 : }
1355 :
1356 : /* Update map and process statistics. */
1357 0 : if (!(flags & UVM_FLAG_HOLE)) {
1358 0 : map->size += sz;
1359 0 : if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) {
1360 0 : ((struct vmspace *)map)->vm_dused +=
1361 0 : uvmspace_dused(map, *addr, *addr + sz);
1362 0 : }
1363 : }
1364 :
1365 : /*
1366 : * Try to merge entry.
1367 : *
1368 : * Userland allocations are kept separated most of the time.
1369 : * Forgo the effort of merging what most of the time can't be merged
1370 : * and only try the merge if it concerns a kernel entry.
1371 : */
1372 0 : if ((flags & UVM_FLAG_NOMERGE) == 0 &&
1373 0 : (map->flags & VM_MAP_ISVMSPACE) == 0)
1374 0 : uvm_mapent_tryjoin(map, entry, &dead);
1375 :
1376 : unlock:
1377 0 : vm_map_unlock(map);
1378 :
1379 : /*
1380 : * Remove dead entries.
1381 : *
1382 : * Dead entries may be the result of merging.
1383 : * uvm_map_mkentry may also create dead entries, when it attempts to
1384 : * destroy free-space entries.
1385 : */
1386 0 : if (map->flags & VM_MAP_INTRSAFE)
1387 0 : uvm_unmap_detach_intrsafe(&dead);
1388 : else
1389 0 : uvm_unmap_detach(&dead, 0);
1390 : out:
1391 0 : if (new)
1392 0 : uvm_mapent_free(new);
1393 0 : return error;
1394 0 : }
1395 :
1396 : /*
1397 : * True iff e1 and e2 can be joined together.
1398 : */
1399 : int
1400 0 : uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
1401 : struct vm_map_entry *e2)
1402 : {
1403 : KDASSERT(e1 != NULL && e2 != NULL);
1404 :
1405 : /* Must be the same entry type and not have free memory between. */
1406 0 : if (e1->etype != e2->etype || e1->end != e2->start)
1407 0 : return 0;
1408 :
1409 : /* Submaps are never joined. */
1410 0 : if (UVM_ET_ISSUBMAP(e1))
1411 0 : return 0;
1412 :
1413 : /* Never merge wired memory. */
1414 0 : if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
1415 0 : return 0;
1416 :
1417 : /* Protection, inheritance and advice must be equal. */
1418 0 : if (e1->protection != e2->protection ||
1419 0 : e1->max_protection != e2->max_protection ||
1420 0 : e1->inheritance != e2->inheritance ||
1421 0 : e1->advice != e2->advice)
1422 0 : return 0;
1423 :
1424 : /* If uvm_object: object itself and offsets within object must match. */
1425 0 : if (UVM_ET_ISOBJ(e1)) {
1426 0 : if (e1->object.uvm_obj != e2->object.uvm_obj)
1427 0 : return 0;
1428 0 : if (e1->offset + (e1->end - e1->start) != e2->offset)
1429 0 : return 0;
1430 : }
1431 :
1432 : /*
1433 : * Cannot join shared amaps.
1434 : * Note: no need to lock amap to look at refs, since we don't care
1435 : * about its exact value.
1436 : * If it is 1 (i.e. we have the only reference) it will stay there.
1437 : */
1438 0 : if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
1439 0 : return 0;
1440 0 : if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
1441 0 : return 0;
1442 :
1443 : /* Apparently, e1 and e2 match. */
1444 0 : return 1;
1445 0 : }
1446 :
1447 : /*
1448 : * Join support function.
1449 : *
1450 : * Returns the merged entry on success.
1451 : * Returns NULL if the merge failed.
1452 : */
1453 : struct vm_map_entry*
1454 0 : uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
1455 : struct vm_map_entry *e2, struct uvm_map_deadq *dead)
1456 : {
1457 : struct uvm_addr_state *free;
1458 :
1459 : /*
1460 : * Merging is not supported for map entries that
1461 : * contain an amap in e1. This should never happen
1462 : * anyway, because only kernel entries are merged.
1463 : * These do not contain amaps.
1464 : * e2 contains no real information in its amap,
1465 : * so it can be erased immediately.
1466 : */
1467 0 : KASSERT(e1->aref.ar_amap == NULL);
1468 :
1469 : /*
1470 : * Don't drop obj reference:
1471 : * uvm_unmap_detach will do this for us.
1472 : */
1473 0 : free = uvm_map_uaddr_e(map, e1);
1474 0 : uvm_mapent_free_remove(map, free, e1);
1475 :
1476 0 : free = uvm_map_uaddr_e(map, e2);
1477 0 : uvm_mapent_free_remove(map, free, e2);
1478 0 : uvm_mapent_addr_remove(map, e2);
1479 0 : e1->end = e2->end;
1480 0 : e1->guard = e2->guard;
1481 0 : e1->fspace = e2->fspace;
1482 0 : uvm_mapent_free_insert(map, free, e1);
1483 :
1484 0 : DEAD_ENTRY_PUSH(dead, e2);
1485 0 : return e1;
1486 : }
1487 :
1488 : /*
1489 : * Attempt forward and backward joining of entry.
1490 : *
1491 : * Returns entry after joins.
1492 : * We are guaranteed that the amap of entry is either non-existent or
1493 : * has never been used.
1494 : */
1495 : struct vm_map_entry*
1496 0 : uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
1497 : struct uvm_map_deadq *dead)
1498 : {
1499 : struct vm_map_entry *other;
1500 : struct vm_map_entry *merged;
1501 :
1502 : /* Merge with previous entry. */
1503 0 : other = RBT_PREV(uvm_map_addr, entry);
1504 0 : if (other && uvm_mapent_isjoinable(map, other, entry)) {
1505 0 : merged = uvm_mapent_merge(map, other, entry, dead);
1506 0 : if (merged)
1507 0 : entry = merged;
1508 : }
1509 :
1510 : /*
1511 : * Merge with next entry.
1512 : *
1513 : * Because amap can only extend forward and the next entry
1514 : * probably contains sensible info, only perform forward merging
1515 : * in the absence of an amap.
1516 : */
1517 0 : other = RBT_NEXT(uvm_map_addr, entry);
1518 0 : if (other && entry->aref.ar_amap == NULL &&
1519 0 : other->aref.ar_amap == NULL &&
1520 0 : uvm_mapent_isjoinable(map, entry, other)) {
1521 0 : merged = uvm_mapent_merge(map, entry, other, dead);
1522 0 : if (merged)
1523 0 : entry = merged;
1524 : }
1525 :
1526 0 : return entry;
1527 : }
1528 :
1529 : /*
1530 : * Kill entries that are no longer in a map.
1531 : */
1532 : void
1533 0 : uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
1534 : {
1535 : struct vm_map_entry *entry;
1536 0 : int waitok = flags & UVM_PLA_WAITOK;
1537 :
1538 119 : if (TAILQ_EMPTY(deadq))
1539 0 : return;
1540 :
1541 0 : KERNEL_LOCK();
1542 31 : while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1543 58 : if (waitok)
1544 0 : uvm_pause();
1545 : /* Drop reference to amap, if we've got one. */
1546 0 : if (entry->aref.ar_amap)
1547 0 : amap_unref(entry->aref.ar_amap,
1548 58 : entry->aref.ar_pageoff,
1549 0 : atop(entry->end - entry->start),
1550 0 : flags & AMAP_REFALL);
1551 :
1552 : /* Drop reference to our backing object, if we've got one. */
1553 0 : if (UVM_ET_ISSUBMAP(entry)) {
1554 : /* ... unlikely to happen, but play it safe */
1555 0 : uvm_map_deallocate(entry->object.sub_map);
1556 0 : } else if (UVM_ET_ISOBJ(entry) &&
1557 0 : entry->object.uvm_obj->pgops->pgo_detach) {
1558 54 : entry->object.uvm_obj->pgops->pgo_detach(
1559 : entry->object.uvm_obj);
1560 0 : }
1561 :
1562 : /* Step to next. */
1563 0 : TAILQ_REMOVE(deadq, entry, dfree.deadq);
1564 0 : uvm_mapent_free(entry);
1565 : }
1566 0 : KERNEL_UNLOCK();
1567 60 : }
1568 :
1569 : void
1570 0 : uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq)
1571 : {
1572 : struct vm_map_entry *entry;
1573 :
1574 0 : while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1575 0 : KASSERT(entry->aref.ar_amap == NULL);
1576 0 : KASSERT(!UVM_ET_ISSUBMAP(entry));
1577 0 : KASSERT(!UVM_ET_ISOBJ(entry));
1578 0 : TAILQ_REMOVE(deadq, entry, dfree.deadq);
1579 0 : uvm_mapent_free(entry);
1580 : }
1581 0 : }
1582 :
1583 : /*
1584 : * Create and insert new entry.
1585 : *
1586 : * Returned entry contains new addresses and is inserted properly in the tree.
1587 : * first and last are (probably) no longer valid.
1588 : */
1589 : struct vm_map_entry*
1590 0 : uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
1591 : struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
1592 : struct uvm_map_deadq *dead, struct vm_map_entry *new)
1593 : {
1594 : struct vm_map_entry *entry, *prev;
1595 : struct uvm_addr_state *free;
1596 : vaddr_t min, max; /* free space boundaries for new entry */
1597 :
1598 : KDASSERT(map != NULL);
1599 : KDASSERT(first != NULL);
1600 : KDASSERT(last != NULL);
1601 : KDASSERT(dead != NULL);
1602 : KDASSERT(sz > 0);
1603 : KDASSERT(addr + sz > addr);
1604 : KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
1605 : KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
1606 : KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
1607 : uvm_tree_sanity(map, __FILE__, __LINE__);
1608 :
1609 0 : min = addr + sz;
1610 60 : max = VMMAP_FREE_END(last);
1611 :
1612 : /* Initialize new entry. */
1613 0 : if (new == NULL)
1614 0 : entry = uvm_mapent_alloc(map, flags);
1615 : else
1616 : entry = new;
1617 0 : if (entry == NULL)
1618 0 : return NULL;
1619 0 : entry->offset = 0;
1620 0 : entry->etype = 0;
1621 0 : entry->wired_count = 0;
1622 0 : entry->aref.ar_pageoff = 0;
1623 0 : entry->aref.ar_amap = NULL;
1624 :
1625 0 : entry->start = addr;
1626 0 : entry->end = min;
1627 0 : entry->guard = 0;
1628 0 : entry->fspace = 0;
1629 :
1630 : /* Reset free space in first. */
1631 0 : free = uvm_map_uaddr_e(map, first);
1632 0 : uvm_mapent_free_remove(map, free, first);
1633 0 : first->guard = 0;
1634 0 : first->fspace = 0;
1635 :
1636 : /*
1637 : * Remove all entries that are fully replaced.
1638 : * We are iterating using last in reverse order.
1639 : */
1640 60 : for (; first != last; last = prev) {
1641 0 : prev = RBT_PREV(uvm_map_addr, last);
1642 :
1643 : KDASSERT(last->start == last->end);
1644 0 : free = uvm_map_uaddr_e(map, last);
1645 0 : uvm_mapent_free_remove(map, free, last);
1646 0 : uvm_mapent_addr_remove(map, last);
1647 0 : DEAD_ENTRY_PUSH(dead, last);
1648 : }
1649 : /* Remove first if it is entirely inside <addr, addr+sz>. */
1650 0 : if (first->start == addr) {
1651 0 : uvm_mapent_addr_remove(map, first);
1652 0 : DEAD_ENTRY_PUSH(dead, first);
1653 0 : } else {
1654 60 : uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
1655 : addr, flags);
1656 : }
1657 :
1658 : /* Finally, link in entry. */
1659 0 : uvm_mapent_addr_insert(map, entry);
1660 0 : uvm_map_fix_space(map, entry, min, max, flags);
1661 :
1662 : uvm_tree_sanity(map, __FILE__, __LINE__);
1663 0 : return entry;
1664 0 : }
1665 :
1666 :
1667 : /*
1668 : * uvm_mapent_alloc: allocate a map entry
1669 : */
1670 : struct vm_map_entry *
1671 0 : uvm_mapent_alloc(struct vm_map *map, int flags)
1672 : {
1673 : struct vm_map_entry *me, *ne;
1674 : int pool_flags;
1675 : int i;
1676 :
1677 : pool_flags = PR_WAITOK;
1678 0 : if (flags & UVM_FLAG_TRYLOCK)
1679 : pool_flags = PR_NOWAIT;
1680 :
1681 60 : if (map->flags & VM_MAP_INTRSAFE || cold) {
1682 0 : mtx_enter(&uvm_kmapent_mtx);
1683 0 : if (SLIST_EMPTY(&uvm.kentry_free)) {
1684 0 : ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
1685 : &kd_nowait);
1686 0 : if (ne == NULL)
1687 0 : panic("uvm_mapent_alloc: cannot allocate map "
1688 : "entry");
1689 0 : for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
1690 0 : SLIST_INSERT_HEAD(&uvm.kentry_free,
1691 : &ne[i], daddrs.addr_kentry);
1692 : }
1693 0 : if (ratecheck(&uvm_kmapent_last_warn_time,
1694 : &uvm_kmapent_warn_rate))
1695 0 : printf("uvm_mapent_alloc: out of static "
1696 : "map entries\n");
1697 : }
1698 0 : me = SLIST_FIRST(&uvm.kentry_free);
1699 0 : SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
1700 0 : uvmexp.kmapent++;
1701 0 : mtx_leave(&uvm_kmapent_mtx);
1702 0 : me->flags = UVM_MAP_STATIC;
1703 0 : } else if (map == kernel_map) {
1704 60 : splassert(IPL_NONE);
1705 0 : me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
1706 60 : if (me == NULL)
1707 : goto out;
1708 0 : me->flags = UVM_MAP_KMEM;
1709 0 : } else {
1710 0 : splassert(IPL_NONE);
1711 0 : me = pool_get(&uvm_map_entry_pool, pool_flags);
1712 0 : if (me == NULL)
1713 : goto out;
1714 0 : me->flags = 0;
1715 : }
1716 :
1717 0 : if (me != NULL) {
1718 0 : RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF);
1719 0 : }
1720 :
1721 : out:
1722 0 : return(me);
1723 : }
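 :
 : /*
 :  * Editor's illustration (not part of the original source): the static
 :  * refill above carves one km_alloc'd page into
 :  * PAGE_SIZE / sizeof(struct vm_map_entry) entries.  A minimal sketch of
 :  * that arithmetic, with hypothetical sizes, kept compiled out:
 :  */
 : #if 0
 : static size_t
 : kentries_per_page_sketch(size_t page_size, size_t entry_size)
 : {
 : 	/* e.g. a 4096-byte page and a 128-byte entry yield 32 entries */
 : 	return page_size / entry_size;
 : }
 : #endif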
1724 :
1725 : /*
1726 : * uvm_mapent_free: free map entry
1727 : *
1728 : * => XXX: static pool for kernel map?
1729 : */
1730 : void
1731 0 : uvm_mapent_free(struct vm_map_entry *me)
1732 : {
1733 53 : if (me->flags & UVM_MAP_STATIC) {
1734 0 : mtx_enter(&uvm_kmapent_mtx);
1735 0 : SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
1736 0 : uvmexp.kmapent--;
1737 0 : mtx_leave(&uvm_kmapent_mtx);
1738 0 : } else if (me->flags & UVM_MAP_KMEM) {
1739 53 : splassert(IPL_NONE);
1740 0 : pool_put(&uvm_map_entry_kmem_pool, me);
1741 0 : } else {
1742 0 : splassert(IPL_NONE);
1743 0 : pool_put(&uvm_map_entry_pool, me);
1744 : }
1745 0 : }
1746 :
1747 : /*
1748 : * uvm_map_lookup_entry: find map entry at or before an address.
1749 : *
1750 : * => map must at least be read-locked by caller
1751 : * => entry is returned in "entry"
1752 : * => return value is true if address is in the returned entry
1753 : * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is
1754 : * returned for those mappings.
1755 : */
1756 : boolean_t
1757 0 : uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
1758 : struct vm_map_entry **entry)
1759 : {
1760 0 : *entry = uvm_map_entrybyaddr(&map->addr, address);
1761 0 : return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
1762 63 : (*entry)->start <= address && (*entry)->end > address;
1763 : }
1764 :
1765 : /*
1766 : * Inside a vm_map, find the entry containing sp and verify that it is
1767 : * marked MAP_STACK; also remember the low and high boundaries of that
1768 : * MAP_STACK region. Return TRUE.
1769 : * If sp isn't in a MAP_STACK region return FALSE.
1770 : */
1771 : boolean_t
1772 0 : uvm_map_check_stack_range(struct proc *p, vaddr_t sp)
1773 : {
1774 0 : vm_map_t map = &p->p_vmspace->vm_map;
1775 0 : vm_map_entry_t entry;
1776 :
1777 0 : if (sp < map->min_offset || sp >= map->max_offset)
1778 0 : return(FALSE);
1779 :
1780 : /* lock map */
1781 0 : vm_map_lock_read(map);
1782 :
1783 : /* lookup */
1784 0 : if (!uvm_map_lookup_entry(map, trunc_page(sp), &entry)) {
1785 0 : vm_map_unlock_read(map);
1786 0 : return(FALSE);
1787 : }
1788 :
1789 0 : if ((entry->etype & UVM_ET_STACK) == 0) {
1790 0 : vm_map_unlock_read(map);
1791 0 : return (FALSE);
1792 : }
1793 0 : p->p_spstart = entry->start;
1794 0 : p->p_spend = entry->end;
1795 0 : p->p_spserial = map->serial;
1796 0 : vm_map_unlock_read(map);
1797 0 : return(TRUE);
1798 0 : }
1799 :
1800 : /*
1801 : * Check whether the given address range can be converted to a MAP_STACK
1802 : * mapping.
1803 : *
1804 : * Must be called with map locked.
1805 : */
1806 : boolean_t
1807 0 : uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz)
1808 : {
1809 0 : vaddr_t end = addr + sz;
1810 0 : struct vm_map_entry *first, *iter, *prev = NULL;
1811 :
1812 0 : if (!uvm_map_lookup_entry(map, addr, &first)) {
1813 0 : printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n",
1814 : addr, end, map);
1815 0 : return FALSE;
1816 : }
1817 :
1818 : /*
1819 : * Check that the address range exists and is contiguous.
1820 : */
1821 0 : for (iter = first; iter != NULL && iter->start < end;
1822 0 : prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) {
1823 : /*
1824 : * Make sure that we do not have holes in the range.
1825 : */
1826 : #if 0
1827 : if (prev != NULL) {
1828 : printf("prev->start 0x%lx, prev->end 0x%lx, "
1829 : "iter->start 0x%lx, iter->end 0x%lx\n",
1830 : prev->start, prev->end, iter->start, iter->end);
1831 : }
1832 : #endif
1833 :
1834 0 : if (prev != NULL && prev->end != iter->start) {
1835 0 : printf("map stack 0x%lx-0x%lx of map %p failed: "
1836 : "hole in range\n", addr, end, map);
1837 0 : return FALSE;
1838 : }
1839 0 : if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) {
1840 0 : printf("map stack 0x%lx-0x%lx of map %p failed: "
1841 : "hole in range\n", addr, end, map);
1842 0 : return FALSE;
1843 : }
1844 : }
1845 :
1846 0 : return TRUE;
1847 0 : }
1848 :
1849 : /*
1850 : * Remap the middle-pages of an existing mapping as a stack range.
1851 : * If there exists a previous contiguous mapping with the given range
1852 : * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the
1853 : * mapping is dropped, and a new anon mapping is created and marked as
1854 : * a stack.
1855 : *
1856 : * Must be called with map unlocked.
1857 : */
1858 : int
1859 0 : uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz)
1860 : {
1861 0 : vm_map_t map = &p->p_vmspace->vm_map;
1862 0 : vaddr_t start, end;
1863 : int error;
1864 : int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
1865 : PROT_READ | PROT_WRITE | PROT_EXEC,
1866 : MAP_INHERIT_COPY, MADV_NORMAL,
1867 : UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP |
1868 : UVM_FLAG_COPYONW);
1869 :
1870 0 : start = round_page(addr);
1871 0 : end = trunc_page(addr + sz);
1872 : #ifdef MACHINE_STACK_GROWS_UP
1873 : if (end == addr + sz)
1874 : end -= PAGE_SIZE;
1875 : #else
1876 0 : if (start == addr)
1877 0 : start += PAGE_SIZE;
1878 : #endif
1879 :
1880 0 : if (start < map->min_offset || end >= map->max_offset || end < start)
1881 0 : return EINVAL;
1882 :
1883 0 : error = uvm_mapanon(map, &start, end - start, 0, flags);
1884 0 : if (error != 0)
1885 0 : printf("map stack for pid %d failed\n", p->p_p->ps_pid);
1886 :
1887 0 : return error;
1888 0 : }
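 :
 : /*
 :  * Editor's illustration (not part of the original source): a sketch of
 :  * the page trimming above for the downward-growing-stack case, assuming
 :  * a 4096-byte page.  A page-aligned addr additionally loses its lowest
 :  * page, e.g. addr = 0x1000, sz = 0x5000 yields [0x2000, 0x6000).
 :  */
 : #if 0
 : static void
 : remap_range_sketch(unsigned long addr, unsigned long sz,
 :     unsigned long *startp, unsigned long *endp)
 : {
 : 	const unsigned long pgsz = 0x1000UL, pgmask = pgsz - 1;
 : 	unsigned long start, end;
 :
 : 	start = (addr + pgmask) & ~pgmask;	/* round_page(addr) */
 : 	end = (addr + sz) & ~pgmask;		/* trunc_page(addr + sz) */
 : 	if (start == addr)			/* skip the lowest page */
 : 		start += pgsz;
 : 	*startp = start;
 : 	*endp = end;
 : }
 : #endif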
1889 :
1890 : /*
1891 : * uvm_map_pie: return a random load address for a PIE executable
1892 : * properly aligned.
1893 : */
1894 : #ifndef VM_PIE_MAX_ADDR
1895 : #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
1896 : #endif
1897 :
1898 : #ifndef VM_PIE_MIN_ADDR
1899 : #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
1900 : #endif
1901 :
1902 : #ifndef VM_PIE_MIN_ALIGN
1903 : #define VM_PIE_MIN_ALIGN PAGE_SIZE
1904 : #endif
1905 :
1906 : vaddr_t
1907 0 : uvm_map_pie(vaddr_t align)
1908 : {
1909 : vaddr_t addr, space, min;
1910 :
1911 0 : align = MAX(align, VM_PIE_MIN_ALIGN);
1912 :
1913 : /* round up to next alignment */
1914 0 : min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
1915 :
1916 0 : if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
1917 0 : return (align);
1918 :
1919 0 : space = (VM_PIE_MAX_ADDR - min) / align;
1920 0 : space = MIN(space, (u_int32_t)-1);
1921 :
1922 0 : addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
1923 0 : addr += min;
1924 :
1925 0 : return (addr);
1926 0 : }
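 :
 : /*
 :  * Editor's illustration (not part of the original source): the PIE
 :  * address above is VM_PIE_MIN_ADDR rounded up to a power-of-two
 :  * alignment, plus a uniformly chosen multiple of that alignment.  A
 :  * sketch with a caller-supplied random draw standing in for
 :  * arc4random_uniform() and without the 32-bit clamp:
 :  */
 : #if 0
 : static unsigned long
 : pie_pick_sketch(unsigned long minaddr, unsigned long maxaddr,
 :     unsigned long align, unsigned long draw)
 : {
 : 	unsigned long min, space;
 :
 : 	min = (minaddr + align - 1) & ~(align - 1);	/* round up to align */
 : 	if (align >= maxaddr || min >= maxaddr)
 : 		return align;
 : 	space = (maxaddr - min) / align;		/* number of slots */
 : 	return min + (draw % space) * align;		/* pick one slot */
 : }
 : #endif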
1927 :
1928 : void
1929 0 : uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end)
1930 : {
1931 0 : struct uvm_map_deadq dead;
1932 :
1933 0 : KASSERT((start & (vaddr_t)PAGE_MASK) == 0 &&
1934 : (end & (vaddr_t)PAGE_MASK) == 0);
1935 0 : TAILQ_INIT(&dead);
1936 0 : vm_map_lock(map);
1937 0 : uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
1938 0 : vm_map_unlock(map);
1939 :
1940 0 : if (map->flags & VM_MAP_INTRSAFE)
1941 0 : uvm_unmap_detach_intrsafe(&dead);
1942 : else
1943 0 : uvm_unmap_detach(&dead, 0);
1944 0 : }
1945 :
1946 : /*
1947 : * Mark entry as free.
1948 : *
1949 : * entry will be put on the dead list.
1950 : * The free space will be merged into the previous or a new entry,
1951 : * unless markfree is false.
1952 : */
1953 : void
1954 0 : uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
1955 : struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead,
1956 : boolean_t markfree)
1957 : {
1958 : struct uvm_addr_state *free;
1959 : struct vm_map_entry *prev;
1960 : vaddr_t addr; /* Start of freed range. */
1961 : vaddr_t end; /* End of freed range. */
1962 :
1963 60 : prev = *prev_ptr;
1964 0 : if (prev == entry)
1965 0 : *prev_ptr = prev = NULL;
1966 :
1967 60 : if (prev == NULL ||
1968 0 : VMMAP_FREE_END(prev) != entry->start)
1969 0 : prev = RBT_PREV(uvm_map_addr, entry);
1970 :
1971 : /* Entry is describing only free memory and has nothing to drain into. */
1972 0 : if (prev == NULL && entry->start == entry->end && markfree) {
1973 0 : *prev_ptr = entry;
1974 0 : return;
1975 : }
1976 :
1977 60 : addr = entry->start;
1978 0 : end = VMMAP_FREE_END(entry);
1979 0 : free = uvm_map_uaddr_e(map, entry);
1980 0 : uvm_mapent_free_remove(map, free, entry);
1981 0 : uvm_mapent_addr_remove(map, entry);
1982 0 : DEAD_ENTRY_PUSH(dead, entry);
1983 :
1984 0 : if (markfree) {
1985 0 : if (prev) {
1986 0 : free = uvm_map_uaddr_e(map, prev);
1987 0 : uvm_mapent_free_remove(map, free, prev);
1988 0 : }
1989 0 : *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0);
1990 0 : }
1991 0 : }
1992 :
1993 : /*
1994 : * Unwire and release referenced amap and object from map entry.
1995 : */
1996 : void
1997 0 : uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
1998 : {
1999 : /* Unwire removed map entry. */
2000 60 : if (VM_MAPENT_ISWIRED(entry)) {
2001 0 : KERNEL_LOCK();
2002 0 : entry->wired_count = 0;
2003 0 : uvm_fault_unwire_locked(map, entry->start, entry->end);
2004 0 : KERNEL_UNLOCK();
2005 0 : }
2006 :
2007 : /* Entry-type specific code. */
2008 60 : if (UVM_ET_ISHOLE(entry)) {
2009 : /* Nothing to be done for holes. */
2010 0 : } else if (map->flags & VM_MAP_INTRSAFE) {
2011 0 : KASSERT(vm_map_pmap(map) == pmap_kernel());
2012 0 : uvm_km_pgremove_intrsafe(entry->start, entry->end);
2013 0 : pmap_kremove(entry->start, entry->end - entry->start);
2014 0 : } else if (UVM_ET_ISOBJ(entry) &&
2015 0 : UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
2016 0 : KASSERT(vm_map_pmap(map) == pmap_kernel());
2017 : /*
2018 : * Note: kernel object mappings are currently used in
2019 : * two ways:
2020 : * [1] "normal" mappings of pages in the kernel object
2021 : * [2] uvm_km_valloc'd allocations in which we
2022 : * pmap_enter in some non-kernel-object page
2023 : * (e.g. vmapbuf).
2024 : *
2025 : * for case [1], we need to remove the mapping from
2026 : * the pmap and then remove the page from the kernel
2027 : * object (because, once pages in a kernel object are
2028 : * unmapped they are no longer needed, unlike, say,
2029 : * a vnode where you might want the data to persist
2030 : * until flushed out of a queue).
2031 : *
2032 : * for case [2], we need to remove the mapping from
2033 : * the pmap. there shouldn't be any pages at the
2034 : * specified offset in the kernel object [but it
2035 : * doesn't hurt to call uvm_km_pgremove just to be
2036 : * safe?]
2037 : *
2038 : * uvm_km_pgremove currently does the following:
2039 : * for pages in the kernel object range:
2040 : * - drops the swap slot
2041 : * - uvm_pagefree the page
2042 : *
2043 : * note there is a version of uvm_km_pgremove() that
2044 : * is used for "intrsafe" objects.
2045 : */
2046 : /*
2047 : * remove mappings from pmap and drop the pages
2048 : * from the object. offsets are always relative
2049 : * to vm_map_min(kernel_map).
2050 : */
2051 0 : pmap_remove(pmap_kernel(), entry->start, entry->end);
2052 0 : uvm_km_pgremove(entry->object.uvm_obj,
2053 0 : entry->start - vm_map_min(kernel_map),
2054 0 : entry->end - vm_map_min(kernel_map));
2055 :
2056 : /*
2057 : * null out kernel_object reference, we've just
2058 : * dropped it
2059 : */
2060 0 : entry->etype &= ~UVM_ET_OBJ;
2061 0 : entry->object.uvm_obj = NULL; /* to be safe */
2062 0 : } else {
2063 : /* remove mappings the standard way. */
2064 60 : pmap_remove(map->pmap, entry->start, entry->end);
2065 : }
2066 0 : }
2067 :
2068 : /*
2069 : * Remove all entries from start to end.
2070 : *
2071 : * If remove_holes, then remove ET_HOLE entries as well.
2072 : * If markfree, entry will be properly marked free, otherwise, no replacement
2073 : * entry will be put in the tree (corrupting the tree).
2074 : */
2075 : void
2076 0 : uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
2077 : struct uvm_map_deadq *dead, boolean_t remove_holes,
2078 : boolean_t markfree)
2079 : {
2080 0 : struct vm_map_entry *prev_hint, *next, *entry;
2081 :
2082 60 : start = MAX(start, map->min_offset);
2083 0 : end = MIN(end, map->max_offset);
2084 0 : if (start >= end)
2085 0 : return;
2086 :
2087 0 : if ((map->flags & VM_MAP_INTRSAFE) == 0)
2088 60 : splassert(IPL_NONE);
2089 : else
2090 0 : splassert(IPL_VM);
2091 :
2092 : /* Find first affected entry. */
2093 0 : entry = uvm_map_entrybyaddr(&map->addr, start);
2094 : KDASSERT(entry != NULL && entry->start <= start);
2095 0 : if (entry->end <= start && markfree)
2096 0 : entry = RBT_NEXT(uvm_map_addr, entry);
2097 : else
2098 0 : UVM_MAP_CLIP_START(map, entry, start);
2099 :
2100 : /*
2101 : * Iterate entries until we reach end address.
2102 : * prev_hint hints where the freed space can be appended to.
2103 : */
2104 60 : prev_hint = NULL;
2105 120 : for (; entry != NULL && entry->start < end; entry = next) {
2106 : KDASSERT(entry->start >= start);
2107 0 : if (entry->end > end || !markfree)
2108 0 : UVM_MAP_CLIP_END(map, entry, end);
2109 : KDASSERT(entry->start >= start && entry->end <= end);
2110 0 : next = RBT_NEXT(uvm_map_addr, entry);
2111 :
2112 : /* Don't remove holes unless asked to do so. */
2113 0 : if (UVM_ET_ISHOLE(entry)) {
2114 0 : if (!remove_holes) {
2115 0 : prev_hint = entry;
2116 0 : continue;
2117 : }
2118 : }
2119 :
2120 : /* A stack has been removed.. */
2121 60 : if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE))
2122 0 : map->serial++;
2123 :
2124 : /* Kill entry. */
2125 0 : uvm_unmap_kill_entry(map, entry);
2126 :
2127 : /* Update space usage. */
2128 0 : if ((map->flags & VM_MAP_ISVMSPACE) &&
2129 0 : entry->object.uvm_obj == NULL &&
2130 0 : !UVM_ET_ISHOLE(entry)) {
2131 0 : ((struct vmspace *)map)->vm_dused -=
2132 0 : uvmspace_dused(map, entry->start, entry->end);
2133 0 : }
2134 60 : if (!UVM_ET_ISHOLE(entry))
2135 60 : map->size -= entry->end - entry->start;
2136 :
2137 : /* Actual removal of entry. */
2138 0 : uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree);
2139 0 : }
2140 :
2141 : pmap_update(vm_map_pmap(map));
2142 :
2143 : #ifdef VMMAP_DEBUG
2144 : if (markfree) {
2145 : for (entry = uvm_map_entrybyaddr(&map->addr, start);
2146 : entry != NULL && entry->start < end;
2147 : entry = RBT_NEXT(uvm_map_addr, entry)) {
2148 : KDASSERT(entry->end <= start ||
2149 : entry->start == entry->end ||
2150 : UVM_ET_ISHOLE(entry));
2151 : }
2152 : } else {
2153 : vaddr_t a;
2154 : for (a = start; a < end; a += PAGE_SIZE)
2155 : KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL);
2156 : }
2157 : #endif
2158 0 : }
2159 :
2160 : /*
2161 : * Mark all entries from first until end (exclusive) as pageable.
2162 : *
2163 : * Lock must be exclusive on entry and will not be touched.
2164 : */
2165 : void
2166 0 : uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first,
2167 : struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr)
2168 : {
2169 : struct vm_map_entry *iter;
2170 :
2171 0 : for (iter = first; iter != end;
2172 0 : iter = RBT_NEXT(uvm_map_addr, iter)) {
2173 : KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2174 0 : if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2175 : continue;
2176 :
2177 0 : iter->wired_count = 0;
2178 0 : uvm_fault_unwire_locked(map, iter->start, iter->end);
2179 0 : }
2180 0 : }
2181 :
2182 : /*
2183 : * Mark all entries from first until end (exclusive) as wired.
2184 : *
2185 : * Lockflags determines the lock state on return from this function.
2186 : * Lock must be exclusive on entry.
2187 : */
2188 : int
2189 0 : uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first,
2190 : struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr,
2191 : int lockflags)
2192 : {
2193 : struct vm_map_entry *iter;
2194 : #ifdef DIAGNOSTIC
2195 : unsigned int timestamp_save;
2196 : #endif
2197 : int error;
2198 :
2199 : /*
2200 : * Wire pages in two passes:
2201 : *
2202 : * 1: holding the write lock, we create any anonymous maps that need
2203 : * to be created. then we clip each map entry to the region to
2204 : * be wired and increment its wiring count.
2205 : *
2206 : * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
2207 : * in the pages for any newly wired area (wired_count == 1).
2208 : *
2209 : * downgrading to a read lock for uvm_fault_wire avoids a possible
2210 : * deadlock with another thread that may have faulted on one of
2211 : * the pages to be wired (it would mark the page busy, blocking
2212 : * us, then in turn block on the map lock that we hold).
2213 : * because we keep the read lock on the map, the copy-on-write
2214 : * status of the entries we modify here cannot change.
2215 : */
2216 0 : for (iter = first; iter != end;
2217 0 : iter = RBT_NEXT(uvm_map_addr, iter)) {
2218 : KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2219 0 : if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2220 0 : iter->protection == PROT_NONE)
2221 : continue;
2222 :
2223 : /*
2224 : * Perform actions of vm_map_lookup that need the write lock.
2225 : * - create an anonymous map for copy-on-write
2226 : * - anonymous map for zero-fill
2227 : * Skip submaps.
2228 : */
2229 0 : if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
2230 0 : UVM_ET_ISNEEDSCOPY(iter) &&
2231 0 : ((iter->protection & PROT_WRITE) ||
2232 0 : iter->object.uvm_obj == NULL)) {
2233 0 : amap_copy(map, iter, M_WAITOK,
2234 0 : UVM_ET_ISSTACK(iter) ? FALSE : TRUE,
2235 0 : iter->start, iter->end);
2236 0 : }
2237 0 : iter->wired_count++;
2238 0 : }
2239 :
2240 : /*
2241 : * Pass 2.
2242 : */
2243 : #ifdef DIAGNOSTIC
2244 0 : timestamp_save = map->timestamp;
2245 : #endif
2246 0 : vm_map_busy(map);
2247 0 : vm_map_downgrade(map);
2248 :
2249 : error = 0;
2250 0 : for (iter = first; error == 0 && iter != end;
2251 0 : iter = RBT_NEXT(uvm_map_addr, iter)) {
2252 0 : if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2253 0 : iter->protection == PROT_NONE)
2254 : continue;
2255 :
2256 0 : error = uvm_fault_wire(map, iter->start, iter->end,
2257 : iter->protection);
2258 0 : }
2259 :
2260 0 : if (error) {
2261 : /*
2262 : * uvm_fault_wire failure
2263 : *
2264 : * Reacquire lock and undo our work.
2265 : */
2266 0 : vm_map_upgrade(map);
2267 0 : vm_map_unbusy(map);
2268 : #ifdef DIAGNOSTIC
2269 0 : if (timestamp_save != map->timestamp)
2270 0 : panic("uvm_map_pageable_wire: stale map");
2271 : #endif
2272 :
2273 : /*
2274 : * first is no longer needed to restart loops.
2275 : * Use it as iterator to unmap successful mappings.
2276 : */
2277 0 : for (; first != iter;
2278 0 : first = RBT_NEXT(uvm_map_addr, first)) {
2279 0 : if (UVM_ET_ISHOLE(first) ||
2280 0 : first->start == first->end ||
2281 0 : first->protection == PROT_NONE)
2282 : continue;
2283 :
2284 0 : first->wired_count--;
2285 0 : if (!VM_MAPENT_ISWIRED(first)) {
2286 0 : uvm_fault_unwire_locked(map,
2287 0 : iter->start, iter->end);
2288 0 : }
2289 : }
2290 :
2291 : /* decrease counter in the rest of the entries */
2292 0 : for (; iter != end;
2293 0 : iter = RBT_NEXT(uvm_map_addr, iter)) {
2294 0 : if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2295 0 : iter->protection == PROT_NONE)
2296 : continue;
2297 :
2298 0 : iter->wired_count--;
2299 0 : }
2300 :
2301 0 : if ((lockflags & UVM_LK_EXIT) == 0)
2302 0 : vm_map_unlock(map);
2303 0 : return error;
2304 : }
2305 :
2306 : /* We are currently holding a read lock. */
2307 0 : if ((lockflags & UVM_LK_EXIT) == 0) {
2308 0 : vm_map_unbusy(map);
2309 0 : vm_map_unlock_read(map);
2310 0 : } else {
2311 0 : vm_map_upgrade(map);
2312 0 : vm_map_unbusy(map);
2313 : #ifdef DIAGNOSTIC
2314 0 : if (timestamp_save != map->timestamp)
2315 0 : panic("uvm_map_pageable_wire: stale map");
2316 : #endif
2317 : }
2318 0 : return 0;
2319 0 : }
2320 :
2321 : /*
2322 : * uvm_map_pageable: set pageability of a range in a map.
2323 : *
2324 : * Flags:
2325 : * UVM_LK_ENTER: map is already locked by caller
2326 : * UVM_LK_EXIT: don't unlock map on exit
2327 : *
2328 : * The full range must be in use (entries may not have fspace != 0).
2329 : * UVM_ET_HOLE counts as unmapped.
2330 : */
2331 : int
2332 0 : uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2333 : boolean_t new_pageable, int lockflags)
2334 : {
2335 : struct vm_map_entry *first, *last, *tmp;
2336 : int error;
2337 :
2338 0 : start = trunc_page(start);
2339 0 : end = round_page(end);
2340 :
2341 0 : if (start > end)
2342 0 : return EINVAL;
2343 0 : if (start == end)
2344 0 : return 0; /* nothing to do */
2345 0 : if (start < map->min_offset)
2346 0 : return EFAULT; /* why? see first XXX below */
2347 0 : if (end > map->max_offset)
2348 0 : return EINVAL; /* why? see second XXX below */
2349 :
2350 0 : KASSERT(map->flags & VM_MAP_PAGEABLE);
2351 0 : if ((lockflags & UVM_LK_ENTER) == 0)
2352 0 : vm_map_lock(map);
2353 :
2354 : /*
2355 : * Find first entry.
2356 : *
2357 : * Initial test on start is different, because of the different
2358 : * error returned. Rest is tested further down.
2359 : */
2360 0 : first = uvm_map_entrybyaddr(&map->addr, start);
2361 0 : if (first->end <= start || UVM_ET_ISHOLE(first)) {
2362 : /*
2363 : * XXX if the first address is not mapped, it is EFAULT?
2364 : */
2365 : error = EFAULT;
2366 0 : goto out;
2367 : }
2368 :
2369 : /* Check that the range has no holes. */
2370 0 : for (last = first; last != NULL && last->start < end;
2371 0 : last = RBT_NEXT(uvm_map_addr, last)) {
2372 0 : if (UVM_ET_ISHOLE(last) ||
2373 0 : (last->end < end && VMMAP_FREE_END(last) != last->end)) {
2374 : /*
2375 : * XXX unmapped memory in range, why is it EINVAL
2376 : * instead of EFAULT?
2377 : */
2378 : error = EINVAL;
2379 0 : goto out;
2380 : }
2381 : }
2382 :
2383 : /*
2384 : * Last ended at the first entry after the range.
2385 : * Move back one step.
2386 : *
2387 : * Note that last may be NULL.
2388 : */
2389 0 : if (last == NULL) {
2390 0 : last = RBT_MAX(uvm_map_addr, &map->addr);
2391 0 : if (last->end < end) {
2392 : error = EINVAL;
2393 0 : goto out;
2394 : }
2395 : } else {
2396 0 : KASSERT(last != first);
2397 0 : last = RBT_PREV(uvm_map_addr, last);
2398 : }
2399 :
2400 : /* Wire/unwire pages here. */
2401 0 : if (new_pageable) {
2402 : /*
2403 : * Mark pageable.
2404 : * entries that are not wired are untouched.
2405 : */
2406 0 : if (VM_MAPENT_ISWIRED(first))
2407 0 : UVM_MAP_CLIP_START(map, first, start);
2408 : /*
2409 : * Split last at end.
2410 : * Make tmp be the first entry after what is to be touched.
2411 : * If last is not wired, don't touch it.
2412 : */
2413 0 : if (VM_MAPENT_ISWIRED(last)) {
2414 0 : UVM_MAP_CLIP_END(map, last, end);
2415 0 : tmp = RBT_NEXT(uvm_map_addr, last);
2416 0 : } else
2417 : tmp = last;
2418 :
2419 0 : uvm_map_pageable_pgon(map, first, tmp, start, end);
2420 0 : error = 0;
2421 :
2422 : out:
2423 0 : if ((lockflags & UVM_LK_EXIT) == 0)
2424 0 : vm_map_unlock(map);
2425 0 : return error;
2426 : } else {
2427 : /*
2428 : * Mark entries wired.
2429 : * entries are always touched (because recovery needs this).
2430 : */
2431 0 : if (!VM_MAPENT_ISWIRED(first))
2432 0 : UVM_MAP_CLIP_START(map, first, start);
2433 : /*
2434 : * Split last at end.
2435 : * Make tmp be the first entry after what is to be touched.
2436 : * If last is not wired, don't touch it.
2437 : */
2438 0 : if (!VM_MAPENT_ISWIRED(last)) {
2439 0 : UVM_MAP_CLIP_END(map, last, end);
2440 0 : tmp = RBT_NEXT(uvm_map_addr, last);
2441 0 : } else
2442 : tmp = last;
2443 :
2444 0 : return uvm_map_pageable_wire(map, first, tmp, start, end,
2445 : lockflags);
2446 : }
2447 0 : }
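 :
 : /*
 :  * Editor's illustration (not part of the original source): a hedged
 :  * sketch of how an mlock(2)-style caller might use the function above;
 :  * "map", "start" and "end" are assumed to be a page-aligned, fully
 :  * mapped range in a pageable map, and "error" an int.
 :  */
 : #if 0
 : 	/* wire the range (the map is locked and unlocked internally) */
 : 	error = uvm_map_pageable(map, start, end, FALSE, 0);
 :
 : 	/* ... and later make it pageable again */
 : 	if (error == 0)
 : 		error = uvm_map_pageable(map, start, end, TRUE, 0);
 : #endif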
2448 :
2449 : /*
2450 : * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2451 : * all mapped regions.
2452 : *
2453 : * Map must not be locked.
2454 : * If no flags are specified, all ragions are unwired.
2455 : */
2456 : int
2457 0 : uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2458 : {
2459 : vsize_t size;
2460 : struct vm_map_entry *iter;
2461 :
2462 0 : KASSERT(map->flags & VM_MAP_PAGEABLE);
2463 0 : vm_map_lock(map);
2464 :
2465 0 : if (flags == 0) {
2466 0 : uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr),
2467 0 : NULL, map->min_offset, map->max_offset);
2468 :
2469 0 : vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
2470 0 : vm_map_unlock(map);
2471 0 : return 0;
2472 : }
2473 :
2474 0 : if (flags & MCL_FUTURE)
2475 0 : vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
2476 0 : if (!(flags & MCL_CURRENT)) {
2477 0 : vm_map_unlock(map);
2478 0 : return 0;
2479 : }
2480 :
2481 : /*
2482 : * Count number of pages in all non-wired entries.
2483 : * If the number exceeds the limit, abort.
2484 : */
2485 : size = 0;
2486 0 : RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2487 0 : if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2488 : continue;
2489 :
2490 0 : size += iter->end - iter->start;
2491 0 : }
2492 :
2493 0 : if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
2494 0 : vm_map_unlock(map);
2495 0 : return ENOMEM;
2496 : }
2497 :
2498 : /* XXX non-pmap_wired_count case must be handled by caller */
2499 : #ifdef pmap_wired_count
2500 0 : if (limit != 0 &&
2501 0 : size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) {
2502 0 : vm_map_unlock(map);
2503 0 : return ENOMEM;
2504 : }
2505 : #endif
2506 :
2507 : /*
2508 : * uvm_map_pageable_wire will release lcok
2509 : */
2510 0 : return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr),
2511 0 : NULL, map->min_offset, map->max_offset, 0);
2512 0 : }
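 :
 : /*
 :  * Editor's illustration (not part of the original source): an
 :  * mlockall(2)-style caller of the function above passes the MCL_* flags
 :  * and a wiring limit, while flags == 0 unwires everything.  A hedged
 :  * fragment, with "p", "limit" and "error" assumed:
 :  */
 : #if 0
 : 	/* wire current mappings and all future ones, up to limit bytes */
 : 	error = uvm_map_pageable_all(&p->p_vmspace->vm_map,
 : 	    MCL_CURRENT | MCL_FUTURE, limit);
 :
 : 	/* undo: unwire everything and clear the wire-future flag */
 : 	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
 : #endif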
2513 :
2514 : /*
2515 : * Initialize map.
2516 : *
2517 : * Allocates sufficient entries to describe the free memory in the map.
2518 : */
2519 : void
2520 0 : uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags)
2521 : {
2522 : int i;
2523 :
2524 0 : KASSERT((min & (vaddr_t)PAGE_MASK) == 0);
2525 0 : KASSERT((max & (vaddr_t)PAGE_MASK) == 0 ||
2526 : (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
2527 :
2528 : /*
2529 : * Update parameters.
2530 : *
2531 : * This code handles (vaddr_t)-1 and other page mask ending addresses
2532 : * properly.
2533 : * We lose the top page if the full virtual address space is used.
2534 : */
2535 0 : if (max & (vaddr_t)PAGE_MASK) {
2536 0 : max += 1;
2537 0 : if (max == 0) /* overflow */
2538 0 : max -= PAGE_SIZE;
2539 : }
2540 :
2541 0 : RBT_INIT(uvm_map_addr, &map->addr);
2542 0 : map->uaddr_exe = NULL;
2543 0 : for (i = 0; i < nitems(map->uaddr_any); ++i)
2544 0 : map->uaddr_any[i] = NULL;
2545 0 : map->uaddr_brk_stack = NULL;
2546 :
2547 0 : map->size = 0;
2548 0 : map->ref_count = 0;
2549 0 : map->min_offset = min;
2550 0 : map->max_offset = max;
2551 0 : map->b_start = map->b_end = 0; /* Empty brk() area by default. */
2552 0 : map->s_start = map->s_end = 0; /* Empty stack area by default. */
2553 0 : map->flags = flags;
2554 0 : map->timestamp = 0;
2555 0 : rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK);
2556 0 : mtx_init(&map->mtx, IPL_VM);
2557 0 : mtx_init(&map->flags_lock, IPL_VM);
2558 :
2559 : /* Configure the allocators. */
2560 0 : if (flags & VM_MAP_ISVMSPACE)
2561 0 : uvm_map_setup_md(map);
2562 : else
2563 0 : map->uaddr_any[3] = &uaddr_kbootstrap;
2564 :
2565 : /*
2566 : * Fill map entries.
2567 : * We do not need to write-lock the map here because only the current
2568 : * thread sees it right now. Initialize ref_count to 0 above to avoid
2569 : * bogus triggering of lock-not-held assertions.
2570 : */
2571 0 : uvm_map_setup_entries(map);
2572 : uvm_tree_sanity(map, __FILE__, __LINE__);
2573 0 : map->ref_count = 1;
2574 0 : }
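 :
 : /*
 :  * Editor's illustration (not part of the original source): the max
 :  * fixup above in isolation, assuming 4096-byte pages.  A map end of
 :  * (vaddr_t)-1 rounds up to 0, which overflows, so the top page is
 :  * sacrificed and the end becomes ...fffff000.
 :  */
 : #if 0
 : static unsigned long
 : setup_max_sketch(unsigned long max)
 : {
 : 	const unsigned long pgsz = 0x1000UL, pgmask = pgsz - 1;
 :
 : 	if (max & pgmask) {
 : 		max += 1;
 : 		if (max == 0)		/* overflow: lose the top page */
 : 			max -= pgsz;
 : 	}
 : 	return max;
 : }
 : #endif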
2575 :
2576 : /*
2577 : * Destroy the map.
2578 : *
2579 : * This is the inverse operation to uvm_map_setup.
2580 : */
2581 : void
2582 0 : uvm_map_teardown(struct vm_map *map)
2583 : {
2584 0 : struct uvm_map_deadq dead_entries;
2585 : struct vm_map_entry *entry, *tmp;
2586 : #ifdef VMMAP_DEBUG
2587 : size_t numq, numt;
2588 : #endif
2589 : int i;
2590 :
2591 0 : KERNEL_ASSERT_LOCKED();
2592 0 : KERNEL_UNLOCK();
2593 0 : KERNEL_ASSERT_UNLOCKED();
2594 :
2595 0 : KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
2596 :
2597 : /* Remove address selectors. */
2598 0 : uvm_addr_destroy(map->uaddr_exe);
2599 0 : map->uaddr_exe = NULL;
2600 0 : for (i = 0; i < nitems(map->uaddr_any); i++) {
2601 0 : uvm_addr_destroy(map->uaddr_any[i]);
2602 0 : map->uaddr_any[i] = NULL;
2603 : }
2604 0 : uvm_addr_destroy(map->uaddr_brk_stack);
2605 0 : map->uaddr_brk_stack = NULL;
2606 :
2607 : /*
2608 : * Remove entries.
2609 : *
2610 : * The following is based on graph breadth-first search.
2611 : *
2612 : * In color terms:
2613 : * - the dead_entries set contains all nodes that are reachable
2614 : * (i.e. both the black and the grey nodes)
2615 : * - any entry not in dead_entries is white
2616 : * - any entry that appears in dead_entries before entry,
2617 : * is black, the rest is grey.
2618 : * The set [entry, end] is also referred to as the wavefront.
2619 : *
2620 : * Since the tree is always a fully connected graph, the breadth-first
2621 : * search guarantees that each vmmap_entry is visited exactly once.
2622 : * The vm_map is broken down in linear time.
2623 : */
2624 0 : TAILQ_INIT(&dead_entries);
2625 0 : if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL)
2626 0 : DEAD_ENTRY_PUSH(&dead_entries, entry);
2627 0 : while (entry != NULL) {
2628 0 : sched_pause(yield);
2629 0 : uvm_unmap_kill_entry(map, entry);
2630 0 : if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL)
2631 0 : DEAD_ENTRY_PUSH(&dead_entries, tmp);
2632 0 : if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
2633 0 : DEAD_ENTRY_PUSH(&dead_entries, tmp);
2634 : /* Update wave-front. */
2635 0 : entry = TAILQ_NEXT(entry, dfree.deadq);
2636 : }
2637 :
2638 : #ifdef VMMAP_DEBUG
2639 : numt = numq = 0;
2640 : RBT_FOREACH(entry, uvm_map_addr, &map->addr)
2641 : numt++;
2642 : TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
2643 : numq++;
2644 : KASSERT(numt == numq);
2645 : #endif
2646 0 : uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK);
2647 :
2648 0 : KERNEL_LOCK();
2649 :
2650 0 : pmap_destroy(map->pmap);
2651 0 : map->pmap = NULL;
2652 0 : }
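 :
 : /*
 :  * Editor's illustration (not part of the original source): the teardown
 :  * above visits the address tree breadth-first, using the dead queue
 :  * itself as the wavefront.  The same pattern on a generic binary tree,
 :  * with a plain singly-linked queue instead of the TAILQ:
 :  */
 : #if 0
 : struct bfs_node {
 : 	struct bfs_node	*left, *right;	/* tree links (never modified) */
 : 	struct bfs_node	*next;		/* queue link, i.e. the wavefront */
 : };
 :
 : static void
 : bfs_sketch(struct bfs_node *root, void (*visit)(struct bfs_node *))
 : {
 : 	struct bfs_node *n, *tail;
 :
 : 	if (root == NULL)
 : 		return;
 : 	root->next = NULL;
 : 	tail = root;
 : 	for (n = root; n != NULL; n = n->next) {
 : 		visit(n);			/* cf. uvm_unmap_kill_entry */
 : 		if (n->left != NULL) {		/* append children to queue */
 : 			n->left->next = NULL;
 : 			tail->next = n->left;
 : 			tail = n->left;
 : 		}
 : 		if (n->right != NULL) {
 : 			n->right->next = NULL;
 : 			tail->next = n->right;
 : 			tail = n->right;
 : 		}
 : 	}
 : }
 : #endif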
2653 :
2654 : /*
2655 : * Populate map with free-memory entries.
2656 : *
2657 : * Map must be initialized and empty.
2658 : */
2659 : void
2660 0 : uvm_map_setup_entries(struct vm_map *map)
2661 : {
2662 : KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
2663 :
2664 0 : uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
2665 0 : }
2666 :
2667 : /*
2668 : * Split entry at given address.
2669 : *
2670 : * orig: entry that is to be split.
2671 : * next: a newly allocated map entry that is not linked.
2672 : * split: address at which the split is done.
2673 : */
2674 : void
2675 0 : uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
2676 : struct vm_map_entry *next, vaddr_t split)
2677 : {
2678 : struct uvm_addr_state *free, *free_before;
2679 : vsize_t adj;
2680 :
2681 0 : if ((split & PAGE_MASK) != 0) {
2682 0 : panic("uvm_map_splitentry: split address 0x%lx "
2683 : "not on page boundary!", split);
2684 : }
2685 : KDASSERT(map != NULL && orig != NULL && next != NULL);
2686 : uvm_tree_sanity(map, __FILE__, __LINE__);
2687 0 : KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
2688 :
2689 : #ifdef VMMAP_DEBUG
2690 : KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig);
2691 : KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next);
2692 : #endif /* VMMAP_DEBUG */
2693 :
2694 : /*
2695 : * Free space will change, unlink from free space tree.
2696 : */
2697 0 : free = uvm_map_uaddr_e(map, orig);
2698 0 : uvm_mapent_free_remove(map, free, orig);
2699 :
2700 0 : adj = split - orig->start;
2701 :
2702 0 : uvm_mapent_copy(orig, next);
2703 0 : if (split >= orig->end) {
2704 0 : next->etype = 0;
2705 0 : next->offset = 0;
2706 0 : next->wired_count = 0;
2707 0 : next->start = next->end = split;
2708 0 : next->guard = 0;
2709 0 : next->fspace = VMMAP_FREE_END(orig) - split;
2710 0 : next->aref.ar_amap = NULL;
2711 0 : next->aref.ar_pageoff = 0;
2712 0 : orig->guard = MIN(orig->guard, split - orig->end);
2713 0 : orig->fspace = split - VMMAP_FREE_START(orig);
2714 0 : } else {
2715 0 : orig->fspace = 0;
2716 0 : orig->guard = 0;
2717 0 : orig->end = next->start = split;
2718 :
2719 0 : if (next->aref.ar_amap) {
2720 0 : KERNEL_LOCK();
2721 0 : amap_splitref(&orig->aref, &next->aref, adj);
2722 0 : KERNEL_UNLOCK();
2723 0 : }
2724 0 : if (UVM_ET_ISSUBMAP(orig)) {
2725 0 : uvm_map_reference(next->object.sub_map);
2726 0 : next->offset += adj;
2727 0 : } else if (UVM_ET_ISOBJ(orig)) {
2728 0 : if (next->object.uvm_obj->pgops &&
2729 0 : next->object.uvm_obj->pgops->pgo_reference) {
2730 0 : KERNEL_LOCK();
2731 0 : next->object.uvm_obj->pgops->pgo_reference(
2732 : next->object.uvm_obj);
2733 0 : KERNEL_UNLOCK();
2734 0 : }
2735 0 : next->offset += adj;
2736 0 : }
2737 : }
2738 :
2739 : /*
2740 : * Link next into address tree.
2741 : * Link orig and next into free-space tree.
2742 : *
2743 : * Don't insert 'next' into the addr tree until orig has been linked,
2744 : * in case the free-list looks at adjecent entries in the addr tree
2745 : * for its decisions.
2746 : */
2747 0 : if (orig->fspace > 0)
2748 0 : free_before = free;
2749 : else
2750 0 : free_before = uvm_map_uaddr_e(map, orig);
2751 0 : uvm_mapent_free_insert(map, free_before, orig);
2752 0 : uvm_mapent_addr_insert(map, next);
2753 0 : uvm_mapent_free_insert(map, free, next);
2754 :
2755 : uvm_tree_sanity(map, __FILE__, __LINE__);
2756 0 : }
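 :
 : /*
 :  * Editor's note (worked example, not part of the original source):
 :  * assume orig maps [0x1000, 0x5000) and owns free space up to 0x8000.
 :  * Splitting at 0x3000 (inside the mapped part) leaves orig as
 :  * [0x1000, 0x3000) with no free space, while next becomes
 :  * [0x3000, 0x5000) and inherits the free space up to 0x8000; for an
 :  * object or amap mapping, next->offset is advanced by adj = 0x2000.
 :  * Splitting at 0x6000 (inside the free space) instead leaves orig as
 :  * [0x1000, 0x5000) with free space up to 0x6000 and makes next an
 :  * empty entry at 0x6000 owning the free space up to 0x8000.
 :  */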
2757 :
2758 :
2759 : #ifdef VMMAP_DEBUG
2760 :
2761 : void
2762 : uvm_tree_assert(struct vm_map *map, int test, char *test_str,
2763 : char *file, int line)
2764 : {
2765 : char* map_special;
2766 :
2767 : if (test)
2768 : return;
2769 :
2770 : if (map == kernel_map)
2771 : map_special = " (kernel_map)";
2772 : else if (map == kmem_map)
2773 : map_special = " (kmem_map)";
2774 : else
2775 : map_special = "";
2776 : panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file,
2777 : line, test_str);
2778 : }
2779 :
2780 : /*
2781 : * Check that map is sane.
2782 : */
2783 : void
2784 : uvm_tree_sanity(struct vm_map *map, char *file, int line)
2785 : {
2786 : struct vm_map_entry *iter;
2787 : vaddr_t addr;
2788 : vaddr_t min, max, bound; /* Bounds checker. */
2789 : struct uvm_addr_state *free;
2790 :
2791 : addr = vm_map_min(map);
2792 : RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2793 : /*
2794 : * Valid start, end.
2795 : * Catch overflow for end+fspace.
2796 : */
2797 : UVM_ASSERT(map, iter->end >= iter->start, file, line);
2798 : UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line);
2799 :
2800 : /* May not be empty. */
2801 : UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter),
2802 : file, line);
2803 :
2804 : /* Addresses for entry must lie within map boundaries. */
2805 : UVM_ASSERT(map, iter->start >= vm_map_min(map) &&
2806 : VMMAP_FREE_END(iter) <= vm_map_max(map), file, line);
2807 :
2808 : /* Tree may not have gaps. */
2809 : UVM_ASSERT(map, iter->start == addr, file, line);
2810 : addr = VMMAP_FREE_END(iter);
2811 :
2812 : /*
2813 : * Free space may not cross boundaries, unless the same
2814 : * free list is used on both sides of the border.
2815 : */
2816 : min = VMMAP_FREE_START(iter);
2817 : max = VMMAP_FREE_END(iter);
2818 :
2819 : while (min < max &&
2820 : (bound = uvm_map_boundary(map, min, max)) != max) {
2821 : UVM_ASSERT(map,
2822 : uvm_map_uaddr(map, bound - 1) ==
2823 : uvm_map_uaddr(map, bound),
2824 : file, line);
2825 : min = bound;
2826 : }
2827 :
2828 : free = uvm_map_uaddr_e(map, iter);
2829 : if (free) {
2830 : UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0,
2831 : file, line);
2832 : } else {
2833 : UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0,
2834 : file, line);
2835 : }
2836 : }
2837 : UVM_ASSERT(map, addr == vm_map_max(map), file, line);
2838 : }
2839 :
2840 : void
2841 : uvm_tree_size_chk(struct vm_map *map, char *file, int line)
2842 : {
2843 : struct vm_map_entry *iter;
2844 : vsize_t size;
2845 :
2846 : size = 0;
2847 : RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2848 : if (!UVM_ET_ISHOLE(iter))
2849 : size += iter->end - iter->start;
2850 : }
2851 :
2852 : if (map->size != size)
2853 : printf("map size = 0x%lx, should be 0x%lx\n", map->size, size);
2854 : UVM_ASSERT(map, map->size == size, file, line);
2855 :
2856 : vmspace_validate(map);
2857 : }
2858 :
2859 : /*
2860 : * This function validates the statistics on vmspace.
2861 : */
2862 : void
2863 : vmspace_validate(struct vm_map *map)
2864 : {
2865 : struct vmspace *vm;
2866 : struct vm_map_entry *iter;
2867 : vaddr_t imin, imax;
2868 : vaddr_t stack_begin, stack_end; /* Position of stack. */
2869 : vsize_t stack, heap; /* Measured sizes. */
2870 :
2871 : if (!(map->flags & VM_MAP_ISVMSPACE))
2872 : return;
2873 :
2874 : vm = (struct vmspace *)map;
2875 : stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2876 : stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2877 :
2878 : stack = heap = 0;
2879 : RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2880 : imin = imax = iter->start;
2881 :
2882 : if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL)
2883 : continue;
2884 :
2885 : /*
2886 : * Update stack, heap.
2887 : * Keep in mind that (theoretically) the entries of
2888 : * userspace and stack may be joined.
2889 : */
2890 : while (imin != iter->end) {
2891 : /*
2892 : * Set imax to the first boundary crossed between
2893 : * imin and stack addresses.
2894 : */
2895 : imax = iter->end;
2896 : if (imin < stack_begin && imax > stack_begin)
2897 : imax = stack_begin;
2898 : else if (imin < stack_end && imax > stack_end)
2899 : imax = stack_end;
2900 :
2901 : if (imin >= stack_begin && imin < stack_end)
2902 : stack += imax - imin;
2903 : else
2904 : heap += imax - imin;
2905 : imin = imax;
2906 : }
2907 : }
2908 :
2909 : heap >>= PAGE_SHIFT;
2910 : if (heap != vm->vm_dused) {
2911 : printf("vmspace stack range: 0x%lx-0x%lx\n",
2912 : stack_begin, stack_end);
2913 : panic("vmspace_validate: vmspace.vm_dused invalid, "
2914 : "expected %ld pgs, got %ld pgs in map %p",
2915 : heap, vm->vm_dused,
2916 : map);
2917 : }
2918 : }
2919 :
2920 : #endif /* VMMAP_DEBUG */
2921 :
2922 : /*
2923 : * uvm_map_init: init mapping system at boot time. note that we allocate
2924 : * and init the static pool of structs vm_map_entry for the kernel here.
2925 : */
2926 : void
2927 0 : uvm_map_init(void)
2928 : {
2929 : static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
2930 : int lcv;
2931 :
2932 : /* now set up static pool of kernel map entries ... */
2933 0 : mtx_init(&uvm_kmapent_mtx, IPL_VM);
2934 0 : SLIST_INIT(&uvm.kentry_free);
2935 0 : for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
2936 0 : SLIST_INSERT_HEAD(&uvm.kentry_free,
2937 : &kernel_map_entry[lcv], daddrs.addr_kentry);
2938 : }
2939 :
2940 : /* initialize the map-related pools. */
2941 0 : pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0,
2942 : IPL_NONE, PR_WAITOK, "vmsppl", NULL);
2943 0 : pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0,
2944 : IPL_VM, PR_WAITOK, "vmmpepl", NULL);
2945 0 : pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0,
2946 : IPL_VM, 0, "vmmpekpl", NULL);
2947 0 : pool_sethiwat(&uvm_map_entry_pool, 8192);
2948 :
2949 0 : uvm_addr_init();
2950 0 : }
2951 :
2952 : #if defined(DDB)
2953 :
2954 : /*
2955 : * DDB hooks
2956 : */
2957 :
2958 : /*
2959 : * uvm_map_printit: actually prints the map
2960 : */
2961 : void
2962 0 : uvm_map_printit(struct vm_map *map, boolean_t full,
2963 : int (*pr)(const char *, ...))
2964 : {
2965 : struct vmspace *vm;
2966 : struct vm_map_entry *entry;
2967 : struct uvm_addr_state *free;
2968 : int in_free, i;
2969 0 : char buf[8];
2970 :
2971 0 : (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
2972 0 : (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
2973 0 : map->b_start, map->b_end);
2974 0 : (*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
2975 0 : map->s_start, map->s_end);
2976 0 : (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
2977 0 : map->size, map->ref_count, map->timestamp,
2978 0 : map->flags);
2979 0 : (*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
2980 0 : pmap_resident_count(map->pmap));
2981 :
2982 : /* struct vmspace handling. */
2983 0 : if (map->flags & VM_MAP_ISVMSPACE) {
2984 0 : vm = (struct vmspace *)map;
2985 :
2986 0 : (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n",
2987 0 : vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss);
2988 0 : (*pr)("\tvm_tsize=%u vm_dsize=%u\n",
2989 0 : vm->vm_tsize, vm->vm_dsize);
2990 0 : (*pr)("\tvm_taddr=%p vm_daddr=%p\n",
2991 0 : vm->vm_taddr, vm->vm_daddr);
2992 0 : (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n",
2993 0 : vm->vm_maxsaddr, vm->vm_minsaddr);
2994 0 : }
2995 :
2996 0 : if (!full)
2997 : goto print_uaddr;
2998 0 : RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
2999 0 : (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
3000 0 : entry, entry->start, entry->end, entry->object.uvm_obj,
3001 0 : (long long)entry->offset, entry->aref.ar_amap,
3002 0 : entry->aref.ar_pageoff);
3003 0 : (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, prot(max)=%d/%d, inh=%d, "
3004 : "wc=%d, adv=%d\n",
3005 0 : (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
3006 0 : (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
3007 0 : (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
3008 0 : (entry->etype & UVM_ET_STACK) ? 'T' : 'F',
3009 0 : entry->protection, entry->max_protection,
3010 0 : entry->inheritance, entry->wired_count, entry->advice);
3011 :
3012 0 : free = uvm_map_uaddr_e(map, entry);
3013 0 : in_free = (free != NULL);
3014 0 : (*pr)("\thole=%c, free=%c, guard=0x%lx, "
3015 : "free=0x%lx-0x%lx\n",
3016 0 : (entry->etype & UVM_ET_HOLE) ? 'T' : 'F',
3017 0 : in_free ? 'T' : 'F',
3018 0 : entry->guard,
3019 0 : VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
3020 0 : (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment);
3021 0 : (*pr)("\tfreemapped=%c, uaddr=%p\n",
3022 0 : (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free);
3023 0 : if (free) {
3024 0 : (*pr)("\t\t(0x%lx-0x%lx %s)\n",
3025 0 : free->uaddr_minaddr, free->uaddr_maxaddr,
3026 0 : free->uaddr_functions->uaddr_name);
3027 0 : }
3028 : }
3029 :
3030 : print_uaddr:
3031 0 : uvm_addr_print(map->uaddr_exe, "exe", full, pr);
3032 0 : for (i = 0; i < nitems(map->uaddr_any); i++) {
3033 0 : snprintf(&buf[0], sizeof(buf), "any[%d]", i);
3034 0 : uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr);
3035 : }
3036 0 : uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr);
3037 0 : }
3038 :
3039 : /*
3040 : * uvm_object_printit: actually prints the object
3041 : */
3042 : void
3043 0 : uvm_object_printit(uobj, full, pr)
3044 : struct uvm_object *uobj;
3045 : boolean_t full;
3046 : int (*pr)(const char *, ...);
3047 : {
3048 : struct vm_page *pg;
3049 : int cnt = 0;
3050 :
3051 0 : (*pr)("OBJECT %p: pgops=%p, npages=%d, ",
3052 0 : uobj, uobj->pgops, uobj->uo_npages);
3053 0 : if (UVM_OBJ_IS_KERN_OBJECT(uobj))
3054 0 : (*pr)("refs=<SYSTEM>\n");
3055 : else
3056 0 : (*pr)("refs=%d\n", uobj->uo_refs);
3057 :
3058 0 : if (!full) {
3059 0 : return;
3060 : }
3061 0 : (*pr)(" PAGES <pg,offset>:\n ");
3062 0 : RBT_FOREACH(pg, uvm_objtree, &uobj->memt) {
3063 0 : (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
3064 0 : if ((cnt % 3) == 2) {
3065 0 : (*pr)("\n ");
3066 0 : }
3067 0 : cnt++;
3068 : }
3069 0 : if ((cnt % 3) != 2) {
3070 0 : (*pr)("\n");
3071 0 : }
3072 0 : }
3073 :
3074 : /*
3075 : * uvm_page_printit: actually print the page
3076 : */
3077 : static const char page_flagbits[] =
3078 : "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
3079 : "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ"
3080 : "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5";
3081 :
3082 : void
3083 0 : uvm_page_printit(pg, full, pr)
3084 : struct vm_page *pg;
3085 : boolean_t full;
3086 : int (*pr)(const char *, ...);
3087 : {
3088 : struct vm_page *tpg;
3089 : struct uvm_object *uobj;
3090 : struct pglist *pgl;
3091 :
3092 0 : (*pr)("PAGE %p:\n", pg);
3093 0 : (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
3094 0 : pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
3095 0 : (long long)pg->phys_addr);
3096 0 : (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n",
3097 0 : pg->uobject, pg->uanon, (long long)pg->offset);
3098 : #if defined(UVM_PAGE_TRKOWN)
3099 : if (pg->pg_flags & PG_BUSY)
3100 : (*pr)(" owning thread = %d, tag=%s",
3101 : pg->owner, pg->owner_tag);
3102 : else
3103 : (*pr)(" page not busy, no owner");
3104 : #else
3105 0 : (*pr)(" [page ownership tracking disabled]");
3106 : #endif
3107 0 : (*pr)("\tvm_page_md %p\n", &pg->mdpage);
3108 :
3109 0 : if (!full)
3110 0 : return;
3111 :
3112 : /* cross-verify object/anon */
3113 0 : if ((pg->pg_flags & PQ_FREE) == 0) {
3114 0 : if (pg->pg_flags & PQ_ANON) {
3115 0 : if (pg->uanon == NULL || pg->uanon->an_page != pg)
3116 0 : (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n",
3117 0 : (pg->uanon) ? pg->uanon->an_page : NULL);
3118 : else
3119 0 : (*pr)(" anon backpointer is OK\n");
3120 : } else {
3121 0 : uobj = pg->uobject;
3122 0 : if (uobj) {
3123 0 : (*pr)(" checking object list\n");
3124 0 : RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) {
3125 0 : if (tpg == pg) {
3126 : break;
3127 : }
3128 : }
3129 0 : if (tpg)
3130 0 : (*pr)(" page found on object list\n");
3131 : else
3132 0 : (*pr)(" >>> PAGE NOT FOUND "
3133 : "ON OBJECT LIST! <<<\n");
3134 : }
3135 : }
3136 : }
3137 :
3138 : /* cross-verify page queue */
3139 0 : if (pg->pg_flags & PQ_FREE) {
3140 0 : if (uvm_pmr_isfree(pg))
3141 0 : (*pr)(" page found in uvm_pmemrange\n");
3142 : else
3143 0 : (*pr)(" >>> page not found in uvm_pmemrange <<<\n");
3144 : pgl = NULL;
3145 0 : } else if (pg->pg_flags & PQ_INACTIVE) {
3146 0 : pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
3147 : &uvm.page_inactive_swp : &uvm.page_inactive_obj;
3148 0 : } else if (pg->pg_flags & PQ_ACTIVE) {
3149 : pgl = &uvm.page_active;
3150 0 : } else {
3151 : pgl = NULL;
3152 : }
3153 :
3154 0 : if (pgl) {
3155 0 : (*pr)(" checking pageq list\n");
3156 0 : TAILQ_FOREACH(tpg, pgl, pageq) {
3157 0 : if (tpg == pg) {
3158 : break;
3159 : }
3160 : }
3161 0 : if (tpg)
3162 0 : (*pr)(" page found on pageq list\n");
3163 : else
3164 0 : (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
3165 : }
3166 0 : }
3167 : #endif
3168 :
3169 : /*
3170 : * uvm_map_protect: change map protection
3171 : *
3172 : * => set_max means set max_protection.
3173 : * => map must be unlocked.
3174 : */
3175 : int
3176 0 : uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
3177 : vm_prot_t new_prot, boolean_t set_max)
3178 : {
3179 : struct vm_map_entry *first, *iter;
3180 : vm_prot_t old_prot;
3181 : vm_prot_t mask;
3182 : int error;
3183 :
3184 0 : if (start > end)
3185 0 : return EINVAL;
3186 0 : start = MAX(start, map->min_offset);
3187 0 : end = MIN(end, map->max_offset);
3188 0 : if (start >= end)
3189 0 : return 0;
3190 :
3191 : error = 0;
3192 0 : vm_map_lock(map);
3193 :
3194 : /*
3195 : * Set up first and last.
3196 : * - first will contain first entry at or after start.
3197 : */
3198 0 : first = uvm_map_entrybyaddr(&map->addr, start);
3199 : KDASSERT(first != NULL);
3200 0 : if (first->end <= start)
3201 0 : first = RBT_NEXT(uvm_map_addr, first);
3202 :
3203 : /* First, check for protection violations. */
3204 0 : for (iter = first; iter != NULL && iter->start < end;
3205 0 : iter = RBT_NEXT(uvm_map_addr, iter)) {
3206 : /* Treat memory holes as free space. */
3207 0 : if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3208 : continue;
3209 :
3210 0 : if (UVM_ET_ISSUBMAP(iter)) {
3211 : error = EINVAL;
3212 0 : goto out;
3213 : }
3214 0 : if ((new_prot & iter->max_protection) != new_prot) {
3215 : error = EACCES;
3216 0 : goto out;
3217 : }
3218 0 : if (map == kernel_map &&
3219 0 : (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
3220 0 : panic("uvm_map_protect: kernel map W^X violation requested");
3221 : }
3222 :
3223 : /* Fix protections. */
3224 0 : for (iter = first; iter != NULL && iter->start < end;
3225 0 : iter = RBT_NEXT(uvm_map_addr, iter)) {
3226 : /* Treat memory holes as free space. */
3227 0 : if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3228 : continue;
3229 :
3230 0 : old_prot = iter->protection;
3231 :
3232 : /*
3233 : * Skip adapting protection iff old and new protection
3234 : * are equal.
3235 : */
3236 0 : if (set_max) {
3237 0 : if (old_prot == (new_prot & old_prot) &&
3238 0 : iter->max_protection == new_prot)
3239 : continue;
3240 : } else {
3241 0 : if (old_prot == new_prot)
3242 : continue;
3243 : }
3244 :
3245 0 : UVM_MAP_CLIP_START(map, iter, start);
3246 0 : UVM_MAP_CLIP_END(map, iter, end);
3247 :
3248 0 : if (set_max) {
3249 0 : iter->max_protection = new_prot;
3250 0 : iter->protection &= new_prot;
3251 0 : } else
3252 0 : iter->protection = new_prot;
3253 :
3254 : /*
3255 : * update physical map if necessary. worry about copy-on-write
3256 : * here -- CHECK THIS XXX
3257 : */
3258 0 : if (iter->protection != old_prot) {
3259 0 : mask = UVM_ET_ISCOPYONWRITE(iter) ?
3260 : ~PROT_WRITE : PROT_MASK;
3261 :
3262 : /* update pmap */
3263 0 : if ((iter->protection & mask) == PROT_NONE &&
3264 0 : VM_MAPENT_ISWIRED(iter)) {
3265 : /*
3266 : * TODO(ariane) this is stupid. wired_count
3267 : * is 0 if not wired, otherwise anything
3268 : * larger than 0 (incremented once each time
3269 : * wire is called).
3270 : * Mostly to be able to undo the damage on
3271 : * failure, not to actually be a wired
3272 : * refcounter...
3273 : * Originally: iter->wired_count--;
3274 : * (don't we have to unwire this in the pmap
3275 : * as well?)
3276 : */
3277 0 : iter->wired_count = 0;
3278 0 : }
3279 0 : pmap_protect(map->pmap, iter->start, iter->end,
3280 0 : iter->protection & mask);
3281 0 : }
3282 :
3283 : /*
3284 : * If the map is configured to lock any future mappings,
3285 : * wire this entry now if the old protection was PROT_NONE
3286 : * and the new protection is not PROT_NONE.
3287 : */
3288 0 : if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
3289 0 : VM_MAPENT_ISWIRED(iter) == 0 &&
3290 0 : old_prot == PROT_NONE &&
3291 0 : new_prot != PROT_NONE) {
3292 0 : if (uvm_map_pageable(map, iter->start, iter->end,
3293 0 : FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) {
3294 : /*
3295 : * If locking the entry fails, remember the
3296 : * error if it's the first one. Note we
3297 : * still continue setting the protection in
3298 : * the map, but it will return the resource
3299 : * storage condition regardless.
3300 : *
3301 : * XXX Ignore what the actual error is,
3302 : * XXX just call it a resource shortage
3303 : * XXX so that it doesn't get confused
3304 : * XXX what uvm_map_protect() itself would
3305 : * XXX normally return.
3306 : */
3307 : error = ENOMEM;
3308 0 : }
3309 : }
3310 : }
3311 : pmap_update(map->pmap);
3312 :
3313 : out:
3314 0 : vm_map_unlock(map);
3315 0 : return error;
3316 0 : }
3317 :
3318 : /*
3319 : * uvmspace_alloc: allocate a vmspace structure.
3320 : *
3321 : * - structure includes vm_map and pmap
3322 : * - XXX: no locking on this structure
3323 : * - refcnt set to 1, rest must be init'd by caller
3324 : */
3325 : struct vmspace *
3326 0 : uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
3327 : boolean_t remove_holes)
3328 : {
3329 : struct vmspace *vm;
3330 :
3331 0 : vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
3332 0 : uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
3333 0 : return (vm);
3334 : }
3335 :
3336 : /*
3337 : * uvmspace_init: initialize a vmspace structure.
3338 : *
3339 : * - XXX: no locking on this structure
3340 : * - refcnt set to 1, rest must be init'd by caller
3341 : */
3342 : void
3343 0 : uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
3344 : boolean_t pageable, boolean_t remove_holes)
3345 : {
3346 0 : KASSERT(pmap == NULL || pmap == pmap_kernel());
3347 :
3348 0 : if (pmap)
3349 0 : pmap_reference(pmap);
3350 : else
3351 0 : pmap = pmap_create();
3352 0 : vm->vm_map.pmap = pmap;
3353 :
3354 0 : uvm_map_setup(&vm->vm_map, min, max,
3355 0 : (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE);
3356 :
3357 0 : vm->vm_refcnt = 1;
3358 :
3359 : if (remove_holes)
3360 : pmap_remove_holes(vm);
3361 0 : }
3362 :
3363 : /*
3364 : * uvmspace_share: share a vmspace between two processes
3365 : *
3366 : * - XXX: no locking on vmspace
3367 : * - used for vfork
3368 : */
3369 :
3370 : struct vmspace *
3371 0 : uvmspace_share(struct process *pr)
3372 : {
3373 0 : struct vmspace *vm = pr->ps_vmspace;
3374 :
3375 0 : vm->vm_refcnt++;
3376 0 : return vm;
3377 : }
3378 :
3379 : /*
3380 : * uvmspace_exec: the process wants to exec a new program
3381 : *
3382 : * - XXX: no locking on vmspace
3383 : */
3384 :
3385 : void
3386 0 : uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
3387 : {
3388 0 : struct process *pr = p->p_p;
3389 0 : struct vmspace *nvm, *ovm = pr->ps_vmspace;
3390 0 : struct vm_map *map = &ovm->vm_map;
3391 0 : struct uvm_map_deadq dead_entries;
3392 :
3393 0 : KASSERT((start & (vaddr_t)PAGE_MASK) == 0);
3394 0 : KASSERT((end & (vaddr_t)PAGE_MASK) == 0 ||
3395 : (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
3396 :
3397 : pmap_unuse_final(p); /* before stack addresses go away */
3398 0 : TAILQ_INIT(&dead_entries);
3399 :
3400 : /* see if more than one process is using this vmspace... */
3401 0 : if (ovm->vm_refcnt == 1) {
3402 : /*
3403 : * If pr is the only process using its vmspace then
3404 : * we can safely recycle that vmspace for the program
3405 : * that is being exec'd.
3406 : */
3407 :
3408 : #ifdef SYSVSHM
3409 : /*
3410 : * SYSV SHM semantics require us to kill all segments on an exec
3411 : */
3412 0 : if (ovm->vm_shm)
3413 0 : shmexit(ovm);
3414 : #endif
3415 :
3416 : /*
3417 : * POSIX 1003.1b -- "lock future mappings" is revoked
3418 : * when a process execs another program image.
3419 : */
3420 0 : vm_map_lock(map);
3421 0 : vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
3422 :
3423 : /*
3424 : * now unmap the old program
3425 : *
3426 : * Instead of attempting to keep the map valid, we simply
3427 : * nuke all entries and ask uvm_map_setup to reinitialize
3428 : * the map to the new boundaries.
3429 : *
3430 : * uvm_unmap_remove will actually nuke all entries for us
3431 : * (as in, not replace them with free-memory entries).
3432 : */
3433 0 : uvm_unmap_remove(map, map->min_offset, map->max_offset,
3434 : &dead_entries, TRUE, FALSE);
3435 :
3436 : KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
3437 :
3438 : /* Nuke statistics and boundaries. */
3439 0 : memset(&ovm->vm_startcopy, 0,
3440 : (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy);
3441 :
3442 :
3443 0 : if (end & (vaddr_t)PAGE_MASK) {
3444 0 : end += 1;
3445 0 : if (end == 0) /* overflow */
3446 0 : end -= PAGE_SIZE;
3447 : }
3448 :
3449 : /* Setup new boundaries and populate map with entries. */
3450 0 : map->min_offset = start;
3451 0 : map->max_offset = end;
3452 0 : uvm_map_setup_entries(map);
3453 0 : vm_map_unlock(map);
3454 :
3455 : /* but keep MMU holes unavailable */
3456 : pmap_remove_holes(ovm);
3457 0 : } else {
3458 : /*
3459 : * pr's vmspace is being shared, so we can't reuse
3460 : * it for pr since it is still being used for others.
3461 : * allocate a new vmspace for pr
3462 : */
3463 0 : nvm = uvmspace_alloc(start, end,
3464 0 : (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
3465 :
3466 : /* install new vmspace and drop our ref to the old one. */
3467 0 : pmap_deactivate(p);
3468 0 : p->p_vmspace = pr->ps_vmspace = nvm;
3469 0 : pmap_activate(p);
3470 :
3471 0 : uvmspace_free(ovm);
3472 : }
3473 :
3474 : /* Release dead entries */
3475 0 : uvm_unmap_detach(&dead_entries, 0);
3476 0 : }
3477 :
3478 : /*
3479 : * uvmspace_free: free a vmspace data structure
3480 : *
3481 : * - XXX: no locking on vmspace
3482 : */
3483 : void
3484 0 : uvmspace_free(struct vmspace *vm)
3485 : {
3486 0 : if (--vm->vm_refcnt == 0) {
3487 : /*
3488 : * lock the map, to wait out all other references to it. delete
3489 : * all of the mappings and pages they hold, then call the pmap
3490 : * module to reclaim anything left.
3491 : */
3492 : #ifdef SYSVSHM
3493 : /* Get rid of any SYSV shared memory segments. */
3494 0 : if (vm->vm_shm != NULL)
3495 0 : shmexit(vm);
3496 : #endif
3497 :
3498 0 : uvm_map_teardown(&vm->vm_map);
3499 0 : pool_put(&uvm_vmspace_pool, vm);
3500 0 : }
3501 0 : }
3502 :
3503 : /*
3504 : * uvm_share: Map the address range [srcaddr, srcaddr + sz) in
3505 : * srcmap to the address range [dstaddr, dstaddr + sz) in
3506 : * dstmap.
3507 : *
3508 : * The whole address range in srcmap must be backed by an object
3509 : * (no holes).
3510 : *
3511 : * If successful, the address ranges share memory and the destination
3512 : * address range uses the protection flags in prot.
3513 : *
3514 : * This routine assumes that sz is a multiple of PAGE_SIZE and
3515 : * that dstaddr and srcaddr are page-aligned.
3516 : */
3517 : int
3518 0 : uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot,
3519 : struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
3520 : {
3521 : int ret = 0;
3522 : vaddr_t unmap_end;
3523 : vaddr_t dstva;
3524 : vsize_t off, len, n = sz;
3525 0 : struct vm_map_entry *first = NULL, *last = NULL;
3526 0 : struct vm_map_entry *src_entry, *psrc_entry = NULL;
3527 0 : struct uvm_map_deadq dead;
3528 :
3529 0 : if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr)
3530 0 : return EINVAL;
3531 :
3532 0 : TAILQ_INIT(&dead);
3533 0 : vm_map_lock(dstmap);
3534 0 : vm_map_lock_read(srcmap);
3535 :
3536 0 : if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) {
3537 : ret = ENOMEM;
3538 0 : goto exit_unlock;
3539 : }
3540 0 : if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) {
3541 : ret = EINVAL;
3542 0 : goto exit_unlock;
3543 : }
3544 :
3545 : unmap_end = dstaddr;
3546 0 : for (; src_entry != NULL;
3547 0 : psrc_entry = src_entry,
3548 0 : src_entry = RBT_NEXT(uvm_map_addr, src_entry)) {
3549 : /* hole in address space, bail out */
3550 0 : if (psrc_entry != NULL && psrc_entry->end != src_entry->start)
3551 : break;
3552 0 : if (src_entry->start >= srcaddr + sz)
3553 : break;
3554 :
3555 0 : if (UVM_ET_ISSUBMAP(src_entry))
3556 0 : panic("uvm_share: encountered a submap (illegal)");
3557 0 : if (!UVM_ET_ISCOPYONWRITE(src_entry) &&
3558 0 : UVM_ET_ISNEEDSCOPY(src_entry))
3559 0 : panic("uvm_share: non-copy_on_write map entries "
3560 : "marked needs_copy (illegal)");
3561 :
3562 : dstva = dstaddr;
3563 0 : if (src_entry->start > srcaddr) {
3564 0 : dstva += src_entry->start - srcaddr;
3565 : off = 0;
3566 0 : } else
3567 0 : off = srcaddr - src_entry->start;
3568 :
3569 0 : if (n < src_entry->end - src_entry->start)
3570 0 : len = n;
3571 : else
3572 : len = src_entry->end - src_entry->start;
3573 0 : n -= len;
3574 :
3575 0 : if (uvm_mapent_share(dstmap, dstva, len, off, prot, prot,
3576 0 : srcmap, src_entry, &dead) == NULL)
3577 : break;
3578 :
3579 0 : unmap_end = dstva + len;
3580 0 : if (n == 0)
3581 : goto exit_unlock;
3582 : }
3583 :
3584 : ret = EINVAL;
3585 0 : uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE);
3586 :
3587 : exit_unlock:
3588 0 : vm_map_unlock_read(srcmap);
3589 0 : vm_map_unlock(dstmap);
3590 0 : uvm_unmap_detach(&dead, 0);
3591 :
3592 0 : return ret;
3593 0 : }
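/*
 * Illustrative sketch, not part of the original uvm_map.c: how a
 * hypothetical caller could satisfy the uvm_share() contract described
 * above.  All maps, addresses and the size below are placeholders.
 */
#if 0
static int
example_share(struct vm_map *dstmap, vaddr_t dstaddr,
    struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
{
	/* Contract: page-aligned addresses, sz a multiple of PAGE_SIZE. */
	KASSERT((dstaddr & (vaddr_t)PAGE_MASK) == 0);
	KASSERT((srcaddr & (vaddr_t)PAGE_MASK) == 0);
	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);

	/* The source range must be fully backed (no holes) in srcmap. */
	return uvm_share(dstmap, dstaddr, PROT_READ | PROT_WRITE,
	    srcmap, srcaddr, sz);
}
#endif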
3594 :
3595 : /*
3596 : * Clone map entry into other map.
3597 : *
3598 : * Mapping will be placed at dstaddr, for dstlen bytes.
3599 : * Space must be available.
3600 : * Reference counters are incremented.
3601 : */
3602 : struct vm_map_entry *
3603 0 : uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3604 : vsize_t off, vm_prot_t prot, vm_prot_t maxprot,
3605 : struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
3606 : int mapent_flags, int amap_share_flags)
3607 : {
3608 0 : struct vm_map_entry *new_entry, *first, *last;
3609 :
3610 : KDASSERT(!UVM_ET_ISSUBMAP(old_entry));
3611 :
3612 : /* Create new entry (linked in on creation). Fill in first, last. */
3613 60 : first = last = NULL;
3614 0 : if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) {
3615 0 : panic("uvmspace_fork: no space in map for "
3616 : "entry in empty map");
3617 : }
3618 0 : new_entry = uvm_map_mkentry(dstmap, first, last,
3619 : dstaddr, dstlen, mapent_flags, dead, NULL);
3620 0 : if (new_entry == NULL)
3621 0 : return NULL;
3622 : /* old_entry -> new_entry */
3623 0 : new_entry->object = old_entry->object;
3624 0 : new_entry->offset = old_entry->offset;
3625 0 : new_entry->aref = old_entry->aref;
3626 0 : new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
3627 0 : new_entry->protection = prot;
3628 0 : new_entry->max_protection = maxprot;
3629 0 : new_entry->inheritance = old_entry->inheritance;
3630 0 : new_entry->advice = old_entry->advice;
3631 :
3632 : /* gain reference to object backing the map (can't be a submap). */
3633 0 : if (new_entry->aref.ar_amap) {
3634 60 : new_entry->aref.ar_pageoff += off >> PAGE_SHIFT;
3635 0 : amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3636 0 : (new_entry->end - new_entry->start) >> PAGE_SHIFT,
3637 : amap_share_flags);
3638 0 : }
3639 :
3640 0 : if (UVM_ET_ISOBJ(new_entry) &&
3641 0 : new_entry->object.uvm_obj->pgops->pgo_reference) {
3642 60 : new_entry->offset += off;
3643 0 : new_entry->object.uvm_obj->pgops->pgo_reference
3644 : (new_entry->object.uvm_obj);
3645 0 : }
3646 :
3647 0 : return new_entry;
3648 0 : }
3649 :
3650 : struct vm_map_entry *
3651 0 : uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3652 : vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map,
3653 : struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3654 : {
3655 : /*
3656 : * If old_entry refers to a copy-on-write region that has not yet been
3657 : * written to (needs_copy flag is set), then we need to allocate a new
3658 : * amap for old_entry.
3659 : *
3660 : * If we do not do this, and the process owning old_entry later does a
3661 : * copy-on-write, old_entry and new_entry will refer to different memory
3662 : * regions, and the memory between the processes is no longer shared.
3663 : *
3664 : * [in other words, we need to clear needs_copy]
3665 : */
3666 :
3667 0 : if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3668 : /* get our own amap, clears needs_copy */
3669 0 : amap_copy(old_map, old_entry, M_WAITOK, FALSE,
3670 : 0, 0);
3671 : /* XXXCDC: WAITOK??? */
3672 0 : }
3673 :
3674 0 : return uvm_mapent_clone(dstmap, dstaddr, dstlen, off,
3675 : prot, maxprot, old_entry, dead, 0, AMAP_SHARED);
3676 : }
3677 :
3678 : /*
3679 : * share the mapping: this means we want the old and
3680 : * new entries to share amaps and backing objects.
3681 : */
3682 : struct vm_map_entry *
3683 0 : uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
3684 : struct vm_map *old_map,
3685 : struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3686 : {
3687 : struct vm_map_entry *new_entry;
3688 :
3689 0 : new_entry = uvm_mapent_share(new_map, old_entry->start,
3690 0 : old_entry->end - old_entry->start, 0, old_entry->protection,
3691 0 : old_entry->max_protection, old_map, old_entry, dead);
3692 :
3693 : /*
3694 : * pmap_copy the mappings: this routine is optional
3695 : * but if it is there it will reduce the number of
3696 : * page faults in the new proc.
3697 : */
3698 0 : if (!UVM_ET_ISHOLE(new_entry))
3699 : pmap_copy(new_map->pmap, old_map->pmap, new_entry->start,
3700 : (new_entry->end - new_entry->start), new_entry->start);
3701 :
3702 0 : return (new_entry);
3703 : }
3704 :
3705 : /*
3706 : * copy-on-write the mapping (using mmap's
3707 : * MAP_PRIVATE semantics)
3708 : *
3709 : * allocate new_entry, adjust reference counts.
3710 : * (note that new references are read-only).
3711 : */
3712 : struct vm_map_entry *
3713 0 : uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
3714 : struct vm_map *old_map,
3715 : struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3716 : {
3717 : struct vm_map_entry *new_entry;
3718 : boolean_t protect_child;
3719 :
3720 0 : new_entry = uvm_mapent_clone(new_map, old_entry->start,
3721 0 : old_entry->end - old_entry->start, 0, old_entry->protection,
3722 0 : old_entry->max_protection, old_entry, dead, 0, 0);
3723 :
3724 0 : new_entry->etype |=
3725 : (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3726 :
3727 : /*
3728 : * the new entry will need an amap. it will either
3729 : * need to be copied from the old entry or created
3730 : * from scratch (if the old entry does not have an
3731 : * amap). can we defer this process until later
3732 : * (by setting "needs_copy") or do we need to copy
3733 : * the amap now?
3734 : *
3735 : * we must copy the amap now if any of the following
3736 : * conditions hold:
3737 : * 1. the old entry has an amap and that amap is
3738 : * being shared. this means that the old (parent)
3739 : * process is sharing the amap with another
3740 : * process. if we do not clear needs_copy here
3741 : * we will end up in a situation where both the
3742 : * parent and child process are referring to the
3743 : * same amap with "needs_copy" set. if the
3744 : * parent write-faults, the fault routine will
3745 : * clear "needs_copy" in the parent by allocating
3746 : * a new amap. this is wrong because the
3747 : * parent is supposed to be sharing the old amap
3748 : * and the new amap will break that.
3749 : *
3750 : * 2. if the old entry has an amap and a non-zero
3751 : * wire count then we are going to have to call
3752 : * amap_cow_now to avoid page faults in the
3753 : * parent process. since amap_cow_now requires
3754 : * "needs_copy" to be clear we might as well
3755 : * clear it here as well.
3756 : *
3757 : */
3758 0 : if (old_entry->aref.ar_amap != NULL &&
3759 0 : ((amap_flags(old_entry->aref.ar_amap) &
3760 0 : AMAP_SHARED) != 0 ||
3761 0 : VM_MAPENT_ISWIRED(old_entry))) {
3762 0 : amap_copy(new_map, new_entry, M_WAITOK, FALSE,
3763 : 0, 0);
3764 : /* XXXCDC: M_WAITOK ... ok? */
3765 0 : }
3766 :
3767 : /*
3768 : * if the parent's entry is wired down, then the
3769 : * parent process does not want page faults on
3770 : * access to that memory. this means that we
3771 : * cannot do copy-on-write because we can't write
3772 : * protect the old entry. in this case we
3773 : * resolve all copy-on-write faults now, using
3774 : * amap_cow_now. note that we have already
3775 : * allocated any needed amap (above).
3776 : */
3777 0 : if (VM_MAPENT_ISWIRED(old_entry)) {
3778 : /*
3779 : * resolve all copy-on-write faults now
3780 : * (note that there is nothing to do if
3781 : * the old mapping does not have an amap).
3782 : * XXX: is it worthwhile to bother with
3783 : * pmap_copy in this case?
3784 : */
3785 0 : if (old_entry->aref.ar_amap)
3786 0 : amap_cow_now(new_map, new_entry);
3787 : } else {
3788 0 : if (old_entry->aref.ar_amap) {
3789 : /*
3790 : * setup mappings to trigger copy-on-write faults
3791 : * we must write-protect the parent if it has
3792 : * an amap and it is not already "needs_copy"...
3793 : * if it is already "needs_copy" then the parent
3794 : * has already been write-protected by a previous
3795 : * fork operation.
3796 : *
3797 : * if we do not write-protect the parent, then
3798 : * we must be sure to write-protect the child
3799 : * after the pmap_copy() operation.
3800 : *
3801 : * XXX: pmap_copy should have some way of telling
3802 : * us that it didn't do anything so we can avoid
3803 : * calling pmap_protect needlessly.
3804 : */
3805 0 : if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
3806 0 : if (old_entry->max_protection & PROT_WRITE) {
3807 0 : pmap_protect(old_map->pmap,
3808 0 : old_entry->start,
3809 0 : old_entry->end,
3810 0 : old_entry->protection &
3811 : ~PROT_WRITE);
3812 : pmap_update(old_map->pmap);
3813 0 : }
3814 0 : old_entry->etype |= UVM_ET_NEEDSCOPY;
3815 0 : }
3816 :
3817 : /* parent must now be write-protected */
3818 : protect_child = FALSE;
3819 0 : } else {
3820 : /*
3821 : * we only need to protect the child if the
3822 : * parent has write access.
3823 : */
3824 0 : if (old_entry->max_protection & PROT_WRITE)
3825 0 : protect_child = TRUE;
3826 : else
3827 : protect_child = FALSE;
3828 : }
3829 : /*
3830 : * copy the mappings
3831 : * XXX: need a way to tell if this does anything
3832 : */
3833 0 : if (!UVM_ET_ISHOLE(new_entry))
3834 : pmap_copy(new_map->pmap, old_map->pmap,
3835 : new_entry->start,
3836 : (old_entry->end - old_entry->start),
3837 : old_entry->start);
3838 :
3839 : /* protect the child's mappings if necessary */
3840 0 : if (protect_child) {
3841 0 : pmap_protect(new_map->pmap, new_entry->start,
3842 0 : new_entry->end,
3843 0 : new_entry->protection &
3844 : ~PROT_WRITE);
3845 0 : }
3846 : }
3847 :
3848 0 : return (new_entry);
3849 : }
3850 :
3851 : /*
3852 : * zero the mapping: the new entry will be zero initialized
3853 : */
3854 : struct vm_map_entry *
3855 0 : uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map,
3856 : struct vm_map *old_map,
3857 : struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3858 : {
3859 : struct vm_map_entry *new_entry;
3860 :
3861 0 : new_entry = uvm_mapent_clone(new_map, old_entry->start,
3862 0 : old_entry->end - old_entry->start, 0, old_entry->protection,
3863 0 : old_entry->max_protection, old_entry, dead, 0, 0);
3864 :
3865 0 : new_entry->etype |=
3866 : (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3867 :
3868 0 : if (new_entry->aref.ar_amap) {
3869 0 : amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3870 0 : atop(new_entry->end - new_entry->start), 0);
3871 0 : new_entry->aref.ar_amap = NULL;
3872 0 : new_entry->aref.ar_pageoff = 0;
3873 0 : }
3874 :
3875 0 : if (UVM_ET_ISOBJ(new_entry)) {
3876 0 : if (new_entry->object.uvm_obj->pgops->pgo_detach)
3877 0 : new_entry->object.uvm_obj->pgops->pgo_detach(
3878 : new_entry->object.uvm_obj);
3879 0 : new_entry->object.uvm_obj = NULL;
3880 0 : new_entry->etype &= ~UVM_ET_OBJ;
3881 0 : }
3882 :
3883 0 : return (new_entry);
3884 : }
3885 :
3886 : /*
3887 : * uvmspace_fork: fork a process' main map
3888 : *
3889 : * => create a new vmspace for child process from parent.
3890 : * => parent's map must not be locked.
3891 : */
3892 : struct vmspace *
3893 0 : uvmspace_fork(struct process *pr)
3894 : {
3895 0 : struct vmspace *vm1 = pr->ps_vmspace;
3896 : struct vmspace *vm2;
3897 0 : struct vm_map *old_map = &vm1->vm_map;
3898 : struct vm_map *new_map;
3899 : struct vm_map_entry *old_entry, *new_entry;
3900 0 : struct uvm_map_deadq dead;
3901 :
3902 0 : vm_map_lock(old_map);
3903 :
3904 0 : vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
3905 0 : (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
3906 0 : memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
3907 : (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
3908 0 : vm2->vm_dused = 0; /* Statistic managed by us. */
3909 0 : new_map = &vm2->vm_map;
3910 0 : vm_map_lock(new_map);
3911 :
3912 : /* go entry-by-entry */
3913 0 : TAILQ_INIT(&dead);
3914 0 : RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) {
3915 0 : if (old_entry->start == old_entry->end)
3916 : continue;
3917 :
3918 : /* first, some sanity checks on the old entry */
3919 0 : if (UVM_ET_ISSUBMAP(old_entry)) {
3920 0 : panic("fork: encountered a submap during fork "
3921 : "(illegal)");
3922 : }
3923 :
3924 0 : if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
3925 0 : UVM_ET_ISNEEDSCOPY(old_entry)) {
3926 0 : panic("fork: non-copy_on_write map entry marked "
3927 : "needs_copy (illegal)");
3928 : }
3929 :
3930 : /* Apply inheritance. */
3931 0 : switch (old_entry->inheritance) {
3932 : case MAP_INHERIT_SHARE:
3933 0 : new_entry = uvm_mapent_forkshared(vm2, new_map,
3934 : old_map, old_entry, &dead);
3935 0 : break;
3936 : case MAP_INHERIT_COPY:
3937 0 : new_entry = uvm_mapent_forkcopy(vm2, new_map,
3938 : old_map, old_entry, &dead);
3939 0 : break;
3940 : case MAP_INHERIT_ZERO:
3941 0 : new_entry = uvm_mapent_forkzero(vm2, new_map,
3942 : old_map, old_entry, &dead);
3943 0 : break;
3944 : default:
3945 : continue;
3946 : }
3947 :
3948 : /* Update process statistics. */
3949 0 : if (!UVM_ET_ISHOLE(new_entry))
3950 0 : new_map->size += new_entry->end - new_entry->start;
3951 0 : if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) {
3952 0 : vm2->vm_dused += uvmspace_dused(
3953 0 : new_map, new_entry->start, new_entry->end);
3954 0 : }
3955 : }
3956 :
3957 0 : vm_map_unlock(old_map);
3958 0 : vm_map_unlock(new_map);
3959 :
3960 : /*
3961 : * This can actually happen, if multiple entries described a
3962 : * This can actually happen if multiple entries described a
3963 : */
3964 0 : uvm_unmap_detach(&dead, 0);
3965 :
3966 : #ifdef SYSVSHM
3967 0 : if (vm1->vm_shm)
3968 0 : shmfork(vm1, vm2);
3969 : #endif
3970 :
3971 0 : return vm2;
3972 0 : }
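/*
 * Illustrative sketch, not part of the original uvm_map.c: the
 * inheritance code stored on each entry (settable with uvm_map_inherit()
 * below) is what drives the switch in uvmspace_fork() above.  The map
 * and range are placeholders.
 */
#if 0
static int
example_share_across_fork(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	/*
	 * MAP_INHERIT_SHARE -> uvm_mapent_forkshared(): parent and child
	 *                      share the amap and backing object.
	 * MAP_INHERIT_COPY  -> uvm_mapent_forkcopy(): copy-on-write copy.
	 * MAP_INHERIT_ZERO  -> uvm_mapent_forkzero(): child sees zeroes.
	 * MAP_INHERIT_NONE  -> entry is not copied into the child at all.
	 */
	return uvm_map_inherit(map, start, end, MAP_INHERIT_SHARE);
}
#endif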
3973 :
3974 : /*
3975 : * uvm_map_hint: return the beginning of the best area suitable for
3976 : * creating a new mapping with "prot" protection.
3977 : */
3978 : vaddr_t
3979 0 : uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr,
3980 : vaddr_t maxaddr)
3981 : {
3982 : vaddr_t addr;
3983 : vaddr_t spacing;
3984 :
3985 : #ifdef __i386__
3986 : /*
3987 : * If executable skip first two pages, otherwise start
3988 : * after data + heap region.
3989 : */
3990 : if ((prot & PROT_EXEC) != 0 &&
3991 : (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) {
3992 : addr = (PAGE_SIZE*2) +
3993 : (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
3994 : return (round_page(addr));
3995 : }
3996 : #endif
3997 :
3998 : #if defined (__LP64__)
3999 : spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4000 : #else
4001 : spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4002 : #endif
4003 :
4004 : /*
4005 : * Start malloc/mmap after the brk.
4006 : */
4007 0 : addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
4008 0 : addr = MAX(addr, minaddr);
4009 :
4010 0 : if (addr < maxaddr) {
4011 0 : while (spacing > maxaddr - addr)
4012 0 : spacing >>= 1;
4013 : }
4014 0 : addr += arc4random() & spacing;
4015 0 : return (round_page(addr));
4016 : }
4017 :
4018 : /*
4019 : * uvm_map_submap: punch down part of a map into a submap
4020 : *
4021 : * => only the kernel_map is allowed to be submapped
4022 : * => the purpose of submapping is to break up the locking granularity
4023 : * of a larger map
4024 : * => the range specified must have been mapped previously with a uvm_map()
4025 : * call [with uobj==NULL] to create a blank map entry in the main map.
4026 : * [And it had better still be blank!]
4027 : * => maps which contain submaps should never be copied or forked.
4028 : * => to remove a submap, use uvm_unmap() on the main map
4029 : * and then uvm_map_deallocate() the submap.
4030 : * => main map must be unlocked.
4031 : * => submap must have been init'd and have a zero reference count.
4032 : * [need not be locked as we don't actually reference it]
4033 : */
4034 : int
4035 0 : uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
4036 : struct vm_map *submap)
4037 : {
4038 0 : struct vm_map_entry *entry;
4039 : int result;
4040 :
4041 0 : if (start > map->max_offset || end > map->max_offset ||
4042 0 : start < map->min_offset || end < map->min_offset)
4043 0 : return EINVAL;
4044 :
4045 0 : vm_map_lock(map);
4046 :
4047 0 : if (uvm_map_lookup_entry(map, start, &entry)) {
4048 0 : UVM_MAP_CLIP_START(map, entry, start);
4049 0 : UVM_MAP_CLIP_END(map, entry, end);
4050 : } else
4051 0 : entry = NULL;
4052 :
4053 0 : if (entry != NULL &&
4054 0 : entry->start == start && entry->end == end &&
4055 0 : entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
4056 0 : !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
4057 0 : entry->etype |= UVM_ET_SUBMAP;
4058 0 : entry->object.sub_map = submap;
4059 0 : entry->offset = 0;
4060 0 : uvm_map_reference(submap);
4061 : result = 0;
4062 0 : } else
4063 : result = EINVAL;
4064 :
4065 0 : vm_map_unlock(map);
4066 0 : return(result);
4067 0 : }
4068 :
4069 : /*
4070 : * uvm_map_checkprot: check protection in map
4071 : *
4072 : * => must allow specific protection in a fully allocated region.
4073 : * => map must be read or write locked by caller.
4074 : */
4075 : boolean_t
4076 0 : uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
4077 : vm_prot_t protection)
4078 : {
4079 : struct vm_map_entry *entry;
4080 :
4081 0 : if (start < map->min_offset || end > map->max_offset || start > end)
4082 0 : return FALSE;
4083 0 : if (start == end)
4084 0 : return TRUE;
4085 :
4086 : /*
4087 : * Iterate entries.
4088 : */
4089 0 : for (entry = uvm_map_entrybyaddr(&map->addr, start);
4090 0 : entry != NULL && entry->start < end;
4091 0 : entry = RBT_NEXT(uvm_map_addr, entry)) {
4092 : /* Fail if a hole is found. */
4093 0 : if (UVM_ET_ISHOLE(entry) ||
4094 0 : (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
4095 0 : return FALSE;
4096 :
4097 : /* Check protection. */
4098 0 : if ((entry->protection & protection) != protection)
4099 0 : return FALSE;
4100 : }
4101 0 : return TRUE;
4102 0 : }
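/*
 * Illustrative sketch, not part of the original uvm_map.c: using
 * uvm_map_checkprot() under the documented locking rule.  The map and
 * range are placeholders.
 */
#if 0
static boolean_t
example_range_is_writable(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	boolean_t ok;

	vm_map_lock_read(map);	/* a read lock satisfies the contract */
	ok = uvm_map_checkprot(map, start, end, PROT_READ | PROT_WRITE);
	vm_map_unlock_read(map);
	return ok;
}
#endif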
4103 :
4104 : /*
4105 : * uvm_map_create: create map
4106 : */
4107 : vm_map_t
4108 0 : uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
4109 : {
4110 : vm_map_t map;
4111 :
4112 0 : map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
4113 0 : map->pmap = pmap;
4114 0 : uvm_map_setup(map, min, max, flags);
4115 0 : return (map);
4116 : }
4117 :
4118 : /*
4119 : * uvm_map_deallocate: drop reference to a map
4120 : *
4121 : * => caller must not lock map
4122 : * => we will zap map if ref count goes to zero
4123 : */
4124 : void
4125 0 : uvm_map_deallocate(vm_map_t map)
4126 : {
4127 : int c;
4128 0 : struct uvm_map_deadq dead;
4129 :
4130 0 : c = --map->ref_count;
4131 0 : if (c > 0) {
4132 0 : return;
4133 : }
4134 :
4135 : /*
4136 : * all references gone. unmap and free.
4137 : *
4138 : * No lock required: we are only one to access this map.
4139 : */
4140 0 : TAILQ_INIT(&dead);
4141 : uvm_tree_sanity(map, __FILE__, __LINE__);
4142 0 : uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
4143 : TRUE, FALSE);
4144 0 : pmap_destroy(map->pmap);
4145 0 : KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
4146 0 : free(map, M_VMMAP, sizeof *map);
4147 :
4148 0 : uvm_unmap_detach(&dead, 0);
4149 0 : }
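/*
 * Illustrative sketch, not part of the original uvm_map.c: the removal
 * order prescribed by the uvm_map_submap() comment above, pairing
 * uvm_unmap() on the main map with uvm_map_deallocate() on the submap.
 * The maps and range are placeholders.
 */
#if 0
static void
example_remove_submap(struct vm_map *mainmap, vaddr_t start, vaddr_t end,
    struct vm_map *submap)
{
	uvm_unmap(mainmap, start, end);		/* drop the submap entry */
	uvm_map_deallocate(submap);		/* drop our submap reference */
}
#endif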
4150 :
4151 : /*
4152 : * uvm_map_inherit: set inheritance code for range of addrs in map.
4153 : *
4154 : * => map must be unlocked
4155 : * => note that the inherit code is used during a "fork". see fork
4156 : * code for details.
4157 : */
4158 : int
4159 0 : uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
4160 : vm_inherit_t new_inheritance)
4161 : {
4162 : struct vm_map_entry *entry;
4163 :
4164 0 : switch (new_inheritance) {
4165 : case MAP_INHERIT_NONE:
4166 : case MAP_INHERIT_COPY:
4167 : case MAP_INHERIT_SHARE:
4168 : case MAP_INHERIT_ZERO:
4169 : break;
4170 : default:
4171 0 : return (EINVAL);
4172 : }
4173 :
4174 0 : if (start > end)
4175 0 : return EINVAL;
4176 0 : start = MAX(start, map->min_offset);
4177 0 : end = MIN(end, map->max_offset);
4178 0 : if (start >= end)
4179 0 : return 0;
4180 :
4181 0 : vm_map_lock(map);
4182 :
4183 0 : entry = uvm_map_entrybyaddr(&map->addr, start);
4184 0 : if (entry->end > start)
4185 0 : UVM_MAP_CLIP_START(map, entry, start);
4186 : else
4187 0 : entry = RBT_NEXT(uvm_map_addr, entry);
4188 :
4189 0 : while (entry != NULL && entry->start < end) {
4190 0 : UVM_MAP_CLIP_END(map, entry, end);
4191 0 : entry->inheritance = new_inheritance;
4192 0 : entry = RBT_NEXT(uvm_map_addr, entry);
4193 : }
4194 :
4195 0 : vm_map_unlock(map);
4196 0 : return (0);
4197 0 : }
4198 :
4199 : /*
4200 : * uvm_map_advice: set advice code for range of addrs in map.
4201 : *
4202 : * => map must be unlocked
4203 : */
4204 : int
4205 0 : uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
4206 : {
4207 : struct vm_map_entry *entry;
4208 :
4209 0 : switch (new_advice) {
4210 : case MADV_NORMAL:
4211 : case MADV_RANDOM:
4212 : case MADV_SEQUENTIAL:
4213 : break;
4214 : default:
4215 0 : return (EINVAL);
4216 : }
4217 :
4218 0 : if (start > end)
4219 0 : return EINVAL;
4220 0 : start = MAX(start, map->min_offset);
4221 0 : end = MIN(end, map->max_offset);
4222 0 : if (start >= end)
4223 0 : return 0;
4224 :
4225 0 : vm_map_lock(map);
4226 :
4227 0 : entry = uvm_map_entrybyaddr(&map->addr, start);
4228 0 : if (entry != NULL && entry->end > start)
4229 0 : UVM_MAP_CLIP_START(map, entry, start);
4230 0 : else if (entry != NULL)
4231 0 : entry = RBT_NEXT(uvm_map_addr, entry);
4232 :
4233 : /*
4234 : * XXXJRT: disallow holes?
4235 : */
4236 0 : while (entry != NULL && entry->start < end) {
4237 0 : UVM_MAP_CLIP_END(map, entry, end);
4238 0 : entry->advice = new_advice;
4239 0 : entry = RBT_NEXT(uvm_map_addr, entry);
4240 : }
4241 :
4242 0 : vm_map_unlock(map);
4243 0 : return (0);
4244 0 : }
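/*
 * Illustrative sketch, not part of the original uvm_map.c: the advice
 * codes accepted above are the madvise(2) access-pattern hints.  The map
 * and range are placeholders.
 */
#if 0
static int
example_mark_sequential(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	/* Hint that the range will be accessed in sequential order. */
	return uvm_map_advice(map, start, end, MADV_SEQUENTIAL);
}
#endif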
4245 :
4246 : /*
4247 : * uvm_map_extract: extract a mapping from a map and put it somewhere
4248 : * in the kernel_map, setting protection to max_prot.
4249 : *
4250 : * => map should be unlocked (we will write lock it and kernel_map)
4251 : * => returns 0 on success, error code otherwise
4252 : * => start must be page aligned
4253 : * => len must be a multiple of PAGE_SIZE
4254 : * => flags:
4255 : * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
4256 : * Mappings are QREF's.
4257 : */
4258 : int
4259 0 : uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
4260 : vaddr_t *dstaddrp, int flags)
4261 60 : {
4262 0 : struct uvm_map_deadq dead;
4263 0 : struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
4264 0 : vaddr_t dstaddr;
4265 : vaddr_t end;
4266 : vaddr_t cp_start;
4267 : vsize_t cp_len, cp_off;
4268 : int error;
4269 :
4270 0 : TAILQ_INIT(&dead);
4271 0 : end = start + len;
4272 :
4273 : /*
4274 : * Sanity check on the parameters.
4275 : * Also, since the mapping may not contain gaps, error out if the
4276 : * mapped area is not in source map.
4277 : */
4278 0 : if ((start & (vaddr_t)PAGE_MASK) != 0 ||
4279 0 : (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
4280 0 : return EINVAL;
4281 0 : if (start < srcmap->min_offset || end > srcmap->max_offset)
4282 0 : return EINVAL;
4283 :
4284 : /* Initialize dead entries. Handle len == 0 case. */
4285 0 : if (len == 0)
4286 0 : return 0;
4287 :
4288 : /* Acquire lock on srcmap. */
4289 0 : vm_map_lock(srcmap);
4290 :
4291 : /* Lock srcmap, lookup first and last entry in <start,len>. */
4292 0 : first = uvm_map_entrybyaddr(&srcmap->addr, start);
4293 :
4294 : /* Check that the range is contiguous. */
4295 0 : for (entry = first; entry != NULL && entry->end < end;
4296 0 : entry = RBT_NEXT(uvm_map_addr, entry)) {
4297 0 : if (VMMAP_FREE_END(entry) != entry->end ||
4298 0 : UVM_ET_ISHOLE(entry)) {
4299 : error = EINVAL;
4300 0 : goto fail;
4301 : }
4302 : }
4303 0 : if (entry == NULL || UVM_ET_ISHOLE(entry)) {
4304 : error = EINVAL;
4305 0 : goto fail;
4306 : }
4307 :
4308 : /*
4309 : * Handle need-copy flag.
4310 : */
4311 120 : for (entry = first; entry != NULL && entry->start < end;
4312 0 : entry = RBT_NEXT(uvm_map_addr, entry)) {
4313 0 : if (UVM_ET_ISNEEDSCOPY(entry))
4314 0 : amap_copy(srcmap, entry, M_NOWAIT,
4315 0 : UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end);
4316 60 : if (UVM_ET_ISNEEDSCOPY(entry)) {
4317 : /*
4318 : * amap_copy failure
4319 : */
4320 : error = ENOMEM;
4321 0 : goto fail;
4322 : }
4323 : }
4324 :
4325 : /* Lock destination map (kernel_map). */
4326 0 : vm_map_lock(kernel_map);
4327 :
4328 0 : if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
4329 : MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start),
4330 0 : PROT_NONE, 0) != 0) {
4331 : error = ENOMEM;
4332 0 : goto fail2;
4333 : }
4334 0 : *dstaddrp = dstaddr;
4335 :
4336 : /*
4337 : * We now have srcmap and kernel_map locked.
4338 : * dstaddr contains the destination offset in dstmap.
4339 : */
4340 : /* step 1: start looping through map entries, performing extraction. */
4341 120 : for (entry = first; entry != NULL && entry->start < end;
4342 0 : entry = RBT_NEXT(uvm_map_addr, entry)) {
4343 : KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
4344 0 : if (UVM_ET_ISHOLE(entry))
4345 : continue;
4346 :
4347 : /* Calculate uvm_mapent_clone parameters. */
4348 : cp_start = entry->start;
4349 0 : if (cp_start < start) {
4350 0 : cp_off = start - cp_start;
4351 : cp_start = start;
4352 0 : } else
4353 : cp_off = 0;
4354 0 : cp_len = MIN(entry->end, end) - cp_start;
4355 :
4356 0 : newentry = uvm_mapent_clone(kernel_map,
4357 0 : cp_start - start + dstaddr, cp_len, cp_off,
4358 0 : entry->protection, entry->max_protection,
4359 : entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
4360 0 : if (newentry == NULL) {
4361 : error = ENOMEM;
4362 0 : goto fail2_unmap;
4363 : }
4364 0 : kernel_map->size += cp_len;
4365 0 : if (flags & UVM_EXTRACT_FIXPROT)
4366 60 : newentry->protection = newentry->max_protection;
4367 :
4368 : /*
4369 : * Step 2: perform pmap copy.
4370 : * (Doing this in the loop saves one RB traversal.)
4371 : */
4372 : pmap_copy(kernel_map->pmap, srcmap->pmap,
4373 : cp_start - start + dstaddr, cp_len, cp_start);
4374 : }
4375 : pmap_update(kernel_map->pmap);
4376 :
4377 0 : error = 0;
4378 :
4379 : /* Unmap copied entries on failure. */
4380 : fail2_unmap:
4381 0 : if (error) {
4382 0 : uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
4383 : FALSE, TRUE);
4384 0 : }
4385 :
4386 : /* Release maps, release dead entries. */
4387 : fail2:
4388 0 : vm_map_unlock(kernel_map);
4389 :
4390 : fail:
4391 0 : vm_map_unlock(srcmap);
4392 :
4393 0 : uvm_unmap_detach(&dead, 0);
4394 :
4395 0 : return error;
4396 0 : }
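/*
 * Illustrative sketch, not part of the original uvm_map.c: borrowing a
 * user range into kernel_map with uvm_map_extract().  srcmap, start and
 * len are placeholders; start must be page aligned and len a multiple of
 * PAGE_SIZE, per the rules above.
 */
#if 0
static int
example_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
    vaddr_t *kvap)
{
	int error;

	error = uvm_map_extract(srcmap, start, len, kvap,
	    UVM_EXTRACT_FIXPROT);
	if (error)
		return error;
	/*
	 * ... access the pages through the kernel mapping at *kvap ...
	 * The caller is responsible for removing the kernel mapping
	 * afterwards (e.g. uvm_unmap(kernel_map, *kvap, *kvap + len)).
	 */
	return 0;
}
#endif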
4397 :
4398 : /*
4399 : * uvm_map_clean: clean out a map range
4400 : *
4401 : * => valid flags:
4402 : * if (flags & PGO_CLEANIT): dirty pages are cleaned first
4403 : * if (flags & PGO_SYNCIO): dirty pages are written synchronously
4404 : * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
4405 : * if (flags & PGO_FREE): any cached pages are freed after clean
4406 : * => returns an error if any part of the specified range isn't mapped
4407 : * => never a need to flush amap layer since the anonymous memory has
4408 : * no permanent home, but may deactivate pages there
4409 : * => called from sys_msync() and sys_madvise()
4410 : * => caller must not write-lock map (read OK).
4411 : * => we may sleep while cleaning if SYNCIO [with map read-locked]
4412 : */
4413 :
4414 : int
4415 0 : uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
4416 : {
4417 : struct vm_map_entry *first, *entry;
4418 : struct vm_amap *amap;
4419 : struct vm_anon *anon;
4420 : struct vm_page *pg;
4421 : struct uvm_object *uobj;
4422 : vaddr_t cp_start, cp_end;
4423 : int refs;
4424 : int error;
4425 : boolean_t rv;
4426 :
4427 0 : KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
4428 : (PGO_FREE|PGO_DEACTIVATE));
4429 :
4430 0 : if (start > end || start < map->min_offset || end > map->max_offset)
4431 0 : return EINVAL;
4432 :
4433 0 : vm_map_lock_read(map);
4434 0 : first = uvm_map_entrybyaddr(&map->addr, start);
4435 :
4436 : /* Make a first pass to check for holes. */
4437 0 : for (entry = first; entry != NULL && entry->start < end;
4438 0 : entry = RBT_NEXT(uvm_map_addr, entry)) {
4439 0 : if (UVM_ET_ISSUBMAP(entry)) {
4440 0 : vm_map_unlock_read(map);
4441 0 : return EINVAL;
4442 : }
4443 0 : if (UVM_ET_ISSUBMAP(entry) ||
4444 0 : UVM_ET_ISHOLE(entry) ||
4445 0 : (entry->end < end &&
4446 0 : VMMAP_FREE_END(entry) != entry->end)) {
4447 0 : vm_map_unlock_read(map);
4448 0 : return EFAULT;
4449 : }
4450 : }
4451 :
4452 : error = 0;
4453 0 : for (entry = first; entry != NULL && entry->start < end;
4454 0 : entry = RBT_NEXT(uvm_map_addr, entry)) {
4455 0 : amap = entry->aref.ar_amap; /* top layer */
4456 0 : if (UVM_ET_ISOBJ(entry))
4457 0 : uobj = entry->object.uvm_obj;
4458 : else
4459 : uobj = NULL;
4460 :
4461 : /*
4462 : * No amap cleaning necessary if:
4463 : * - there's no amap
4464 : * - we're not deactivating or freeing pages.
4465 : */
4466 0 : if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
4467 : goto flush_object;
4468 :
4469 0 : cp_start = MAX(entry->start, start);
4470 0 : cp_end = MIN(entry->end, end);
4471 :
4472 0 : for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
4473 0 : anon = amap_lookup(&entry->aref,
4474 0 : cp_start - entry->start);
4475 0 : if (anon == NULL)
4476 : continue;
4477 :
4478 0 : pg = anon->an_page;
4479 0 : if (pg == NULL) {
4480 : continue;
4481 : }
4482 0 : KASSERT(pg->pg_flags & PQ_ANON);
4483 :
4484 0 : switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
4485 : /*
4486 : * XXX In these first 3 cases, we always just
4487 : * XXX deactivate the page. We may want to
4488 : * XXX handle the different cases more
4489 : * XXX specifically, in the future.
4490 : */
4491 : case PGO_CLEANIT|PGO_FREE:
4492 : case PGO_CLEANIT|PGO_DEACTIVATE:
4493 : case PGO_DEACTIVATE:
4494 : deactivate_it:
4495 : /* skip the page if it's wired */
4496 0 : if (pg->wire_count != 0)
4497 : break;
4498 :
4499 0 : uvm_lock_pageq();
4500 :
4501 0 : KASSERT(pg->uanon == anon);
4502 :
4503 : /* zap all mappings for the page. */
4504 0 : pmap_page_protect(pg, PROT_NONE);
4505 :
4506 : /* ...and deactivate the page. */
4507 0 : uvm_pagedeactivate(pg);
4508 :
4509 0 : uvm_unlock_pageq();
4510 0 : break;
4511 : case PGO_FREE:
4512 : /*
4513 : * If there are multiple references to
4514 : * the amap, just deactivate the page.
4515 : */
4516 0 : if (amap_refs(amap) > 1)
4517 : goto deactivate_it;
4518 :
4519 : /* XXX skip the page if it's wired */
4520 0 : if (pg->wire_count != 0) {
4521 : break;
4522 : }
4523 0 : amap_unadd(&entry->aref,
4524 0 : cp_start - entry->start);
4525 0 : refs = --anon->an_ref;
4526 0 : if (refs == 0)
4527 0 : uvm_anfree(anon);
4528 : break;
4529 : default:
4530 0 : panic("uvm_map_clean: weird flags");
4531 : }
4532 : }
4533 :
4534 : flush_object:
4535 0 : cp_start = MAX(entry->start, start);
4536 0 : cp_end = MIN(entry->end, end);
4537 :
4538 : /*
4539 : * flush pages if we've got a valid backing object.
4540 : *
4541 : * Don't PGO_FREE if we don't have write permission
4542 : * and don't flush if this is a copy-on-write object
4543 : * since we can't know our permissions on it.
4544 : */
4545 0 : if (uobj != NULL &&
4546 0 : ((flags & PGO_FREE) == 0 ||
4547 0 : ((entry->max_protection & PROT_WRITE) != 0 &&
4548 0 : (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
4549 0 : rv = uobj->pgops->pgo_flush(uobj,
4550 0 : cp_start - entry->start + entry->offset,
4551 0 : cp_end - entry->start + entry->offset, flags);
4552 :
4553 0 : if (rv == FALSE)
4554 0 : error = EFAULT;
4555 : }
4556 : }
4557 :
4558 0 : vm_map_unlock_read(map);
4559 0 : return error;
4560 0 : }
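/*
 * Illustrative sketch, not part of the original uvm_map.c: a flag
 * combination a caller such as sys_msync() might pass, per the rules
 * documented above (PGO_FREE and PGO_DEACTIVATE are mutually exclusive).
 * The map and range are placeholders.
 */
#if 0
static int
example_sync_and_drop(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	/* Write back dirty pages synchronously, then free cached pages. */
	return uvm_map_clean(map, start, end,
	    PGO_CLEANIT | PGO_SYNCIO | PGO_FREE);
}
#endif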
4561 :
4562 : /*
4563 : * UVM_MAP_CLIP_END implementation
4564 : */
4565 : void
4566 0 : uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4567 : {
4568 : struct vm_map_entry *tmp;
4569 :
4570 0 : KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4571 0 : tmp = uvm_mapent_alloc(map, 0);
4572 :
4573 : /* Invoke splitentry. */
4574 0 : uvm_map_splitentry(map, entry, tmp, addr);
4575 0 : }
4576 :
4577 : /*
4578 : * UVM_MAP_CLIP_START implementation
4579 : *
4580 : * Clippers are required to not change the pointers to the entry they are
4581 : * clipping on.
4582 : * Since uvm_map_splitentry turns the original entry into the lowest
4583 : * entry (address wise) we do a swap between the new entry and the original
4584 : * entry, prior to calling uvm_map_splitentry.
4585 : */
4586 : void
4587 0 : uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4588 : {
4589 : struct vm_map_entry *tmp;
4590 : struct uvm_addr_state *free;
4591 :
4592 : /* Unlink original. */
4593 0 : free = uvm_map_uaddr_e(map, entry);
4594 0 : uvm_mapent_free_remove(map, free, entry);
4595 0 : uvm_mapent_addr_remove(map, entry);
4596 :
4597 : /* Copy entry. */
4598 0 : KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4599 0 : tmp = uvm_mapent_alloc(map, 0);
4600 0 : uvm_mapent_copy(entry, tmp);
4601 :
4602 : /* Put new entry in place of original entry. */
4603 0 : uvm_mapent_addr_insert(map, tmp);
4604 0 : uvm_mapent_free_insert(map, free, tmp);
4605 :
4606 : /* Invoke splitentry. */
4607 0 : uvm_map_splitentry(map, tmp, entry, addr);
4608 0 : }
4609 :
4610 : /*
4611 : * Boundary fixer.
4612 : */
4613 : static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t);
4614 : static __inline vaddr_t
4615 0 : uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
4616 : {
4617 60 : return (min < bound && max > bound) ? bound : max;
4618 : }
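/*
 * Illustrative sketch, not part of the original uvm_map.c:
 * uvm_map_boundfix() only clamps max down to the boundary when the
 * range [min, max) actually crosses it.
 */
#if 0
static void
example_boundfix(void)
{
	/* crosses the boundary: clamped to the boundary */
	KASSERT(uvm_map_boundfix(0x1000, 0x5000, 0x3000) == 0x3000);
	/* ends at the boundary: unchanged */
	KASSERT(uvm_map_boundfix(0x1000, 0x3000, 0x3000) == 0x3000);
	/* starts at or above the boundary: unchanged */
	KASSERT(uvm_map_boundfix(0x3000, 0x5000, 0x3000) == 0x5000);
}
#endif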
4619 :
4620 : /*
4621 : * Choose free list based on address at start of free space.
4622 : *
4623 : * The uvm_addr_state returned contains addr and is the first of:
4624 : * - uaddr_exe
4625 : * - uaddr_brk_stack
4626 : * - uaddr_any
4627 : */
4628 : struct uvm_addr_state*
4629 0 : uvm_map_uaddr(struct vm_map *map, vaddr_t addr)
4630 : {
4631 : struct uvm_addr_state *uaddr;
4632 : int i;
4633 :
4634 : /* Special case the first page, to prevent mmap from returning 0. */
4635 660 : if (addr < VMMAP_MIN_ADDR)
4636 0 : return NULL;
4637 :
4638 : /* Upper bound for kernel maps at uvm_maxkaddr. */
4639 0 : if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
4640 0 : if (addr >= uvm_maxkaddr)
4641 0 : return NULL;
4642 : }
4643 :
4644 : /* Is the address inside the exe-only map? */
4645 0 : if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr &&
4646 0 : addr < map->uaddr_exe->uaddr_maxaddr)
4647 0 : return map->uaddr_exe;
4648 :
4649 : /* Check if the space falls inside brk/stack area. */
4650 660 : if ((addr >= map->b_start && addr < map->b_end) ||
4651 660 : (addr >= map->s_start && addr < map->s_end)) {
4652 0 : if (map->uaddr_brk_stack != NULL &&
4653 0 : addr >= map->uaddr_brk_stack->uaddr_minaddr &&
4654 0 : addr < map->uaddr_brk_stack->uaddr_maxaddr) {
4655 0 : return map->uaddr_brk_stack;
4656 : } else
4657 0 : return NULL;
4658 : }
4659 :
4660 : /*
4661 : * Check the other selectors.
4662 : *
4663 : * These selectors are only marked as the owner if they have insert
4664 : * functions.
4665 : */
4666 0 : for (i = 0; i < nitems(map->uaddr_any); i++) {
4667 660 : uaddr = map->uaddr_any[i];
4668 1980 : if (uaddr == NULL)
4669 : continue;
4670 0 : if (uaddr->uaddr_functions->uaddr_free_insert == NULL)
4671 : continue;
4672 :
4673 660 : if (addr >= uaddr->uaddr_minaddr &&
4674 0 : addr < uaddr->uaddr_maxaddr)
4675 0 : return uaddr;
4676 : }
4677 :
4678 0 : return NULL;
4679 0 : }
4680 :
4681 : /*
4682 : * Choose free list based on address at start of free space.
4683 : *
4684 : * The uvm_addr_state returned contains addr and is the first of:
4685 : * - uaddr_exe
4686 : * - uaddr_brk_stack
4687 : * - uaddr_any
4688 : */
4689 : struct uvm_addr_state*
4690 0 : uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry)
4691 : {
4692 240 : return uvm_map_uaddr(map, VMMAP_FREE_START(entry));
4693 : }
4694 :
4695 : /*
4696 : * Returns the first free-memory boundary that is crossed by [min-max].
4697 : */
4698 : vsize_t
4699 0 : uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max)
4700 : {
4701 : struct uvm_addr_state *uaddr;
4702 : int i;
4703 :
4704 : /* Never return first page. */
4705 0 : max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR);
4706 :
4707 : /* Treat the maxkaddr special, if the map is a kernel_map. */
4708 0 : if ((map->flags & VM_MAP_ISVMSPACE) == 0)
4709 60 : max = uvm_map_boundfix(min, max, uvm_maxkaddr);
4710 :
4711 : /* Check for exe-only boundaries. */
4712 0 : if (map->uaddr_exe != NULL) {
4713 0 : max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
4714 0 : max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
4715 0 : }
4716 :
4717 : /* Check for brk/stack boundaries. */
4718 60 : if (map->uaddr_brk_stack != NULL) {
4719 0 : max = uvm_map_boundfix(min, max,
4720 0 : map->uaddr_brk_stack->uaddr_minaddr);
4721 0 : max = uvm_map_boundfix(min, max,
4722 0 : map->uaddr_brk_stack->uaddr_maxaddr);
4723 0 : }
4724 :
4725 : /* Check other boundaries. */
4726 0 : for (i = 0; i < nitems(map->uaddr_any); i++) {
4727 240 : uaddr = map->uaddr_any[i];
4728 0 : if (uaddr != NULL) {
4729 60 : max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
4730 0 : max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
4731 0 : }
4732 : }
4733 :
4734 : /* Boundaries at stack and brk() area. */
4735 0 : max = uvm_map_boundfix(min, max, map->s_start);
4736 0 : max = uvm_map_boundfix(min, max, map->s_end);
4737 0 : max = uvm_map_boundfix(min, max, map->b_start);
4738 0 : max = uvm_map_boundfix(min, max, map->b_end);
4739 :
4740 0 : return max;
4741 : }
4742 :
4743 : /*
4744 : * Update map allocation start and end addresses from proc vmspace.
4745 : */
4746 : void
4747 0 : uvm_map_vmspace_update(struct vm_map *map,
4748 : struct uvm_map_deadq *dead, int flags)
4749 : {
4750 : struct vmspace *vm;
4751 : vaddr_t b_start, b_end, s_start, s_end;
4752 :
4753 0 : KASSERT(map->flags & VM_MAP_ISVMSPACE);
4754 : KASSERT(offsetof(struct vmspace, vm_map) == 0);
4755 :
4756 : /*
4757 : * Derive actual allocation boundaries from vmspace.
4758 : */
4759 0 : vm = (struct vmspace *)map;
4760 0 : b_start = (vaddr_t)vm->vm_daddr;
4761 0 : b_end = b_start + BRKSIZ;
4762 0 : s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4763 0 : s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4764 : #ifdef DIAGNOSTIC
4765 0 : if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
4766 0 : (b_end & (vaddr_t)PAGE_MASK) != 0 ||
4767 0 : (s_start & (vaddr_t)PAGE_MASK) != 0 ||
4768 0 : (s_end & (vaddr_t)PAGE_MASK) != 0) {
4769 0 : panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
4770 : "b=0x%lx-0x%lx s=0x%lx-0x%lx",
4771 : vm, b_start, b_end, s_start, s_end);
4772 : }
4773 : #endif
4774 :
4775 0 : if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
4776 : map->s_start == s_start && map->s_end == s_end))
4777 0 : return;
4778 :
4779 0 : uvm_map_freelist_update(map, dead, b_start, b_end,
4780 : s_start, s_end, flags);
4781 0 : }
4782 :
4783 : /*
4784 : * Grow kernel memory.
4785 : *
4786 : * This function is only called for kernel maps when an allocation fails.
4787 : *
4788 : * If the map has a gap that is large enough to accommodate alloc_sz, this
4789 : * function will make sure map->free will include it.
4790 : * function will make sure the map's free lists will include it.
4791 : void
4792 0 : uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
4793 : vsize_t alloc_sz, int flags)
4794 : {
4795 : vsize_t sz;
4796 : vaddr_t end;
4797 : struct vm_map_entry *entry;
4798 :
4799 : /* Kernel memory only. */
4800 0 : KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
4801 : /* Destroy free list. */
4802 0 : uvm_map_freelist_update_clear(map, dead);
4803 :
4804 : /* Include the guard page in the hard minimum requirement of alloc_sz. */
4805 0 : if (map->flags & VM_MAP_GUARDPAGES)
4806 0 : alloc_sz += PAGE_SIZE;
4807 :
4808 : /*
4809 : * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
4810 : *
4811 : * Don't handle the case where the multiplication overflows:
4812 : * if that happens, the allocation is probably too big anyway.
4813 : */
4814 0 : sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
4815 :
4816 : /*
4817 : * Walk forward until a gap large enough for alloc_sz shows up.
4818 : *
4819 : * We assume the kernel map has no boundaries.
4820 : * uvm_maxkaddr may be zero.
4821 : */
4822 0 : end = MAX(uvm_maxkaddr, map->min_offset);
4823 0 : entry = uvm_map_entrybyaddr(&map->addr, end);
4824 0 : while (entry && entry->fspace < alloc_sz)
4825 0 : entry = RBT_NEXT(uvm_map_addr, entry);
4826 0 : if (entry) {
4827 0 : end = MAX(VMMAP_FREE_START(entry), end);
4828 0 : end += MIN(sz, map->max_offset - end);
4829 0 : } else
4830 0 : end = map->max_offset;
4831 :
4832 : /* Reserve pmap entries. */
4833 : #ifdef PMAP_GROWKERNEL
4834 0 : uvm_maxkaddr = pmap_growkernel(end);
4835 : #else
4836 : uvm_maxkaddr = MAX(uvm_maxkaddr, end);
4837 : #endif
4838 :
4839 : /* Rebuild free list. */
4840 0 : uvm_map_freelist_update_refill(map, flags);
4841 0 : }
4842 :
4843 : /*
4844 : * Freelist update subfunction: unlink all entries from freelists.
4845 : */
4846 : void
4847 0 : uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
4848 : {
4849 : struct uvm_addr_state *free;
4850 : struct vm_map_entry *entry, *prev, *next;
4851 :
4852 : prev = NULL;
4853 0 : for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL;
4854 : entry = next) {
4855 0 : next = RBT_NEXT(uvm_map_addr, entry);
4856 :
4857 0 : free = uvm_map_uaddr_e(map, entry);
4858 0 : uvm_mapent_free_remove(map, free, entry);
4859 :
4860 0 : if (prev != NULL && entry->start == entry->end) {
4861 0 : prev->fspace += VMMAP_FREE_END(entry) - entry->end;
4862 0 : uvm_mapent_addr_remove(map, entry);
4863 0 : DEAD_ENTRY_PUSH(dead, entry);
4864 0 : } else
4865 : prev = entry;
4866 : }
4867 0 : }
4868 :
4869 : /*
4870 : * Freelist update subfunction: refill the freelists with entries.
4871 : */
4872 : void
4873 0 : uvm_map_freelist_update_refill(struct vm_map *map, int flags)
4874 : {
4875 : struct vm_map_entry *entry;
4876 : vaddr_t min, max;
4877 :
4878 0 : RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
4879 0 : min = VMMAP_FREE_START(entry);
4880 0 : max = VMMAP_FREE_END(entry);
4881 0 : entry->fspace = 0;
4882 :
4883 0 : entry = uvm_map_fix_space(map, entry, min, max, flags);
4884 : }
4885 :
4886 : uvm_tree_sanity(map, __FILE__, __LINE__);
4887 0 : }
4888 :
4889 : /*
4890 : * Change {b,s}_{start,end} allocation ranges and associated free lists.
4891 : */
4892 : void
4893 0 : uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
4894 : vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
4895 : {
4896 : KDASSERT(b_end >= b_start && s_end >= s_start);
4897 :
4898 : /* Clear all free lists. */
4899 0 : uvm_map_freelist_update_clear(map, dead);
4900 :
4901 : /* Apply new bounds. */
4902 0 : map->b_start = b_start;
4903 0 : map->b_end = b_end;
4904 0 : map->s_start = s_start;
4905 0 : map->s_end = s_end;
4906 :
4907 : /* Refill free lists. */
4908 0 : uvm_map_freelist_update_refill(map, flags);
4909 0 : }
4910 :
4911 : /*
4912 : * Assign a uvm_addr_state to the specified pointer in vm_map.
4913 : *
4914 : * May sleep.
4915 : */
4916 : void
4917 0 : uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
4918 : struct uvm_addr_state *newval)
4919 : {
4920 0 : struct uvm_map_deadq dead;
4921 :
4922 : /* Pointer which must be in this map. */
4923 0 : KASSERT(which != NULL);
4924 0 : KASSERT((void*)map <= (void*)(which) &&
4925 : (void*)(which) < (void*)(map + 1));
4926 :
4927 0 : vm_map_lock(map);
4928 0 : TAILQ_INIT(&dead);
4929 0 : uvm_map_freelist_update_clear(map, &dead);
4930 :
4931 0 : uvm_addr_destroy(*which);
4932 0 : *which = newval;
4933 :
4934 0 : uvm_map_freelist_update_refill(map, 0);
4935 0 : vm_map_unlock(map);
4936 0 : uvm_unmap_detach(&dead, 0);
4937 0 : }
4938 :
4939 : /*
4940 : * Correct space insert.
4941 : *
4942 : * Entry must not be on any freelist.
4943 : */
4944 : struct vm_map_entry*
4945 0 : uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry,
4946 : vaddr_t min, vaddr_t max, int flags)
4947 : {
4948 : struct uvm_addr_state *free, *entfree;
4949 : vaddr_t lmax;
4950 :
4951 180 : KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0);
4952 : KDASSERT(min <= max);
4953 : KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) ||
4954 : min == map->min_offset);
4955 :
4956 : /*
4957 : * During the function, entfree will always point at the uaddr state
4958 : * for entry.
4959 : */
4960 0 : entfree = (entry == NULL ? NULL :
4961 0 : uvm_map_uaddr_e(map, entry));
4962 :
4963 60 : while (min != max) {
4964 : /* Claim guard page for entry. */
4965 60 : if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL &&
4966 0 : VMMAP_FREE_END(entry) == entry->end &&
4967 0 : entry->start != entry->end) {
4968 0 : if (max - min == 2 * PAGE_SIZE) {
4969 : /*
4970 : * If the free-space gap is exactly 2 pages,
4971 : * we make the guard 2 pages instead of 1.
4972 : * Because in a guarded map, an area needs
4973 : * at least 2 pages to allocate from:
4974 : * one page for the allocation and one for
4975 : * the guard.
4976 : */
4977 0 : entry->guard = 2 * PAGE_SIZE;
4978 : min = max;
4979 0 : } else {
4980 0 : entry->guard = PAGE_SIZE;
4981 0 : min += PAGE_SIZE;
4982 : }
4983 0 : continue;
4984 : }
4985 :
4986 : /*
4987 : * Handle the case where entry has a 2-page guard, but the
4988 : * space after entry is freed.
4989 : */
4990 0 : if (entry != NULL && entry->fspace == 0 &&
4991 0 : entry->guard > PAGE_SIZE) {
4992 0 : entry->guard = PAGE_SIZE;
4993 0 : min = VMMAP_FREE_START(entry);
4994 0 : }
4995 :
4996 60 : lmax = uvm_map_boundary(map, min, max);
4997 0 : free = uvm_map_uaddr(map, min);
4998 :
4999 : /*
5000 : * Entries are merged if they point at the same free list (uvm_map_uaddr()).
5001 : * Exception to that rule: if min == uvm_maxkaddr, a new
5002 : * entry is started regardless (otherwise the allocators
5003 : * will get confused).
5004 : */
5005 0 : if (entry != NULL && free == entfree &&
5006 0 : !((map->flags & VM_MAP_ISVMSPACE) == 0 &&
5007 0 : min == uvm_maxkaddr)) {
5008 : KDASSERT(VMMAP_FREE_END(entry) == min);
5009 60 : entry->fspace += lmax - min;
5010 0 : } else {
5011 : /*
5012 : * Commit entry to its free list: no more free space
5013 : * will be added to it.
5014 : * We'll start a new entry and add further free space
5015 : * to that one instead.
5016 : */
5017 0 : if (entry != NULL)
5018 0 : uvm_mapent_free_insert(map, entfree, entry);
5019 :
5020 : /* New entry for new uaddr. */
5021 0 : entry = uvm_mapent_alloc(map, flags);
5022 : KDASSERT(entry != NULL);
5023 0 : entry->end = entry->start = min;
5024 0 : entry->guard = 0;
5025 0 : entry->fspace = lmax - min;
5026 0 : entry->object.uvm_obj = NULL;
5027 0 : entry->offset = 0;
5028 0 : entry->etype = 0;
5029 0 : entry->protection = entry->max_protection = 0;
5030 0 : entry->inheritance = 0;
5031 0 : entry->wired_count = 0;
5032 0 : entry->advice = 0;
5033 0 : entry->aref.ar_pageoff = 0;
5034 0 : entry->aref.ar_amap = NULL;
5035 0 : uvm_mapent_addr_insert(map, entry);
5036 :
5037 : entfree = free;
5038 : }
5039 :
5040 : min = lmax;
5041 : }
5042 : /* Finally put entry on the uaddr state. */
5043 300 : if (entry != NULL)
5044 0 : uvm_mapent_free_insert(map, entfree, entry);
5045 :
5046 0 : return entry;
5047 : }
5048 :
5049 : /*
5050 : * MQuery style of allocation.
5051 : *
5052 : * This allocator searches forward until sufficient space is found to map
5053 : * the given size.
5054 : *
5055 : * XXX: factor in offset (via pmap_prefer) and protection?
5056 : */
5057 : int
5058 0 : uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset,
5059 : int flags)
5060 : {
5061 0 : struct vm_map_entry *entry, *last;
5062 : vaddr_t addr;
5063 : vaddr_t tmp, pmap_align, pmap_offset;
5064 : int error;
5065 :
5066 0 : addr = *addr_p;
5067 0 : vm_map_lock_read(map);
5068 :
5069 : /* Configure pmap prefer. */
5070 : if (offset != UVM_UNKNOWN_OFFSET) {
5071 : pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN());
5072 : pmap_offset = PMAP_PREFER_OFFSET(offset);
5073 : } else {
5074 : pmap_align = PAGE_SIZE;
5075 : pmap_offset = 0;
5076 : }
5077 :
5078 : /* Align address to pmap_prefer unless FLAG_FIXED is set. */
5079 0 : if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) {
5080 0 : tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5081 0 : if (tmp < addr)
5082 0 : tmp += pmap_align;
5083 : addr = tmp;
5084 0 : }
5085 :
5086 : /* First, check if the requested range is fully available. */
5087 0 : entry = uvm_map_entrybyaddr(&map->addr, addr);
5088 0 : last = NULL;
5089 0 : if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5090 : error = 0;
5091 0 : goto out;
5092 : }
5093 0 : if (flags & UVM_FLAG_FIXED) {
5094 : error = EINVAL;
5095 0 : goto out;
5096 : }
5097 :
5098 : error = ENOMEM; /* Default error from here. */
5099 :
5100 : /*
5101 : * At this point, the memory at <addr, sz> is not available.
5102 : * The reasons are:
5103 : * [1] it's outside the map,
5104 : * [2] it starts in used memory (and therefore needs to move
5105 : * toward the first free page in entry),
5106 : * [3] it starts in free memory but bumps into used memory.
5107 : *
5108 : * Note that for case [2], the forward moving is handled by the
5109 : * for loop below.
5110 : */
5111 0 : if (entry == NULL) {
5112 : /* [1] Outside the map. */
5113 0 : if (addr >= map->max_offset)
5114 : goto out;
5115 : else
5116 0 : entry = RBT_MIN(uvm_map_addr, &map->addr);
5117 0 : } else if (VMMAP_FREE_START(entry) <= addr) {
5118 : /* [3] Bumped into used memory. */
5119 0 : entry = RBT_NEXT(uvm_map_addr, entry);
5120 0 : }
5121 :
5122 : /* Test if the next entry is sufficient for the allocation. */
5123 0 : for (; entry != NULL;
5124 0 : entry = RBT_NEXT(uvm_map_addr, entry)) {
5125 0 : if (entry->fspace == 0)
5126 : continue;
5127 0 : addr = VMMAP_FREE_START(entry);
5128 :
5129 : restart: /* Restart address checks on address change. */
5130 0 : tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5131 0 : if (tmp < addr)
5132 0 : tmp += pmap_align;
5133 : addr = tmp;
5134 0 : if (addr >= VMMAP_FREE_END(entry))
5135 : continue;
5136 :
5137 : /* Skip brk() allocation addresses. */
5138 0 : if (addr + sz > map->b_start && addr < map->b_end) {
5139 0 : if (VMMAP_FREE_END(entry) > map->b_end) {
5140 : addr = map->b_end;
5141 0 : goto restart;
5142 : } else
5143 : continue;
5144 : }
5145 : /* Skip stack allocation addresses. */
5146 0 : if (addr + sz > map->s_start && addr < map->s_end) {
5147 0 : if (VMMAP_FREE_END(entry) > map->s_end) {
5148 : addr = map->s_end;
5149 0 : goto restart;
5150 : } else
5151 : continue;
5152 : }
5153 :
5154 0 : last = NULL;
5155 0 : if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5156 : error = 0;
5157 0 : goto out;
5158 : }
5159 : }
5160 :
5161 : out:
5162 0 : vm_map_unlock_read(map);
5163 0 : if (error == 0)
5164 0 : *addr_p = addr;
5165 0 : return error;
5166 0 : }
5167 :
5168 : /*
5169 : * Determine allocation bias.
5170 : *
5171 : * Returns 1 if we should bias to high addresses, -1 for a bias towards low
5172 : * addresses, or 0 for no bias.
5173 : * The bias mechanism is intended to avoid clashing with brk() and stack
5174 : * areas.
5175 : */
5176 : int
5177 0 : uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry)
5178 : {
5179 : vaddr_t start, end;
5180 :
5181 0 : start = VMMAP_FREE_START(entry);
5182 0 : end = VMMAP_FREE_END(entry);
5183 :
5184 : /* Stay at the top of brk() area. */
5185 0 : if (end >= map->b_start && start < map->b_end)
5186 0 : return 1;
5187 : /* Stay at the far end of the stack area. */
5188 0 : if (end >= map->s_start && start < map->s_end) {
5189 : #ifdef MACHINE_STACK_GROWS_UP
5190 : return 1;
5191 : #else
5192 0 : return -1;
5193 : #endif
5194 : }
5195 :
5196 : /* No bias, this area is meant for us. */
5197 0 : return 0;
5198 0 : }
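/*
 * Illustrative sketch, not part of the original uvm_map.c: how an
 * address selector might interpret the bias when picking a start
 * address inside a free range.  example_pick_addr() and its policy are
 * hypothetical; sz is assumed to fit in the free range.
 */
#if 0
static vaddr_t
example_pick_addr(struct vm_map *map, struct vm_map_entry *entry, vsize_t sz)
{
	int bias = uvm_mapent_bias(map, entry);

	if (bias > 0)		/* overlaps brk() area: prefer high addresses */
		return VMMAP_FREE_END(entry) - sz;
	if (bias < 0)		/* overlaps stack area: prefer low addresses */
		return VMMAP_FREE_START(entry);
	return VMMAP_FREE_START(entry);	/* no bias: any policy will do */
}
#endif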
5199 :
5200 :
5201 : boolean_t
5202 0 : vm_map_lock_try_ln(struct vm_map *map, char *file, int line)
5203 : {
5204 : boolean_t rv;
5205 :
5206 0 : if (map->flags & VM_MAP_INTRSAFE) {
5207 0 : rv = _mtx_enter_try(&map->mtx LOCK_FL_ARGS);
5208 0 : } else {
5209 0 : mtx_enter(&map->flags_lock);
5210 0 : if (map->flags & VM_MAP_BUSY) {
5211 : mtx_leave(&map->flags_lock);
5212 0 : return (FALSE);
5213 : }
5214 : mtx_leave(&map->flags_lock);
5215 0 : rv = (_rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP LOCK_FL_ARGS)
5216 0 : == 0);
5217 : /* check if the lock is busy and back out if we won the race */
5218 0 : if (rv) {
5219 0 : mtx_enter(&map->flags_lock);
5220 0 : if (map->flags & VM_MAP_BUSY) {
5221 0 : _rw_exit(&map->lock LOCK_FL_ARGS);
5222 : rv = FALSE;
5223 0 : }
5224 0 : mtx_leave(&map->flags_lock);
5225 0 : }
5226 : }
5227 :
5228 0 : if (rv) {
5229 0 : map->timestamp++;
5230 : LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
5231 : uvm_tree_sanity(map, file, line);
5232 : uvm_tree_size_chk(map, file, line);
5233 0 : }
5234 :
5235 0 : return (rv);
5236 0 : }
5237 :
5238 : void
5239 0 : vm_map_lock_ln(struct vm_map *map, char *file, int line)
5240 : {
5241 180 : if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5242 0 : do {
5243 0 : mtx_enter(&map->flags_lock);
5244 : tryagain:
5245 180 : while (map->flags & VM_MAP_BUSY) {
5246 0 : map->flags |= VM_MAP_WANTLOCK;
5247 0 : msleep(&map->flags, &map->flags_lock,
5248 : PVM, vmmapbsy, 0);
5249 : }
5250 0 : mtx_leave(&map->flags_lock);
5251 0 : } while (_rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL
5252 0 : LOCK_FL_ARGS) != 0);
5253 : /* check if the lock is busy and back out if we won the race */
5254 0 : mtx_enter(&map->flags_lock);
5255 0 : if (map->flags & VM_MAP_BUSY) {
5256 0 : _rw_exit(&map->lock LOCK_FL_ARGS);
5257 0 : goto tryagain;
5258 : }
5259 180 : mtx_leave(&map->flags_lock);
5260 0 : } else {
5261 0 : _mtx_enter(&map->mtx LOCK_FL_ARGS);
5262 : }
5263 :
5264 0 : map->timestamp++;
5265 : LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
5266 : uvm_tree_sanity(map, file, line);
5267 : uvm_tree_size_chk(map, file, line);
5268 0 : }
5269 :
5270 : void
5271 0 : vm_map_lock_read_ln(struct vm_map *map, char *file, int line)
5272 : {
5273 74 : if ((map->flags & VM_MAP_INTRSAFE) == 0)
5274 73 : _rw_enter_read(&map->lock LOCK_FL_ARGS);
5275 : else
5276 0 : _mtx_enter(&map->mtx LOCK_FL_ARGS);
5277 : LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
5278 : uvm_tree_sanity(map, file, line);
5279 : uvm_tree_size_chk(map, file, line);
5280 0 : }
5281 :
5282 : void
5283 0 : vm_map_unlock_ln(struct vm_map *map, char *file, int line)
5284 : {
5285 : uvm_tree_sanity(map, file, line);
5286 : uvm_tree_size_chk(map, file, line);
5287 : LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5288 60 : if ((map->flags & VM_MAP_INTRSAFE) == 0)
5289 180 : _rw_exit(&map->lock LOCK_FL_ARGS);
5290 : else
5291 0 : _mtx_leave(&map->mtx LOCK_FL_ARGS);
5292 0 : }
5293 :
5294 : void
5295 0 : vm_map_unlock_read_ln(struct vm_map *map, char *file, int line)
5296 : {
5297 : /* XXX: RO */ uvm_tree_sanity(map, file, line);
5298 : /* XXX: RO */ uvm_tree_size_chk(map, file, line);
5299 : LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5300 60 : if ((map->flags & VM_MAP_INTRSAFE) == 0)
5301 60 : _rw_exit_read(&map->lock LOCK_FL_ARGS);
5302 : else
5303 0 : _mtx_leave(&map->mtx LOCK_FL_ARGS);
5304 0 : }
5305 :
5306 : void
5307 0 : vm_map_downgrade_ln(struct vm_map *map, char *file, int line)
5308 : {
5309 : uvm_tree_sanity(map, file, line);
5310 : uvm_tree_size_chk(map, file, line);
5311 : LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5312 : LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
5313 0 : KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5314 0 : if ((map->flags & VM_MAP_INTRSAFE) == 0)
5315 0 : _rw_enter(&map->lock, RW_DOWNGRADE LOCK_FL_ARGS);
5316 0 : }
5317 :
5318 : void
5319 0 : vm_map_upgrade_ln(struct vm_map *map, char *file, int line)
5320 : {
5321 : /* XXX: RO */ uvm_tree_sanity(map, file, line);
5322 : /* XXX: RO */ uvm_tree_size_chk(map, file, line);
5323 : LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5324 0 : KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5325 0 : if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5326 0 : _rw_exit_read(&map->lock LOCK_FL_ARGS);
5327 0 : _rw_enter_write(&map->lock LOCK_FL_ARGS);
5328 0 : }
5329 : LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
5330 : uvm_tree_sanity(map, file, line);
5331 0 : }
5332 :
5333 : void
5334 0 : vm_map_busy_ln(struct vm_map *map, char *file, int line)
5335 : {
5336 0 : KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5337 0 : mtx_enter(&map->flags_lock);
5338 0 : map->flags |= VM_MAP_BUSY;
5339 0 : mtx_leave(&map->flags_lock);
5340 0 : }
5341 :
5342 : void
5343 0 : vm_map_unbusy_ln(struct vm_map *map, char *file, int line)
5344 : {
5345 : int oflags;
5346 :
5347 0 : KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5348 0 : mtx_enter(&map->flags_lock);
5349 0 : oflags = map->flags;
5350 0 : map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK);
5351 0 : mtx_leave(&map->flags_lock);
5352 0 : if (oflags & VM_MAP_WANTLOCK)
5353 0 : wakeup(&map->flags);
5354 0 : }
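
/*
 * Illustrative sketch (not part of the original file): one plausible shape
 * of the VM_MAP_BUSY handshake implemented by the functions above, written
 * with the usual macro wrappers.  long_running_operation() and
 * example_busy_window() are hypothetical.  While the map is busy,
 * vm_map_lock() sleeps and vm_map_lock_try() fails, but readers still get
 * through on the downgraded lock.
 */
#if 0
void
example_busy_window(struct vm_map *map)
{
	vm_map_lock(map);		/* exclusive lock */
	vm_map_busy(map);		/* hold off other writers */
	vm_map_downgrade(map);		/* keep only a read lock */

	long_running_operation(map);	/* hypothetical; may sleep */

	vm_map_upgrade(map);		/* take the write lock back */
	vm_map_unbusy(map);		/* wake writers queued in vm_map_lock_ln() */
	vm_map_unlock(map);
}
#endif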
5355 :
5356 : #ifndef SMALL_KERNEL
5357 : int
5358 0 : uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve,
5359 : size_t *lenp)
5360 : {
5361 : struct vm_map_entry *entry;
5362 : vaddr_t start;
5363 : int cnt, maxcnt, error = 0;
5364 :
5365 0 : KASSERT(*lenp > 0);
5366 0 : KASSERT((*lenp % sizeof(*kve)) == 0);
5367 : cnt = 0;
5368 0 : maxcnt = *lenp / sizeof(*kve);
5369 0 : KASSERT(maxcnt > 0);
5370 :
5371 : /*
5372 : 	 * Return only entries whose start address is at or above the given
5373 : 	 * base address.  This allows userland to iterate without knowing the
5374 : 	 * number of entries beforehand.
5375 : */
5376 0 : start = (vaddr_t)kve[0].kve_start;
5377 :
5378 0 : vm_map_lock(map);
5379 0 : RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5380 0 : if (cnt == maxcnt) {
5381 : error = ENOMEM;
5382 0 : break;
5383 : }
5384 0 : if (start != 0 && entry->start < start)
5385 : continue;
5386 0 : kve->kve_start = entry->start;
5387 0 : kve->kve_end = entry->end;
5388 0 : kve->kve_guard = entry->guard;
5389 0 : kve->kve_fspace = entry->fspace;
5390 0 : kve->kve_fspace_augment = entry->fspace_augment;
5391 0 : kve->kve_offset = entry->offset;
5392 0 : kve->kve_wired_count = entry->wired_count;
5393 0 : kve->kve_etype = entry->etype;
5394 0 : kve->kve_protection = entry->protection;
5395 0 : kve->kve_max_protection = entry->max_protection;
5396 0 : kve->kve_advice = entry->advice;
5397 0 : kve->kve_inheritance = entry->inheritance;
5398 0 : kve->kve_flags = entry->flags;
5399 0 : kve++;
5400 0 : cnt++;
5401 0 : }
5402 0 : vm_map_unlock(map);
5403 :
5404 0 : KASSERT(cnt <= maxcnt);
5405 :
5406 0 : *lenp = sizeof(*kve) * cnt;
5407 0 : return error;
5408 : }
5409 : #endif
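
/*
 * Illustrative sketch (not part of the original file): how the kve_start
 * continuation protocol of uvm_map_fill_vmmap() can be consumed with a
 * fixed-size buffer; in practice this path is reached from a sysctl(2)
 * handler.  example_walk_vmmap() and the "consume" step are hypothetical.
 * ENOMEM means the buffer filled up, so the walk resumes just past the
 * last entry already returned.
 */
#if 0
void
example_walk_vmmap(struct vm_map *map)
{
	struct kinfo_vmentry kve[8];
	size_t i, n, len;
	int error;

	kve[0].kve_start = 0;		/* start at the lowest entry */
	do {
		len = sizeof(kve);
		error = uvm_map_fill_vmmap(map, kve, &len);
		n = len / sizeof(kve[0]);
		for (i = 0; i < n; i++) {
			/* ... consume kve[i] here ... */
		}
		if (n > 0)		/* resume past the last entry seen */
			kve[0].kve_start = kve[n - 1].kve_end;
	} while (error == ENOMEM);
}
#endif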
5410 :
5411 :
5412 120 : RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
5413 : uvm_mapentry_addrcmp, uvm_map_addr_augment);
5414 :
5415 :
5416 : /*
5417 : * MD code: vmspace allocator setup.
5418 : */
5419 :
5420 : #ifdef __i386__
5421 : void
5422 : uvm_map_setup_md(struct vm_map *map)
5423 : {
5424 : vaddr_t min, max;
5425 :
5426 : min = map->min_offset;
5427 : max = map->max_offset;
5428 :
5429 : /*
5430 : * Ensure the selectors will not try to manage page 0;
5431 : * it's too special.
5432 : */
5433 : if (min < VMMAP_MIN_ADDR)
5434 : min = VMMAP_MIN_ADDR;
5435 :
5436 : #if 0 /* Cool stuff, not yet */
5437 : /* Executable code is special. */
5438 : map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR);
5439 : /* Place normal allocations beyond executable mappings. */
5440 : map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max);
5441 : #else /* Crappy stuff, for now */
5442 : map->uaddr_any[0] = uaddr_rnd_create(min, max);
5443 : #endif
5444 :
5445 : #ifndef SMALL_KERNEL
5446 : map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5447 : #endif /* !SMALL_KERNEL */
5448 : }
5449 : #elif __LP64__
5450 : void
5451 0 : uvm_map_setup_md(struct vm_map *map)
5452 : {
5453 : vaddr_t min, max;
5454 :
5455 0 : min = map->min_offset;
5456 0 : max = map->max_offset;
5457 :
5458 : /*
5459 : * Ensure the selectors will not try to manage page 0;
5460 : * it's too special.
5461 : */
5462 0 : if (min < VMMAP_MIN_ADDR)
5463 0 : min = VMMAP_MIN_ADDR;
5464 :
5465 : #if 0 /* Cool stuff, not yet */
5466 : map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max);
5467 : #else /* Crappy stuff, for now */
5468 0 : map->uaddr_any[0] = uaddr_rnd_create(min, max);
5469 : #endif
5470 :
5471 : #ifndef SMALL_KERNEL
5472 0 : map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5473 : #endif /* !SMALL_KERNEL */
5474 0 : }
5475 : #else /* non-i386, 32 bit */
5476 : void
5477 : uvm_map_setup_md(struct vm_map *map)
5478 : {
5479 : vaddr_t min, max;
5480 :
5481 : min = map->min_offset;
5482 : max = map->max_offset;
5483 :
5484 : /*
5485 : * Ensure the selectors will not try to manage page 0;
5486 : * it's too special.
5487 : */
5488 : if (min < VMMAP_MIN_ADDR)
5489 : min = VMMAP_MIN_ADDR;
5490 :
5491 : #if 0 /* Cool stuff, not yet */
5492 : map->uaddr_any[3] = uaddr_pivot_create(min, max);
5493 : #else /* Crappy stuff, for now */
5494 : map->uaddr_any[0] = uaddr_rnd_create(min, max);
5495 : #endif
5496 :
5497 : #ifndef SMALL_KERNEL
5498 : map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5499 : #endif /* !SMALL_KERNEL */
5500 : }
5501 : #endif
|