/*	$OpenBSD: uvm_addr.c,v 1.27 2018/05/16 09:02:11 otto Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/* #define DEBUG */

#include <sys/param.h>
#include <uvm/uvm.h>
#include <uvm/uvm_addr.h>
#include <sys/pool.h>

/* Max gap between hint allocations. */
#define UADDR_HINT_MAXGAP	(4 * PAGE_SIZE)
/* Number of pivots in pivot allocator. */
#define NUM_PIVOTS		16
/*
 * Max number (inclusive) of pages the pivot allocator
 * will place between allocations.
 *
 * The uaddr_pivot_random() function attempts to bias towards
 * small space between allocations, so putting a large number here is fine.
 */
#define PIVOT_RND		8
/*
 * Number of allocations that a pivot can supply before expiring.
 * When a pivot expires, a new pivot has to be found.
 *
 * Must be at least 1.
 */
#define PIVOT_EXPIRE		1024


/* Pool with uvm_addr_state structures. */
struct pool uaddr_pool;
struct pool uaddr_bestfit_pool;
struct pool uaddr_pivot_pool;
struct pool uaddr_rnd_pool;

/* uvm_addr state for bestfit selector. */
struct uaddr_bestfit_state {
	struct uvm_addr_state		 ubf_uaddr;
	struct uaddr_free_rbtree	 ubf_free;
};

/* uvm_addr state for rnd selector. */
struct uaddr_rnd_state {
	struct uvm_addr_state		 ur_uaddr;
#if 0
	TAILQ_HEAD(, vm_map_entry)	 ur_free;
#endif
};

/* Definition of a pivot in pivot selector. */
struct uaddr_pivot {
	vaddr_t				 addr;	/* End of prev. allocation. */
	int				 expire;/* Best before date. */
	int				 dir;	/* Direction. */
	struct vm_map_entry		*entry; /* Will contain next alloc. */
};
/* uvm_addr state for pivot selector. */
struct uaddr_pivot_state {
	struct uvm_addr_state		 up_uaddr;

	/* Free space tree, for fast pivot selection. */
	struct uaddr_free_rbtree	 up_free;

	/* List of pivots. The pointers point to after the last allocation. */
	struct uaddr_pivot		 up_pivots[NUM_PIVOTS];
};

/* Forward declaration (see below). */
extern const struct uvm_addr_functions uaddr_kernel_functions;
struct uvm_addr_state uaddr_kbootstrap;

/* Support functions. */
#ifndef SMALL_KERNEL
struct vm_map_entry	*uvm_addr_entrybyspace(struct uaddr_free_rbtree *,
			    vsize_t);
#endif /* !SMALL_KERNEL */
void			 uaddr_kinsert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_kremove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_kbootstrapdestroy(struct uvm_addr_state *);

void			 uaddr_destroy(struct uvm_addr_state *);
void			 uaddr_kbootstrap_destroy(struct uvm_addr_state *);
void			 uaddr_rnd_destroy(struct uvm_addr_state *);
void			 uaddr_bestfit_destroy(struct uvm_addr_state *);
void			 uaddr_pivot_destroy(struct uvm_addr_state *);

#if 0
int			 uaddr_lin_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
#endif
int			 uaddr_kbootstrap_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_rnd_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_bestfit_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
#ifndef SMALL_KERNEL
int			 uaddr_pivot_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_stack_brk_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
#endif /* !SMALL_KERNEL */

void			 uaddr_rnd_insert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_rnd_remove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_bestfit_insert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_bestfit_remove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_pivot_insert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_pivot_remove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);

#ifndef SMALL_KERNEL
vsize_t			 uaddr_pivot_random(void);
int			 uaddr_pivot_newpivot(struct vm_map *,
			    struct uaddr_pivot_state *, struct uaddr_pivot *,
			    struct vm_map_entry **, vaddr_t *,
			    vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t);
#endif /* !SMALL_KERNEL */

#if defined(DEBUG) || defined(DDB)
void			 uaddr_pivot_print(struct uvm_addr_state *, boolean_t,
			    int (*)(const char *, ...));
#if 0
void			 uaddr_rnd_print(struct uvm_addr_state *, boolean_t,
			    int (*)(const char *, ...));
#endif
#endif /* DEBUG || DDB */


#ifndef SMALL_KERNEL
/*
 * Find smallest entry in tree that will fit sz bytes.
 */
struct vm_map_entry *
uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz)
{
	struct vm_map_entry	*tmp, *res;

	tmp = RBT_ROOT(uaddr_free_rbtree, free);
	res = NULL;
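	/*
	 * Descend the tree: remember any entry that is large enough and
	 * keep searching left for a tighter fit; otherwise descend right
	 * towards larger entries.
	 */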
	while (tmp) {
		if (tmp->fspace >= sz) {
			res = tmp;
			tmp = RBT_LEFT(uaddr_free_rbtree, tmp);
		} else if (tmp->fspace < sz)
			tmp = RBT_RIGHT(uaddr_free_rbtree, tmp);
	}
	return res;
}
#endif /* !SMALL_KERNEL */

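/*
 * Move addr forwards to the first address >= addr that equals
 * offset modulo align (align is raised to at least PAGE_SIZE).
 */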
static __inline vaddr_t
uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t adjusted;

	KASSERT(offset < align || (align == 0 && offset == 0));
	KASSERT((align & (align - 1)) == 0);
	KASSERT((offset & PAGE_MASK) == 0);

	align = MAX(align, PAGE_SIZE);
	adjusted = addr & ~(align - 1);
	adjusted += offset;
	return (adjusted < addr ? adjusted + align : adjusted);
}

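/*
 * Move addr backwards to the first address <= addr that equals
 * offset modulo align (align is raised to at least PAGE_SIZE).
 */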
static __inline vaddr_t
uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t adjusted;

	KASSERT(offset < align || (align == 0 && offset == 0));
	KASSERT((align & (align - 1)) == 0);
	KASSERT((offset & PAGE_MASK) == 0);

	align = MAX(align, PAGE_SIZE);
	adjusted = addr & ~(align - 1);
	adjusted += offset;
	return (adjusted > addr ? adjusted - align : adjusted);
}

/*
 * Try to fit the requested space into the entry.
 *
 * On success, *min_result and *max_result are set to the lowest and
 * highest start addresses at which sz bytes fit, honouring align,
 * offset and the requested gaps.
 */
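/*
 * Worked example: with low_addr = 0x1000, high_addr = 0x9000,
 * sz = 0x2000, align = 0x1000, offset = 0 and no gaps, any start
 * address in [0x1000, 0x7000] fits, so *min_result is set to 0x1000
 * and *max_result to 0x7000.
 */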
int
uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result,
    vaddr_t low_addr, vaddr_t high_addr, vsize_t sz,
    vaddr_t align, vaddr_t offset,
    vsize_t before_gap, vsize_t after_gap)
{
	vaddr_t	tmp;
	vsize_t	fspace;

	if (low_addr > high_addr)
		return ENOMEM;
	fspace = high_addr - low_addr;
	if (fspace < before_gap + after_gap)
		return ENOMEM;
	if (fspace - before_gap - after_gap < sz)
		return ENOMEM;

	/* Calculate lowest address. */
	low_addr += before_gap;
	low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset);
	if (low_addr < tmp)	/* Overflow during alignment. */
		return ENOMEM;
	if (high_addr - after_gap - sz < low_addr)
		return ENOMEM;

	/* Calculate highest address. */
	high_addr -= after_gap + sz;
	high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset);
	if (high_addr > tmp)	/* Overflow during alignment. */
		return ENOMEM;
	if (low_addr > high_addr)
		return ENOMEM;

	*min_result = low_addr;
	*max_result = high_addr;
	return 0;
}


/*
 * Initialize uvm_addr.
 */
void
uvm_addr_init(void)
{
	pool_init(&uaddr_pool, sizeof(struct uvm_addr_state), 0,
	    IPL_VM, PR_WAITOK, "uaddr", NULL);
	pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state), 0,
	    IPL_VM, PR_WAITOK, "uaddrbest", NULL);
	pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state), 0,
	    IPL_VM, PR_WAITOK, "uaddrpivot", NULL);
	pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state), 0,
	    IPL_VM, PR_WAITOK, "uaddrrnd", NULL);

	uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE;
	uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE;
	uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions;
}

/*
 * Invoke destructor function of uaddr.
 */
void
uvm_addr_destroy(struct uvm_addr_state *uaddr)
{
	if (uaddr)
		(*uaddr->uaddr_functions->uaddr_destroy)(uaddr);
}

/*
 * Move address forward to satisfy align, offset.
 */
vaddr_t
uvm_addr_align(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t result = (addr & ~(align - 1)) + offset;
	if (result < addr)
		result += align;
	return result;
}

/*
 * Move address backwards to satisfy align, offset.
 */
vaddr_t
uvm_addr_align_back(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t result = (addr & ~(align - 1)) + offset;
	if (result > addr)
		result -= align;
	return result;
}

/*
 * Directional first fit.
 *
 * Do a linear search for free space, starting at addr in entry.
 * direction ==  1: search forward
 * direction == -1: search backward
 *
 * Output: low <= addr <= high and entry will contain addr.
 * ENOMEM will be returned if no space is available.
 *
 * before_gap and after_gap describe the space that must be kept free
 * before and after the allocation.
 */
int
uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset,
    int direction, vaddr_t low, vaddr_t high,
    vsize_t before_gap, vsize_t after_gap)
{
	struct vm_map_entry	*entry;
	vaddr_t			 low_addr, high_addr;

	KASSERT(entry_out != NULL && addr_out != NULL);
	KASSERT(direction == -1 || direction == 1);
	KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 &&
	    (low & PAGE_MASK) == 0 &&
	    (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0);
	KASSERT(high + sz > high); /* Check for overflow. */

	/* Hint magic: clamp the hint into [low, high], honoring direction. */
	if (hint == 0)
		hint = (direction == 1 ? low : high);
	else if (hint > high) {
		if (direction != -1)
			return ENOMEM;
		hint = high;
	} else if (hint < low) {
		if (direction != 1)
			return ENOMEM;
		hint = low;
	}

	for (entry = uvm_map_entrybyaddr(&map->addr,
	    hint - (direction == -1 ? 1 : 0)); entry != NULL;
	    entry = (direction == 1 ?
	    RBT_NEXT(uvm_map_addr, entry) :
	    RBT_PREV(uvm_map_addr, entry))) {
		if ((direction == 1 && VMMAP_FREE_START(entry) > high) ||
		    (direction == -1 && VMMAP_FREE_END(entry) < low)) {
			break;
		}

		if (uvm_addr_fitspace(&low_addr, &high_addr,
		    MAX(low, VMMAP_FREE_START(entry)),
		    MIN(high, VMMAP_FREE_END(entry)),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*entry_out = entry;
			if (hint >= low_addr && hint <= high_addr) {
				*addr_out = hint;
			} else {
				*addr_out = (direction == 1 ?
				    low_addr : high_addr);
			}
			return 0;
		}
	}

	return ENOMEM;
}

/*
 * Invoke address selector of uaddr.
 * uaddr may be NULL, in which case the algorithm will fail with ENOMEM.
 *
 * Will invoke uvm_map_isavail to fill in last_out.
 */
int
uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, struct vm_map_entry **last_out,
    vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
{
	int error;

	if (uaddr == NULL)
		return ENOMEM;

	hint &= ~((vaddr_t)PAGE_MASK);
	if (hint != 0 &&
	    !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr))
		return ENOMEM;

	error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
	    entry_out, addr_out, sz, align, offset, prot, hint);

	if (error == 0) {
		KASSERT(*entry_out != NULL);
		*last_out = NULL;
		if (!uvm_map_isavail(map, uaddr, entry_out, last_out,
		    *addr_out, sz)) {
			panic("uvm_addr_invoke: address selector %p "
			    "(%s 0x%lx-0x%lx) "
			    "returned unavailable address 0x%lx sz 0x%lx",
			    uaddr, uaddr->uaddr_functions->uaddr_name,
			    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr,
			    *addr_out, sz);
		}
	}

	return error;
}

#if defined(DEBUG) || defined(DDB)
void
uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full,
    int (*pr)(const char *, ...))
{
	if (uaddr == NULL) {
		(*pr)("- uvm_addr %s: NULL\n", slot);
		return;
	}

	(*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr,
	    uaddr->uaddr_functions->uaddr_name,
	    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr);
	if (uaddr->uaddr_functions->uaddr_print == NULL)
		return;

	(*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr);
}
#endif /* DEBUG || DDB */

/*
 * Destroy a uvm_addr_state structure.
 * The uaddr must have been previously allocated from uaddr_pool.
 */
void
uaddr_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_pool, uaddr);
}


#if 0
/*
 * Linear allocator.
 * This allocator uses a first-fit algorithm.
 *
 * If hint is set, search will start at the hint position.
 * Only searches forward.
 */
const struct uvm_addr_functions uaddr_lin_functions = {
	.uaddr_select = &uaddr_lin_select,
	.uaddr_destroy = &uaddr_destroy,
	.uaddr_name = "uaddr_lin"
};

struct uvm_addr_state *
uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uvm_addr_state *uaddr;

	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
	uaddr->uaddr_minaddr = minaddr;
	uaddr->uaddr_maxaddr = maxaddr;
	uaddr->uaddr_functions = &uaddr_lin_functions;
	return uaddr;
}

int
uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vaddr_t guard_sz;

	/* Deal with guardpages: search for space with one extra page. */
	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);

	if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr - guard_sz < sz)
		return ENOMEM;
	return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz,
	    align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz,
	    0, guard_sz);
}
#endif

/*
 * Randomized allocator.
 * This allocator uses uvm_map_hint to acquire a random address and
 * searches from there.
 */

const struct uvm_addr_functions uaddr_rnd_functions = {
	.uaddr_select = &uaddr_rnd_select,
	.uaddr_free_insert = &uaddr_rnd_insert,
	.uaddr_free_remove = &uaddr_rnd_remove,
	.uaddr_destroy = &uaddr_rnd_destroy,
#if defined(DEBUG) || defined(DDB)
#if 0
	.uaddr_print = &uaddr_rnd_print,
#endif
#endif /* DEBUG || DDB */
	.uaddr_name = "uaddr_rnd"
};

struct uvm_addr_state *
uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uaddr_rnd_state *uaddr;

	uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK);
	uaddr->ur_uaddr.uaddr_minaddr = minaddr;
	uaddr->ur_uaddr.uaddr_maxaddr = maxaddr;
	uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions;
#if 0
	TAILQ_INIT(&uaddr->ur_free);
#endif
	return &uaddr->ur_uaddr;
}

int
uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	struct vmspace		*vm;
	vaddr_t			 minaddr, maxaddr;
	vaddr_t			 guard_sz;
	vaddr_t			 low_addr, high_addr;
	struct vm_map_entry	*entry, *next;
	vsize_t			 before_gap, after_gap;
	vaddr_t			 tmp;

	KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0);
	vm = (struct vmspace *)map;

	/* Deal with guardpages: search for space with one extra page. */
	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);

	if (uaddr->uaddr_maxaddr - guard_sz < sz)
		return ENOMEM;
	minaddr = uvm_addr_align_forward(uaddr->uaddr_minaddr, align, offset);
	maxaddr = uvm_addr_align_backward(uaddr->uaddr_maxaddr - sz - guard_sz,
	    align, offset);

	/* Quick fail if the allocation won't fit. */
	if (minaddr >= maxaddr)
		return ENOMEM;

	/* Select a hint. */
	if (hint == 0)
		hint = uvm_map_hint(vm, prot, minaddr, maxaddr);
	/* Clamp hint to uaddr range. */
	hint = MIN(MAX(hint, minaddr), maxaddr);

	/* Align hint to align,offset parameters. */
	tmp = hint;
	hint = uvm_addr_align_forward(tmp, align, offset);
	/* Check for overflow during alignment. */
	if (hint < tmp || hint > maxaddr)
		return ENOMEM;	/* Compatibility mode: never look backwards. */

	before_gap = 0;
	after_gap = guard_sz;
	hint -= MIN(hint, before_gap);

	/*
	 * Use the augmented address tree to look up the first entry
	 * at or after hint with sufficient space.
	 *
	 * This is the original optimized code.  It may fail when the
	 * subtree it examines has sufficient space, but that space
	 * cannot satisfy the align constraint.
	 *
	 * Guard: subtree is not exhausted and max(fspace) >= required.
	 */
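	/*
	 * fspace_augment caches the largest fspace in the subtree rooted
	 * at an entry; it is what allows whole subtrees that cannot hold
	 * before_gap + sz + after_gap bytes to be skipped.
	 */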
	entry = uvm_map_entrybyaddr(&map->addr, hint);

	/* Walk up the tree, until there is at least sufficient space. */
	while (entry != NULL &&
	    entry->fspace_augment < before_gap + after_gap + sz)
		entry = RBT_PARENT(uvm_map_addr, entry);

	while (entry != NULL) {
		/* Test if this fits. */
		if (VMMAP_FREE_END(entry) > hint &&
		    uvm_map_uaddr_e(map, entry) == uaddr &&
		    uvm_addr_fitspace(&low_addr, &high_addr,
		    MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)),
		    MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*entry_out = entry;
			if (hint >= low_addr && hint <= high_addr)
				*addr_out = hint;
			else
				*addr_out = low_addr;
			return 0;
		}

		/* RBT_NEXT, but skip subtrees that cannot possibly fit. */
		next = RBT_RIGHT(uvm_map_addr, entry);
		if (next != NULL &&
		    next->fspace_augment >= before_gap + after_gap + sz) {
			entry = next;
			while ((next = RBT_LEFT(uvm_map_addr, entry)) !=
			    NULL)
				entry = next;
		} else {
do_parent:
			next = RBT_PARENT(uvm_map_addr, entry);
			if (next == NULL)
				entry = NULL;
			else if (RBT_LEFT(uvm_map_addr, next) == entry)
				entry = next;
			else {
				entry = next;
				goto do_parent;
			}
		}
	}

	/* Lookup failed. */
	return ENOMEM;
}

/*
 * Destroy a uaddr_rnd_state structure.
 */
void
uaddr_rnd_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_rnd_pool, uaddr);
}

/*
 * Insert an entry: no-op.
 * The rnd allocator keeps no free-space list (the tailq code is
 * disabled), so there is nothing to do.
 */
void
uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	return;
}

/*
 * Remove an entry: no-op, see uaddr_rnd_insert.
 */
void
uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	return;
}

#if 0
#if defined(DEBUG) || defined(DDB)
void
uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full,
    int (*pr)(const char *, ...))
{
	struct vm_map_entry	*entry;
	struct uaddr_rnd_state	*uaddr;
	vaddr_t			 addr;
	size_t			 count;
	vsize_t			 space;

	uaddr = (struct uaddr_rnd_state *)uaddr_p;
	addr = 0;
	count = 0;
	space = 0;
	TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) {
		count++;
		space += entry->fspace;

		if (full) {
			(*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n",
			    entry, entry->start, entry->end,
			    entry->guard, entry->fspace);
			(*pr)("\t\tfree: 0x%lx-0x%lx\n",
			    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
		}
		if (entry->start < addr) {
			if (!full)
				(*pr)("\tentry %p: 0x%lx-0x%lx "
				    "G=0x%lx F=0x%lx\n",
				    entry, entry->start, entry->end,
				    entry->guard, entry->fspace);
			(*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n",
			    entry->start, addr);
		}

		addr = VMMAP_FREE_END(entry);
	}
	(*pr)("\t0x%lu entries, 0x%lx free bytes\n", count, space);
}
#endif /* DEBUG || DDB */
#endif

/*
 * Kernel allocation bootstrap logic.
 */
const struct uvm_addr_functions uaddr_kernel_functions = {
	.uaddr_select = &uaddr_kbootstrap_select,
	.uaddr_destroy = &uaddr_kbootstrap_destroy,
	.uaddr_name = "uaddr_kbootstrap"
};

/*
 * Select an address from the map.
 *
 * This function ignores the uaddr spec and instead uses the map directly.
 * Because of that property, the uaddr algorithm can be shared across all
 * kernel maps.
 */
int
uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
{
	vaddr_t tmp;

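	/*
	 * Bootstrap: scan the kernel map in address order and take the
	 * first free range below uvm_maxkaddr that fits the request.
	 */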
	RBT_FOREACH(*entry_out, uvm_map_addr, &map->addr) {
		if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr &&
		    uvm_addr_fitspace(addr_out, &tmp,
		    VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out),
		    sz, align, offset, 0, 0) == 0)
			return 0;
	}

	return ENOMEM;
}

/*
 * Don't destroy the kernel bootstrap allocator.
 */
void
uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr)
{
	KASSERT(uaddr == (struct uvm_addr_state *)&uaddr_kbootstrap);
}

#ifndef SMALL_KERNEL
/*
 * Best fit algorithm.
 */

const struct uvm_addr_functions uaddr_bestfit_functions = {
	.uaddr_select = &uaddr_bestfit_select,
	.uaddr_free_insert = &uaddr_bestfit_insert,
	.uaddr_free_remove = &uaddr_bestfit_remove,
	.uaddr_destroy = &uaddr_bestfit_destroy,
	.uaddr_name = "uaddr_bestfit"
};

struct uvm_addr_state *
uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uaddr_bestfit_state *uaddr;

	uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK);
	uaddr->ubf_uaddr.uaddr_minaddr = minaddr;
	uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr;
	uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions;
	RBT_INIT(uaddr_free_rbtree, &uaddr->ubf_free);
	return &uaddr->ubf_uaddr;
}

void
uaddr_bestfit_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_bestfit_pool, uaddr);
}

void
uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	struct uaddr_bestfit_state	*uaddr;
	struct vm_map_entry		*rb_rv;

	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
	if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) !=
	    NULL) {
		panic("%s: duplicate insertion: state %p "
		    "inserting %p, colliding with %p", __func__,
		    uaddr, entry, rb_rv);
	}
}

void
uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	struct uaddr_bestfit_state *uaddr;

	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
	if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry)
		panic("%s: entry was not in tree", __func__);
}

int
uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vaddr_t				 min, max;
	struct uaddr_bestfit_state	*uaddr;
	struct vm_map_entry		*entry;
	vsize_t				 guardsz;

	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
	guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0);
	if (sz + guardsz < sz)
		return ENOMEM;

	/*
	 * Find smallest item on freelist capable of holding item.
	 * Deal with guardpages: search for space with one extra page.
	 */
	entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz);
	if (entry == NULL)
		return ENOMEM;

	/* Walk the tree until we find an entry that fits. */
	while (uvm_addr_fitspace(&min, &max,
	    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
	    sz, align, offset, 0, guardsz) != 0) {
		entry = RBT_NEXT(uaddr_free_rbtree, entry);
		if (entry == NULL)
			return ENOMEM;
	}

	/* Return the address that generates the least fragmentation. */
	*entry_out = entry;
	*addr_out = (min - VMMAP_FREE_START(entry) <=
	    VMMAP_FREE_END(entry) - guardsz - sz - max ?
	    min : max);
	return 0;
}
#endif /* !SMALL_KERNEL */


#ifndef SMALL_KERNEL
/*
 * A userspace allocator based on pivots.
 */

const struct uvm_addr_functions uaddr_pivot_functions = {
	.uaddr_select = &uaddr_pivot_select,
	.uaddr_free_insert = &uaddr_pivot_insert,
	.uaddr_free_remove = &uaddr_pivot_remove,
	.uaddr_destroy = &uaddr_pivot_destroy,
#if defined(DEBUG) || defined(DDB)
	.uaddr_print = &uaddr_pivot_print,
#endif /* DEBUG || DDB */
	.uaddr_name = "uaddr_pivot"
};

/*
 * A special random function for pivots.
 *
 * This function will return:
 * - a random number
 * - a multiple of PAGE_SIZE
 * - at least PAGE_SIZE
 *
 * The random function has a slightly higher chance of returning a
 * small number.
 */
vsize_t
uaddr_pivot_random(void)
{
	int r;

	/*
	 * The sum of two dice has a triangular distribution that peaks
	 * at the middle value.  We fold that curve at its peak (think
	 * of a graph on a piece of paper, that you fold), which maps
	 * the most probable outcomes onto the smallest numbers.
	 *
	 * The fold happens at PIVOT_RND - 1, so small gaps are the most
	 * likely results.
	 */
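	/*
	 * Worked example with PIVOT_RND == 8: each arc4random_uniform()
	 * call yields 0..7, so r spans -7..7 before the fold and 0..7
	 * after it.  The value returned below is (1 + r) pages, i.e.
	 * between PAGE_SIZE and 8 * PAGE_SIZE, biased towards the low
	 * end.
	 */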
	r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) -
	    (PIVOT_RND - 1);
	if (r < 0)
		r = -r;

	/*
	 * Make the returned value at least PAGE_SIZE and a multiple of
	 * PAGE_SIZE.
	 */
	return (vaddr_t)(1 + r) << PAGE_SHIFT;
}

/*
 * Select a new pivot.
 *
 * A pivot must:
 * - be chosen randomly,
 * - have a randomly chosen gap between it and the start of the
 *   uaddr_state range,
 * - have a randomly chosen gap between it and the end of the
 *   uaddr_state range.
 *
 * Furthermore, the pivot must provide sufficient space for the allocation.
 * *addr_out will be set to the selected address.
 *
 * Returns ENOMEM on failure.
 */
int
uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr,
    struct uaddr_pivot *pivot,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vsize_t before_gap, vsize_t after_gap)
{
	struct vm_map_entry	*entry, *found;
	vaddr_t			 minaddr, maxaddr;
	vsize_t			 dist;
	vaddr_t			 found_minaddr, found_maxaddr;
	vaddr_t			 min, max;
	vsize_t			 arc4_arg;
	int			 fit_error;
	u_int32_t		 path;

	minaddr = uaddr->up_uaddr.uaddr_minaddr;
	maxaddr = uaddr->up_uaddr.uaddr_maxaddr;
	KASSERT(minaddr < maxaddr);
#ifdef DIAGNOSTIC
	if (minaddr + 2 * PAGE_SIZE > maxaddr) {
		panic("uaddr_pivot_newpivot: cannot grant random pivot "
		    "in area less than 2 pages (size = 0x%lx)",
		    maxaddr - minaddr);
	}
#endif /* DIAGNOSTIC */

	/*
	 * Gap calculation: 1/32 of the size of the managed area.
	 *
	 * At most: small enough not to be truncated by arc4random's
	 * 32-bit argument range.
	 * At least: 2 * PAGE_SIZE.
	 *
	 * minaddr and maxaddr will be changed according to arc4random.
	 */
	dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE);
	if (dist >> PAGE_SHIFT > 0xffffffff) {
		minaddr += (vsize_t)arc4random() << PAGE_SHIFT;
		maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT;
	} else {
		minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
		    PAGE_SHIFT;
		maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
		    PAGE_SHIFT;
	}

	/*
	 * A very fast way to find an entry that will be large enough
	 * to hold the allocation, but still is found more or less
	 * randomly: the tree path selector has a 50% chance to go for
	 * a bigger or smaller entry.
	 *
	 * Note that the memory may actually be available,
	 * but the fragmentation may be so bad and the gaps chosen
	 * so unfortunately, that the allocation will not succeed.
	 * Or the alignment can only be satisfied by an entry that
	 * is not visited in the randomly selected path.
	 *
	 * This code finds an entry with sufficient space in O(log n) time.
	 */
	path = arc4random();
	found = NULL;
	entry = RBT_ROOT(uaddr_free_rbtree, &uaddr->up_free);
	while (entry != NULL) {
		fit_error = uvm_addr_fitspace(&min, &max,
		    MAX(VMMAP_FREE_START(entry), minaddr),
		    MIN(VMMAP_FREE_END(entry), maxaddr),
		    sz, align, offset, before_gap, after_gap);

		/* It fits, save this entry. */
		if (fit_error == 0) {
			found = entry;
			found_minaddr = min;
			found_maxaddr = max;
		}

		/* Next. */
		if (fit_error != 0)
			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
		else if ((path & 0x1) == 0) {
			path >>= 1;
			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
		} else {
			path >>= 1;
			entry = RBT_LEFT(uaddr_free_rbtree, entry);
		}
	}
	if (found == NULL)
		return ENOMEM;	/* No large enough region found. */

	/*
	 * Calculate a random address within found.
	 *
	 * found_minaddr and found_maxaddr are already aligned, so be sure
	 * to select a multiple of align as the offset in the entry.
	 * Preferably, arc4random_uniform is used to provide no bias within
	 * the entry.
	 * However if the size of the entry exceeds arc4random_uniform's
	 * argument limit, we simply use arc4random (thus limiting ourselves
	 * to 4G * PAGE_SIZE bytes offset).
	 */
	if (found_maxaddr == found_minaddr)
		*addr_out = found_minaddr;
	else {
		KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0);
		arc4_arg = found_maxaddr - found_minaddr;
		if (arc4_arg > 0xffffffff) {
			*addr_out = found_minaddr +
			    (arc4random() & ~(align - 1));
		} else {
			*addr_out = found_minaddr +
			    (arc4random_uniform(arc4_arg) & ~(align - 1));
		}
	}
	/* Address was found in this entry. */
	*entry_out = found;

	/*
	 * Set up new pivot and return selected address.
	 *
	 * Depending on the direction of the pivot, the pivot must be placed
	 * at the bottom or the top of the allocation:
	 * - if the pivot moves upwards, place the pivot at the top of the
	 *   allocation,
	 * - if the pivot moves downwards, place the pivot at the bottom
	 *   of the allocation.
	 */
	pivot->entry = found;
	pivot->dir = (arc4random() & 0x1 ? 1 : -1);
	if (pivot->dir > 0)
		pivot->addr = *addr_out + sz;
	else
		pivot->addr = *addr_out;
	pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */
	return 0;
}

/*
 * Pivot selector.
 *
 * Each time the selector is invoked, it will select a random pivot, which
 * it will use to select memory with. The memory will be placed at the pivot,
 * with a randomly sized gap between the allocation and the pivot.
 * The pivot will then move so it will never revisit this address.
 *
 * Each allocation, the pivot expiry timer ticks. Once the pivot becomes
 * expired, it will be replaced with a newly created pivot. Pivots also
 * automatically expire if they fail to provide memory for an allocation.
 *
 * Expired pivots are replaced using the uaddr_pivot_newpivot() function,
 * which will ensure the pivot points at memory in such a way that the
 * allocation will succeed.
 * As an added bonus, the uaddr_pivot_newpivot() function will perform the
 * allocation immediately and move the pivot as appropriate.
 *
 * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the
 * allocation to succeed, it will not create a new pivot and the allocation
 * will fail.
 *
 * A pivot running into used memory will automatically expire (because it will
 * fail to allocate).
 *
 * Characteristics of the allocator:
 * - best case, an allocation is O(log N)
 *   (it would be O(1), if it weren't for the need to check if the memory is
 *   free; although that can be avoided...)
 * - worst case, an allocation is O(log N)
 *   (the uaddr_pivot_newpivot() function has that complexity)
 * - failed allocations always take O(log N)
 *   (the uaddr_pivot_newpivot() function will walk that deep into the tree).
 */
int
uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	struct uaddr_pivot_state	*uaddr;
	struct vm_map_entry		*entry;
	struct uaddr_pivot		*pivot;
	vaddr_t				 min, max;
	vsize_t				 before_gap, after_gap;
	int				 err;

	/*
	 * When we have a hint, use the rnd allocator that finds the
	 * area that is closest to the hint, if there is such an area.
	 */
	if (hint != 0) {
		if (uaddr_rnd_select(map, uaddr_p, entry_out, addr_out,
		    sz, align, offset, prot, hint) == 0)
			return 0;
		return ENOMEM;
	}

	/*
	 * Select a random pivot and random gap sizes around the allocation.
	 */
	uaddr = (struct uaddr_pivot_state *)uaddr_p;
	pivot = &uaddr->up_pivots[
	    arc4random_uniform(nitems(uaddr->up_pivots))];
	before_gap = uaddr_pivot_random();
	after_gap = uaddr_pivot_random();
	if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0)
		goto expired;	/* Pivot is invalid (null or expired). */

	/* Attempt to use the pivot to map the entry. */
	entry = pivot->entry;
	if (pivot->dir > 0) {
		if (uvm_addr_fitspace(&min, &max,
		    MAX(VMMAP_FREE_START(entry), pivot->addr),
		    VMMAP_FREE_END(entry), sz, align, offset,
		    before_gap, after_gap) == 0) {
			*addr_out = min;
			*entry_out = entry;
			pivot->addr = min + sz;
			pivot->expire--;
			return 0;
		}
	} else {
		if (uvm_addr_fitspace(&min, &max,
		    VMMAP_FREE_START(entry),
		    MIN(VMMAP_FREE_END(entry), pivot->addr),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*addr_out = max;
			*entry_out = entry;
			pivot->addr = max;
			pivot->expire--;
			return 0;
		}
	}

expired:
	/*
	 * Pivot expired or allocation failed.
	 * Use pivot selector to do the allocation and find a new pivot.
	 */
	err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out,
	    sz, align, offset, before_gap, after_gap);
	return err;
}

/*
 * Free the pivot selector state.
 */
void
uaddr_pivot_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_pivot_pool, uaddr);
}

/*
 * Insert an entry with free space in the space tree.
 */
void
uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	struct uaddr_pivot_state	*uaddr;
	struct vm_map_entry		*rb_rv;
	struct uaddr_pivot		*p;
	vaddr_t				 check_addr;
	vaddr_t				 start, end;

	uaddr = (struct uaddr_pivot_state *)uaddr_p;
	if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) !=
	    NULL) {
		panic("%s: duplicate insertion: state %p "
		    "inserting entry %p which collides with %p", __func__,
		    uaddr, entry, rb_rv);
	}

	start = VMMAP_FREE_START(entry);
	end = VMMAP_FREE_END(entry);

	/*
	 * Update all pivots that are contained in this entry.
	 */
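	/*
	 * Note: a pivot that moves downwards points at the start of its
	 * last allocation, so the free space it will use lies just below
	 * it; that is why check_addr is decremented for such pivots.
	 */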
	for (p = &uaddr->up_pivots[0];
	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
		check_addr = p->addr;
		if (check_addr == 0)
			continue;
		if (p->dir < 0)
			check_addr--;

		if (start <= check_addr &&
		    check_addr < end) {
			KASSERT(p->entry == NULL);
			p->entry = entry;
		}
	}
}

/*
 * Remove an entry with free space from the space tree.
 */
void
uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	struct uaddr_pivot_state	*uaddr;
	struct uaddr_pivot		*p;

	uaddr = (struct uaddr_pivot_state *)uaddr_p;
	if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry)
		panic("%s: entry was not in tree", __func__);

	/*
	 * Inform any pivot with this entry that the entry is gone.
	 * Note that this does not automatically invalidate the pivot.
	 */
	for (p = &uaddr->up_pivots[0];
	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
		if (p->entry == entry)
			p->entry = NULL;
	}
}

/*
 * Create a new pivot selector.
 *
 * Initially, all pivots are in the expired state.
 * Two reasons for this:
 * - creation of the selector stays cheap, and
 * - pivots select better on demand: pivot selection is affected by
 *   preceding allocations, so later pivots will likely end up in
 *   different segments of free memory that earlier allocations carved
 *   out, giving a better spread.
 */
struct uvm_addr_state *
uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uaddr_pivot_state *uaddr;

	uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK);
	uaddr->up_uaddr.uaddr_minaddr = minaddr;
	uaddr->up_uaddr.uaddr_maxaddr = maxaddr;
	uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions;
	RBT_INIT(uaddr_free_rbtree, &uaddr->up_free);
	memset(uaddr->up_pivots, 0, sizeof(uaddr->up_pivots));

	return &uaddr->up_uaddr;
}

#if defined(DEBUG) || defined(DDB)
/*
 * Print the uaddr_pivot_state.
 *
 * If full, a listing of all entries in the state will be provided.
 */
void
uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full,
    int (*pr)(const char *, ...))
{
	struct uaddr_pivot_state	*uaddr;
	struct uaddr_pivot		*pivot;
	struct vm_map_entry		*entry;
	int				 i;
	vaddr_t				 check_addr;

	uaddr = (struct uaddr_pivot_state *)uaddr_p;

	for (i = 0; i < NUM_PIVOTS; i++) {
		pivot = &uaddr->up_pivots[i];

		(*pr)("\tpivot 0x%lx, expires in %d, direction %d\n",
		    pivot->addr, pivot->expire, pivot->dir);
	}
	if (!full)
		return;

	if (RBT_EMPTY(uaddr_free_rbtree, &uaddr->up_free))
		(*pr)("\tempty\n");
	/* Print list of free space. */
	RBT_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) {
		(*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n",
		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
		    VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry));

		for (i = 0; i < NUM_PIVOTS; i++) {
			pivot = &uaddr->up_pivots[i];
			check_addr = pivot->addr;
			if (check_addr == 0)
				continue;
			if (pivot->dir < 0)
				check_addr--;

			if (VMMAP_FREE_START(entry) <= check_addr &&
			    check_addr < VMMAP_FREE_END(entry)) {
				(*pr)("\t\tcontains pivot %d (0x%lx)\n",
				    i, pivot->addr);
			}
		}
	}
}
#endif /* DEBUG || DDB */
#endif /* !SMALL_KERNEL */

#ifndef SMALL_KERNEL
/*
 * Stack/break allocator.
 *
 * The stack area is grown into in the direction opposite to the stack
 * growth; the brk area is grown into downward (because sbrk() grows
 * the heap upward).
 *
 * The brk area is searched first; if the allocation cannot be
 * satisfied there, the stack area is tried.
 */
const struct uvm_addr_functions uaddr_stack_brk_functions = {
	.uaddr_select = &uaddr_stack_brk_select,
	.uaddr_destroy = &uaddr_destroy,
	.uaddr_name = "uaddr_stckbrk"
};

/*
 * Stack/brk address selector.
 */
int
uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vaddr_t start;
	vaddr_t end;
	vsize_t before_gap;
	vsize_t after_gap;
	int	dir;

	/* Set up brk search strategy. */
	start = MAX(map->b_start, uaddr->uaddr_minaddr);
	end = MIN(map->b_end, uaddr->uaddr_maxaddr);
	before_gap = 0;
	after_gap = 0;
	dir = -1;	/* Opposite of brk() growth. */

	if (end - start >= sz) {
		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
		    0, sz, align, offset, dir, start, end - sz,
		    before_gap, after_gap) == 0)
			return 0;
	}

	/* Set up stack search strategy. */
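	/*
	 * Keep a random gap of one to four pages on either side of the
	 * allocation.
	 */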
	start = MAX(map->s_start, uaddr->uaddr_minaddr);
	end = MIN(map->s_end, uaddr->uaddr_maxaddr);
	before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
	after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
#ifdef MACHINE_STACK_GROWS_UP
	dir = -1;
#else
	dir = 1;
#endif
	if (end - start >= before_gap + after_gap &&
	    end - start - before_gap - after_gap >= sz) {
		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
		    0, sz, align, offset, dir, start, end - sz,
		    before_gap, after_gap) == 0)
			return 0;
	}

	return ENOMEM;
}

struct uvm_addr_state *
uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uvm_addr_state *uaddr;

	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
	uaddr->uaddr_minaddr = minaddr;
	uaddr->uaddr_maxaddr = maxaddr;
	uaddr->uaddr_functions = &uaddr_stack_brk_functions;
	return uaddr;
}
#endif /* !SMALL_KERNEL */


#ifndef SMALL_KERNEL
/*
 * Free space comparison.
 * Compares smaller free-space before larger free-space.
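 * Ties are broken on the entry's start address, making the ordering
 * total.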
 */
static inline int
uvm_mapent_fspace_cmp(const struct vm_map_entry *e1,
    const struct vm_map_entry *e2)
{
	if (e1->fspace != e2->fspace)
		return (e1->fspace < e2->fspace ? -1 : 1);
	return (e1->start < e2->start ? -1 : e1->start > e2->start);
}

RBT_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
    uvm_mapent_fspace_cmp);
#endif /* !SMALL_KERNEL */