Line data Source code
1 : /* $OpenBSD: subr_pool.c,v 1.223 2018/06/08 15:38:15 guenther Exp $ */
2 : /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */
3 :
4 : /*-
5 : * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6 : * All rights reserved.
7 : *
8 : * This code is derived from software contributed to The NetBSD Foundation
9 : * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10 : * Simulation Facility, NASA Ames Research Center.
11 : *
12 : * Redistribution and use in source and binary forms, with or without
13 : * modification, are permitted provided that the following conditions
14 : * are met:
15 : * 1. Redistributions of source code must retain the above copyright
16 : * notice, this list of conditions and the following disclaimer.
17 : * 2. Redistributions in binary form must reproduce the above copyright
18 : * notice, this list of conditions and the following disclaimer in the
19 : * documentation and/or other materials provided with the distribution.
20 : *
21 : * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22 : * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 : * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 : * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25 : * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 : * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 : * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 : * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 : * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 : * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 : * POSSIBILITY OF SUCH DAMAGE.
32 : */
33 :
34 : #include <sys/param.h>
35 : #include <sys/systm.h>
36 : #include <sys/errno.h>
37 : #include <sys/kernel.h>
38 : #include <sys/malloc.h>
39 : #include <sys/pool.h>
40 : #include <sys/proc.h>
41 : #include <sys/syslog.h>
42 : #include <sys/sysctl.h>
43 : #include <sys/task.h>
44 : #include <sys/timeout.h>
45 : #include <sys/percpu.h>
46 :
47 : #include <uvm/uvm_extern.h>
48 :
49 : /*
50 : * Pool resource management utility.
51 : *
52 : * Memory is allocated in pages which are split into pieces according to
53 : * the pool item size. Each page is kept on one of three lists in the
54 : * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
55 : * for empty, full and partially-full pages respectively. The individual
56 : * pool items are on a linked list headed by `ph_items' in each page
57 : * header. The memory for building the page list is either taken from
58 : * the allocated pages themselves (for small pool items) or taken from
59 : * an internal pool of page headers (`phpool').
60 : */
61 :
62 : /* List of all pools; pools are linked through their pr_poollist entry */
63 : SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
64 :
65 : /*
66 : * Every pool gets a unique serial number assigned to it. pool_init()
67 : * panics if this counter ever wraps back to zero, but we shouldn't
68 : * create so many pools anyway.
68 : */
69 : unsigned int pool_serial;
/* bumped by pool_init() and dropped by pool_destroy() */
70 : unsigned int pool_count;
71 :
72 : /* Lock the previous variables making up the global pool state */
73 : struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
74 :
75 : /*
 * Private pool for page header structures; set up by the first
 * pool_init() call and used for pools whose header is kept off-page.
 */
76 : struct pool phpool;
77 :
/*
 * Table of lock operations used for a pool's locks.  pool_init() points
 * pr_lock_ops at pool_lock_ops_rw for PR_RWLOCK pools and at
 * pool_lock_ops_mtx for every other pool; the pl_*() inline wrappers
 * dispatch through this table.
 */
78 : struct pool_lock_ops {
79 : void (*pl_init)(struct pool *, union pool_lock *,
80 : const struct lock_type *);
81 : void (*pl_enter)(union pool_lock * LOCK_FL_VARS);
82 : int (*pl_enter_try)(union pool_lock * LOCK_FL_VARS);
83 : void (*pl_leave)(union pool_lock * LOCK_FL_VARS);
84 : void (*pl_assert_locked)(union pool_lock *);
85 : void (*pl_assert_unlocked)(union pool_lock *);
86 : int (*pl_sleep)(void *, union pool_lock *, int, const char *, int);
87 : };
88 :
/* forward declarations; the tables themselves are defined elsewhere in the file */
89 : static const struct pool_lock_ops pool_lock_ops_mtx;
90 : static const struct pool_lock_ops pool_lock_ops_rw;
91 :
/*
 * pl_init(pp, pl): initialise lock `pl' through the pool's lock
 * operations.  With WITNESS a static lock_type whose name is the
 * stringified `pl' argument is passed along; otherwise the lock type
 * argument is NULL.
 */
92 : #ifdef WITNESS
93 : #define pl_init(pp, pl) do { \
94 : static const struct lock_type __lock_type = { .lt_name = #pl }; \
95 : (pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type); \
96 : } while (0)
97 : #else /* WITNESS */
98 : #define pl_init(pp, pl) (pp)->pr_lock_ops->pl_init(pp, pl, NULL)
99 : #endif /* WITNESS */
100 :
101 : static inline void
102 0 : pl_enter(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
103 : {
104 111 : pp->pr_lock_ops->pl_enter(pl LOCK_FL_ARGS);
105 0 : }
106 : static inline int
107 0 : pl_enter_try(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
108 : {
109 0 : return pp->pr_lock_ops->pl_enter_try(pl LOCK_FL_ARGS);
110 : }
111 : static inline void
112 0 : pl_leave(struct pool *pp, union pool_lock *pl LOCK_FL_VARS)
113 : {
114 0 : pp->pr_lock_ops->pl_leave(pl LOCK_FL_ARGS);
115 0 : }
116 : static inline void
117 0 : pl_assert_locked(struct pool *pp, union pool_lock *pl)
118 : {
119 120 : pp->pr_lock_ops->pl_assert_locked(pl);
120 0 : }
121 : static inline void
122 0 : pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
123 : {
124 0 : pp->pr_lock_ops->pl_assert_unlocked(pl);
125 0 : }
126 : static inline int
127 0 : pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
128 : const char *wmesg, int timo)
129 : {
130 0 : return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg, timo);
131 : }
132 :
/*
 * With WITNESS, wrap the inline lock helpers in macros of the same name
 * so that every call site also passes LOCK_FILE_LINE (defined elsewhere;
 * presumably the caller's file and line -- confirm against the lock
 * headers).
 */
133 : #ifdef WITNESS
134 : # define pl_enter(pp,pl) pl_enter(pp,pl LOCK_FILE_LINE)
135 : # define pl_enter_try(pp,pl) pl_enter_try(pp,pl LOCK_FILE_LINE)
136 : # define pl_leave(pp,pl) pl_leave(pp,pl LOCK_FILE_LINE)
137 : #endif
138 :
/*
 * Free-list linkage stored at the start of every free item.  pi_magic
 * is set to POOL_IMAGIC(ph, pi) when the item is placed on a page's
 * free list and is verified again when the item is taken off or the
 * page is freed.
 */
139 : struct pool_item {
140 : u_long pi_magic;
141 : XSIMPLEQ_ENTRY(pool_item) pi_list;
142 : };
/* expected pi_magic: the item's address xor the owning page's ph_magic */
143 : #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
144 :
/*
 * Per-page bookkeeping.  The header lives either inside the page itself
 * (at pr_phoffset) or in an object taken from phpool.
 */
145 : struct pool_page_header {
146 : /* Page headers */
147 : TAILQ_ENTRY(pool_page_header)
148 : ph_entry; /* pool page list */
149 : XSIMPLEQ_HEAD(, pool_item)
150 : ph_items; /* free items on the page */
151 : RBT_ENTRY(pool_page_header)
152 : ph_node; /* off-page page headers */
153 : unsigned int ph_nmissing; /* # of chunks in use */
154 : caddr_t ph_page; /* this page's address */
155 : caddr_t ph_colored; /* page's colored address */
156 : unsigned long ph_magic; /* random per-page value mixed into pi_magic */
157 : int ph_tick; /* value of ticks when the page last became empty */
158 : };
159 : #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
/* true if the items of this page are poisoned while they are free */
160 : #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
161 :
/*
 * Per-CPU item cache support, only built for MULTIPROCESSOR kernels.
 * The implementation of the functions declared here is outside this
 * part of the file.
 */
162 : #ifdef MULTIPROCESSOR
163 : struct pool_cache_item {
164 : struct pool_cache_item *ci_next; /* next item in list */
165 : unsigned long ci_nitems; /* number of items in list */
166 : TAILQ_ENTRY(pool_cache_item)
167 : ci_nextl; /* entry in list of lists */
168 : };
169 :
170 : /* whether the cached item is poisoned is stored in bit 27 of ci_nitems, just above the 27-bit item count */
171 : #define POOL_CACHE_ITEM_NITEMS_MASK 0x7ffffffUL
172 : #define POOL_CACHE_ITEM_NITEMS_POISON 0x8000000UL
173 :
/* item count of a list, with the poison flag masked off */
174 : #define POOL_CACHE_ITEM_NITEMS(_ci) \
175 : ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)
176 :
/* nonzero if the poison flag is set on this cached item */
177 : #define POOL_CACHE_ITEM_POISONED(_ci) \
178 : ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)
179 :
180 : struct pool_cache {
181 : struct pool_cache_item *pc_actv; /* active list of items */
182 : unsigned long pc_nactv; /* actv head nitems cache */
183 : struct pool_cache_item *pc_prev; /* previous list of items */
184 :
185 : uint64_t pc_gen; /* generation number */
186 : uint64_t pc_nget; /* # of successful requests */
187 : uint64_t pc_nfail; /* # of unsuccessful reqs */
188 : uint64_t pc_nput; /* # of releases */
189 : uint64_t pc_nlget; /* # of list requests */
190 : uint64_t pc_nlfail; /* # of fails getting a list */
191 : uint64_t pc_nlput; /* # of list releases */
192 :
193 : int pc_nout;
194 : };
195 :
/* pool_get()/pool_put()/pool_destroy() call these when pr_cache is set */
196 : void *pool_cache_get(struct pool *);
197 : void pool_cache_put(struct pool *, void *);
198 : void pool_cache_destroy(struct pool *);
199 : void pool_cache_gc(struct pool *);
200 : #endif
/* cache reporting hooks; declared regardless of MULTIPROCESSOR */
201 : void pool_cache_pool_info(struct pool *, struct kinfo_pool *);
202 : int pool_cache_info(struct pool *, void *, size_t *);
203 : int pool_cache_cpus_info(struct pool *, void *, size_t *);
204 :
/*
 * Debug level.  Under DIAGNOSTIC a nonzero value makes newly allocated
 * pages poison their free items and enables the double pool_put() scan;
 * a value of 2 additionally makes PR_WAITOK pool_get() calls yield().
 */
205 : #ifdef POOL_DEBUG
206 : int pool_debug = 1;
207 : #else
208 : int pool_debug = 0;
209 : #endif
210 :
/* true if the pool keeps its page headers inside the pages themselves */
211 : #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
212 :
/* page management: allocate, link into a pool, unlink, release */
213 : struct pool_page_header *
214 : pool_p_alloc(struct pool *, int, int *);
215 : void pool_p_insert(struct pool *, struct pool_page_header *);
216 : void pool_p_remove(struct pool *, struct pool_page_header *);
217 : void pool_p_free(struct pool *, struct pool_page_header *);
218 :
/* item management; pool_do_get()/pool_do_put() run with pr_lock held */
219 : void pool_update_curpage(struct pool *);
220 : void *pool_do_get(struct pool *, int, int *);
221 : void pool_do_put(struct pool *, void *);
222 : int pool_chk_page(struct pool *, struct pool_page_header *, int);
223 : int pool_chk(struct pool *);
224 : void pool_get_done(struct pool *, void *, void *);
225 : void pool_runqueue(struct pool *, int);
226 :
/* wrappers around the pool's page allocator (defined elsewhere in the file) */
227 : void *pool_allocator_alloc(struct pool *, int, int *);
228 : void pool_allocator_free(struct pool *, void *);
229 :
230 : /*
231 : * The default pool allocator.
232 : */
233 : void *pool_page_alloc(struct pool *, int, int *);
234 : void pool_page_free(struct pool *, void *);
235 :
236 : /*
237 : * safe for interrupts; this is the default allocator.  pool_init() picks
237 : * it when no allocator is given and the pool page size is PAGE_SIZE.
238 : */
239 : struct pool_allocator pool_allocator_single = {
240 : pool_page_alloc,
241 : pool_page_free,
242 : POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
243 : };
244 :
245 : void *pool_multi_alloc(struct pool *, int, int *);
246 : void pool_multi_free(struct pool *, void *);
247 :
/*
 * Picked by pool_init() when no allocator is given, the pool page size
 * is larger than PAGE_SIZE and PR_WAITOK is not set.
 */
248 : struct pool_allocator pool_allocator_multi = {
249 : pool_multi_alloc,
250 : pool_multi_free,
251 : POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
252 : };
253 :
254 : void *pool_multi_alloc_ni(struct pool *, int, int *);
255 : void pool_multi_free_ni(struct pool *, void *);
256 :
/*
 * Picked by pool_init() when no allocator is given, the pool page size
 * is larger than PAGE_SIZE and PR_WAITOK is set.
 */
257 : struct pool_allocator pool_allocator_multi_ni = {
258 : pool_multi_alloc_ni,
259 : pool_multi_free_ni,
260 : POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
261 : };
262 :
/* DDB printing helpers; each takes a printf-like output function */
263 : #ifdef DDB
264 : void pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
265 : __attribute__((__format__(__kprintf__,1,2))));
266 : void pool_print1(struct pool *, const char *, int (*)(const char *, ...)
267 : __attribute__((__format__(__kprintf__,1,2))));
268 : #endif
269 :
270 : /* stale page garbage collectors */
271 : void pool_gc_sched(void *);
272 : struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
273 : void pool_gc_pages(void *);
274 : struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
/* pool_put() frees an empty page once it has been idle for more than hz * pool_wait_free ticks */
275 : int pool_wait_free = 1;
/* NOTE(review): not referenced in this part of the file; presumably the gc counterpart of pool_wait_free -- confirm */
276 : int pool_wait_gc = 8;
277 :
278 0 : RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
279 :
280 : static inline int
281 0 : phtree_compare(const struct pool_page_header *a,
282 : const struct pool_page_header *b)
283 : {
284 0 : vaddr_t va = (vaddr_t)a->ph_page;
285 0 : vaddr_t vb = (vaddr_t)b->ph_page;
286 :
287 : /* the compares in this order are important for the NFIND to work */
288 0 : if (vb < va)
289 0 : return (-1);
290 0 : if (vb > va)
291 0 : return (1);
292 :
293 0 : return (0);
294 0 : }
295 :
296 0 : RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);
297 :
298 : /*
299 : * Return the pool page header based on page address.
300 : */
301 : static inline struct pool_page_header *
302 0 : pr_find_pagehead(struct pool *pp, void *v)
303 : {
304 0 : struct pool_page_header *ph, key;
305 :
306 0 : if (POOL_INPGHDR(pp)) {
307 : caddr_t page;
308 :
309 0 : page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
310 :
311 0 : return ((struct pool_page_header *)(page + pp->pr_phoffset));
312 : }
313 :
314 0 : key.ph_page = v;
315 0 : ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
316 0 : if (ph == NULL)
317 0 : panic("%s: %s: page header missing", __func__, pp->pr_wchan);
318 :
319 0 : KASSERT(ph->ph_page <= (caddr_t)v);
320 0 : if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
321 0 : panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
322 :
323 0 : return (ph);
324 0 : }
325 :
326 : /*
327 : * Initialize the given pool resource structure.
328 : *
329 : * We export this routine to allow other kernel parts to declare
330 : * static pools that must be initialized before malloc() is available.
331 : */
332 : void
333 0 : pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
334 : const char *wchan, struct pool_allocator *palloc)
335 : {
336 : int off = 0, space;
337 : unsigned int pgsize = PAGE_SIZE, items;
338 : size_t pa_pagesz;
339 : #ifdef DIAGNOSTIC
340 : struct pool *iter;
341 : #endif
342 :
343 0 : if (align == 0)
344 0 : align = ALIGN(1);
345 :
346 0 : if (size < sizeof(struct pool_item))
347 0 : size = sizeof(struct pool_item);
348 :
349 0 : size = roundup(size, align);
350 :
351 0 : while (size * 8 > pgsize)
352 0 : pgsize <<= 1;
353 :
354 0 : if (palloc == NULL) {
355 0 : if (pgsize > PAGE_SIZE) {
356 0 : palloc = ISSET(flags, PR_WAITOK) ?
357 : &pool_allocator_multi_ni : &pool_allocator_multi;
358 0 : } else
359 : palloc = &pool_allocator_single;
360 :
361 0 : pa_pagesz = palloc->pa_pagesz;
362 0 : } else {
363 : size_t pgsizes;
364 :
365 0 : pa_pagesz = palloc->pa_pagesz;
366 0 : if (pa_pagesz == 0)
367 : pa_pagesz = POOL_ALLOC_DEFAULT;
368 :
369 0 : pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;
370 :
371 : /* make sure the allocator can fit at least one item */
372 0 : if (size > pgsizes) {
373 0 : panic("%s: pool %s item size 0x%zx > "
374 : "allocator %p sizes 0x%zx", __func__, wchan,
375 : size, palloc, pgsizes);
376 : }
377 :
378 : /* shrink pgsize until it fits into the range */
379 0 : while (!ISSET(pgsizes, pgsize))
380 0 : pgsize >>= 1;
381 : }
382 0 : KASSERT(ISSET(pa_pagesz, pgsize));
383 :
384 0 : items = pgsize / size;
385 :
386 : /*
387 : * Decide whether to put the page header off page to avoid
388 : * wasting too large a part of the page. Off-page page headers
389 : * go into an RB tree, so we can match a returned item with
390 : * its header based on the page address.
391 : */
392 0 : if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
393 0 : if (pgsize - (size * items) >
394 : sizeof(struct pool_page_header)) {
395 0 : off = pgsize - sizeof(struct pool_page_header);
396 0 : } else if (sizeof(struct pool_page_header) * 2 >= size) {
397 0 : off = pgsize - sizeof(struct pool_page_header);
398 0 : items = off / size;
399 0 : }
400 : }
401 :
402 0 : KASSERT(items > 0);
403 :
404 : /*
405 : * Initialize the pool structure.
406 : */
407 0 : memset(pp, 0, sizeof(*pp));
408 0 : if (ISSET(flags, PR_RWLOCK)) {
409 0 : KASSERT(flags & PR_WAITOK);
410 0 : pp->pr_lock_ops = &pool_lock_ops_rw;
411 0 : } else
412 0 : pp->pr_lock_ops = &pool_lock_ops_mtx;
413 0 : TAILQ_INIT(&pp->pr_emptypages);
414 0 : TAILQ_INIT(&pp->pr_fullpages);
415 0 : TAILQ_INIT(&pp->pr_partpages);
416 0 : pp->pr_curpage = NULL;
417 0 : pp->pr_npages = 0;
418 0 : pp->pr_minitems = 0;
419 0 : pp->pr_minpages = 0;
420 0 : pp->pr_maxpages = 8;
421 0 : pp->pr_size = size;
422 0 : pp->pr_pgsize = pgsize;
423 0 : pp->pr_pgmask = ~0UL ^ (pgsize - 1);
424 0 : pp->pr_phoffset = off;
425 0 : pp->pr_itemsperpage = items;
426 0 : pp->pr_wchan = wchan;
427 0 : pp->pr_alloc = palloc;
428 0 : pp->pr_nitems = 0;
429 0 : pp->pr_nout = 0;
430 0 : pp->pr_hardlimit = UINT_MAX;
431 0 : pp->pr_hardlimit_warning = NULL;
432 0 : pp->pr_hardlimit_ratecap.tv_sec = 0;
433 0 : pp->pr_hardlimit_ratecap.tv_usec = 0;
434 0 : pp->pr_hardlimit_warning_last.tv_sec = 0;
435 0 : pp->pr_hardlimit_warning_last.tv_usec = 0;
436 0 : RBT_INIT(phtree, &pp->pr_phtree);
437 :
438 : /*
439 : * Use the space between the chunks and the page header
440 : * for cache coloring.
441 : */
442 0 : space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
443 0 : space -= pp->pr_itemsperpage * pp->pr_size;
444 0 : pp->pr_align = align;
445 0 : pp->pr_maxcolors = (space / align) + 1;
446 :
447 0 : pp->pr_nget = 0;
448 0 : pp->pr_nfail = 0;
449 0 : pp->pr_nput = 0;
450 0 : pp->pr_npagealloc = 0;
451 0 : pp->pr_npagefree = 0;
452 0 : pp->pr_hiwat = 0;
453 0 : pp->pr_nidle = 0;
454 :
455 0 : pp->pr_ipl = ipl;
456 0 : pp->pr_flags = flags;
457 :
458 0 : pl_init(pp, &pp->pr_lock);
459 0 : pl_init(pp, &pp->pr_requests_lock);
460 0 : TAILQ_INIT(&pp->pr_requests);
461 :
462 0 : if (phpool.pr_size == 0) {
463 0 : pool_init(&phpool, sizeof(struct pool_page_header), 0,
464 : IPL_HIGH, 0, "phpool", NULL);
465 :
466 : /* make sure phpool wont "recurse" */
467 0 : KASSERT(POOL_INPGHDR(&phpool));
468 : }
469 :
470 : /* pglistalloc/constraint parameters */
471 0 : pp->pr_crange = &kp_dirty;
472 :
473 : /* Insert this into the list of all pools. */
474 0 : rw_enter_write(&pool_lock);
475 : #ifdef DIAGNOSTIC
476 0 : SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
477 0 : if (iter == pp)
478 0 : panic("%s: pool %s already on list", __func__, wchan);
479 : }
480 : #endif
481 :
482 0 : pp->pr_serial = ++pool_serial;
483 0 : if (pool_serial == 0)
484 0 : panic("%s: too much uptime", __func__);
485 :
486 0 : SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
487 0 : pool_count++;
488 0 : rw_exit_write(&pool_lock);
489 0 : }
490 :
491 : /*
492 : * Decommission a pool resource.
493 : */
494 : void
495 0 : pool_destroy(struct pool *pp)
496 : {
497 : struct pool_page_header *ph;
498 : struct pool *prev, *iter;
499 :
500 : #ifdef MULTIPROCESSOR
501 0 : if (pp->pr_cache != NULL)
502 0 : pool_cache_destroy(pp);
503 : #endif
504 :
505 : #ifdef DIAGNOSTIC
506 0 : if (pp->pr_nout != 0)
507 0 : panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
508 : #endif
509 :
510 : /* Remove from global pool list */
511 0 : rw_enter_write(&pool_lock);
512 0 : pool_count--;
513 0 : if (pp == SIMPLEQ_FIRST(&pool_head))
514 0 : SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
515 : else {
516 : prev = SIMPLEQ_FIRST(&pool_head);
517 0 : SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
518 0 : if (iter == pp) {
519 0 : SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
520 : pr_poollist);
521 : break;
522 : }
523 : prev = iter;
524 : }
525 : }
526 0 : rw_exit_write(&pool_lock);
527 :
528 : /* Remove all pages */
529 0 : while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
530 0 : pl_enter(pp, &pp->pr_lock);
531 0 : pool_p_remove(pp, ph);
532 0 : pl_leave(pp, &pp->pr_lock);
533 0 : pool_p_free(pp, ph);
534 : }
535 0 : KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
536 0 : KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
537 0 : }
538 :
539 : void
540 0 : pool_request_init(struct pool_request *pr,
541 : void (*handler)(struct pool *, void *, void *), void *cookie)
542 : {
543 0 : pr->pr_handler = handler;
544 0 : pr->pr_cookie = cookie;
545 0 : pr->pr_item = NULL;
546 0 : }
547 :
548 : void
549 0 : pool_request(struct pool *pp, struct pool_request *pr)
550 : {
551 0 : pl_enter(pp, &pp->pr_requests_lock);
552 0 : TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
553 0 : pool_runqueue(pp, PR_NOWAIT);
554 0 : pl_leave(pp, &pp->pr_requests_lock);
555 0 : }
556 :
/*
 * Rendezvous between a sleeping pool_get() and pool_get_done():
 * pool_get() sleeps on this structure until `v' becomes non-NULL, and
 * pool_get_done() stores the item in `v' under `lock' and wakes it up.
 */
557 : struct pool_get_memory {
558 : union pool_lock lock;
559 : void * volatile v;
560 : };
561 :
562 : /*
563 : * Grab an item from the pool.
564 : */
565 : void *
566 0 : pool_get(struct pool *pp, int flags)
567 : {
568 : void *v = NULL;
569 120 : int slowdown = 0;
570 :
571 0 : KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
572 0 : if (pp->pr_flags & PR_RWLOCK)
573 0 : KASSERT(flags & PR_WAITOK);
574 :
575 : #ifdef MULTIPROCESSOR
576 120 : if (pp->pr_cache != NULL) {
577 0 : v = pool_cache_get(pp);
578 0 : if (v != NULL)
579 : goto good;
580 : }
581 : #endif
582 :
583 0 : pl_enter(pp, &pp->pr_lock);
584 0 : if (pp->pr_nout >= pp->pr_hardlimit) {
585 0 : if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
586 : goto fail;
587 120 : } else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
588 0 : if (ISSET(flags, PR_NOWAIT))
589 : goto fail;
590 : }
591 0 : pl_leave(pp, &pp->pr_lock);
592 :
593 0 : if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
594 0 : yield();
595 :
596 120 : if (v == NULL) {
597 0 : struct pool_get_memory mem = { .v = NULL };
598 0 : struct pool_request pr;
599 :
600 : #ifdef DIAGNOSTIC
601 0 : if (ISSET(flags, PR_WAITOK) && curproc == &proc0)
602 0 : panic("%s: cannot sleep for memory during boot",
603 : __func__);
604 : #endif
605 0 : pl_init(pp, &mem.lock);
606 0 : pool_request_init(&pr, pool_get_done, &mem);
607 0 : pool_request(pp, &pr);
608 :
609 0 : pl_enter(pp, &mem.lock);
610 0 : while (mem.v == NULL)
611 0 : pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan, 0);
612 0 : pl_leave(pp, &mem.lock);
613 :
614 0 : v = mem.v;
615 0 : }
616 :
617 : #ifdef MULTIPROCESSOR
618 : good:
619 : #endif
620 240 : if (ISSET(flags, PR_ZERO))
621 0 : memset(v, 0, pp->pr_size);
622 :
623 0 : return (v);
624 :
625 : fail:
626 0 : pp->pr_nfail++;
627 0 : pl_leave(pp, &pp->pr_lock);
628 0 : return (NULL);
629 0 : }
630 :
631 : void
632 0 : pool_get_done(struct pool *pp, void *xmem, void *v)
633 : {
634 0 : struct pool_get_memory *mem = xmem;
635 :
636 0 : pl_enter(pp, &mem->lock);
637 0 : mem->v = v;
638 0 : pl_leave(pp, &mem->lock);
639 :
640 0 : wakeup_one(mem);
641 0 : }
642 :
643 : void
644 0 : pool_runqueue(struct pool *pp, int flags)
645 : {
646 0 : struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
647 : struct pool_request *pr;
648 :
649 0 : pl_assert_unlocked(pp, &pp->pr_lock);
650 0 : pl_assert_locked(pp, &pp->pr_requests_lock);
651 :
652 0 : if (pp->pr_requesting++)
653 0 : return;
654 :
655 0 : do {
656 0 : pp->pr_requesting = 1;
657 :
658 : /* no TAILQ_JOIN? :( */
659 0 : while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
660 0 : TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
661 0 : TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
662 : }
663 0 : if (TAILQ_EMPTY(&prl))
664 : continue;
665 :
666 0 : pl_leave(pp, &pp->pr_requests_lock);
667 :
668 0 : pl_enter(pp, &pp->pr_lock);
669 0 : pr = TAILQ_FIRST(&prl);
670 0 : while (pr != NULL) {
671 0 : int slowdown = 0;
672 :
673 0 : if (pp->pr_nout >= pp->pr_hardlimit)
674 0 : break;
675 :
676 0 : pr->pr_item = pool_do_get(pp, flags, &slowdown);
677 0 : if (pr->pr_item == NULL) /* || slowdown ? */
678 0 : break;
679 :
680 0 : pr = TAILQ_NEXT(pr, pr_entry);
681 0 : }
682 0 : pl_leave(pp, &pp->pr_lock);
683 :
684 0 : while ((pr = TAILQ_FIRST(&prl)) != NULL &&
685 0 : pr->pr_item != NULL) {
686 0 : TAILQ_REMOVE(&prl, pr, pr_entry);
687 0 : (*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
688 : }
689 :
690 0 : pl_enter(pp, &pp->pr_requests_lock);
691 0 : } while (--pp->pr_requesting);
692 :
693 : /* no TAILQ_JOIN :( */
694 0 : while ((pr = TAILQ_FIRST(&prl)) != NULL) {
695 0 : TAILQ_REMOVE(&prl, pr, pr_entry);
696 0 : TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
697 : }
698 0 : }
699 :
700 : void *
701 0 : pool_do_get(struct pool *pp, int flags, int *slowdown)
702 : {
703 : struct pool_item *pi;
704 : struct pool_page_header *ph;
705 :
706 0 : pl_assert_locked(pp, &pp->pr_lock);
707 :
708 120 : splassert(pp->pr_ipl);
709 :
710 : /*
711 : * Account for this item now to avoid races if we need to give up
712 : * pr_lock to allocate a page.
713 : */
714 0 : pp->pr_nout++;
715 :
716 0 : if (pp->pr_curpage == NULL) {
717 0 : pl_leave(pp, &pp->pr_lock);
718 0 : ph = pool_p_alloc(pp, flags, slowdown);
719 0 : pl_enter(pp, &pp->pr_lock);
720 :
721 0 : if (ph == NULL) {
722 0 : pp->pr_nout--;
723 0 : return (NULL);
724 : }
725 :
726 0 : pool_p_insert(pp, ph);
727 0 : }
728 :
729 120 : ph = pp->pr_curpage;
730 0 : pi = XSIMPLEQ_FIRST(&ph->ph_items);
731 0 : if (__predict_false(pi == NULL))
732 0 : panic("%s: %s: page empty", __func__, pp->pr_wchan);
733 :
734 0 : if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
735 0 : panic("%s: %s free list modified: "
736 : "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
737 0 : __func__, pp->pr_wchan, ph->ph_page, pi,
738 : 0, pi->pi_magic, POOL_IMAGIC(ph, pi));
739 : }
740 :
741 1 : XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);
742 :
743 : #ifdef DIAGNOSTIC
744 119 : if (pool_debug && POOL_PHPOISON(ph)) {
745 0 : size_t pidx;
746 0 : uint32_t pval;
747 0 : if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
748 : &pidx, &pval)) {
749 0 : int *ip = (int *)(pi + 1);
750 0 : panic("%s: %s free list modified: "
751 : "page %p; item addr %p; offset 0x%zx=0x%x",
752 0 : __func__, pp->pr_wchan, ph->ph_page, pi,
753 0 : (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
754 : }
755 120 : }
756 : #endif /* DIAGNOSTIC */
757 :
758 0 : if (ph->ph_nmissing++ == 0) {
759 : /*
760 : * This page was previously empty. Move it to the list of
761 : * partially-full pages. This page is already curpage.
762 : */
763 0 : TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
764 0 : TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
765 :
766 0 : pp->pr_nidle--;
767 0 : }
768 :
769 239 : if (ph->ph_nmissing == pp->pr_itemsperpage) {
770 : /*
771 : * This page is now full. Move it to the full list
772 : * and select a new current page.
773 : */
774 1 : TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
775 0 : TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
776 0 : pool_update_curpage(pp);
777 0 : }
778 :
779 0 : pp->pr_nget++;
780 :
781 0 : return (pi);
782 0 : }
783 :
784 : /*
785 : * Return resource to the pool.
786 : */
787 : void
788 0 : pool_put(struct pool *pp, void *v)
789 : {
790 : struct pool_page_header *ph, *freeph = NULL;
791 :
792 : #ifdef DIAGNOSTIC
793 111 : if (v == NULL)
794 0 : panic("%s: NULL item", __func__);
795 : #endif
796 :
797 : #ifdef MULTIPROCESSOR
798 0 : if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
799 0 : pool_cache_put(pp, v);
800 0 : return;
801 : }
802 : #endif
803 :
804 0 : pl_enter(pp, &pp->pr_lock);
805 :
806 0 : pool_do_put(pp, v);
807 :
808 0 : pp->pr_nout--;
809 0 : pp->pr_nput++;
810 :
811 : /* is it time to free a page? */
812 94 : if (pp->pr_nidle > pp->pr_maxpages &&
813 0 : (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
814 0 : (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
815 : freeph = ph;
816 0 : pool_p_remove(pp, freeph);
817 0 : }
818 :
819 0 : pl_leave(pp, &pp->pr_lock);
820 :
821 0 : if (freeph != NULL)
822 0 : pool_p_free(pp, freeph);
823 :
824 92 : if (!TAILQ_EMPTY(&pp->pr_requests)) {
825 0 : pl_enter(pp, &pp->pr_requests_lock);
826 0 : pool_runqueue(pp, PR_NOWAIT);
827 0 : pl_leave(pp, &pp->pr_requests_lock);
828 0 : }
829 91 : }
830 :
831 : void
832 0 : pool_do_put(struct pool *pp, void *v)
833 : {
834 0 : struct pool_item *pi = v;
835 : struct pool_page_header *ph;
836 :
837 207 : splassert(pp->pr_ipl);
838 :
839 0 : ph = pr_find_pagehead(pp, v);
840 :
841 : #ifdef DIAGNOSTIC
842 0 : if (pool_debug) {
843 : struct pool_item *qi;
844 1 : XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
845 3558 : if (pi == qi) {
846 0 : panic("%s: %s: double pool_put: %p", __func__,
847 0 : pp->pr_wchan, pi);
848 : }
849 : }
850 0 : }
851 : #endif /* DIAGNOSTIC */
852 :
853 0 : pi->pi_magic = POOL_IMAGIC(ph, pi);
854 95 : XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
855 : #ifdef DIAGNOSTIC
856 0 : if (POOL_PHPOISON(ph))
857 94 : poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
858 : #endif /* DIAGNOSTIC */
859 :
860 0 : if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
861 : /*
862 : * The page was previously completely full, move it to the
863 : * partially-full list.
864 : */
865 1 : TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
866 0 : TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
867 0 : }
868 :
869 187 : if (ph->ph_nmissing == 0) {
870 : /*
871 : * The page is now empty, so move it to the empty page list.
872 : */
873 0 : pp->pr_nidle++;
874 :
875 0 : ph->ph_tick = ticks;
876 0 : TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
877 0 : TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
878 0 : pool_update_curpage(pp);
879 0 : }
880 0 : }
881 :
882 : /*
883 : * Add N items to the pool.
884 : */
885 : int
886 0 : pool_prime(struct pool *pp, int n)
887 : {
888 0 : struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
889 : struct pool_page_header *ph;
890 : int newpages;
891 :
892 0 : newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
893 :
894 0 : while (newpages-- > 0) {
895 0 : int slowdown = 0;
896 :
897 0 : ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
898 0 : if (ph == NULL) /* or slowdown? */
899 0 : break;
900 :
901 0 : TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
902 0 : }
903 :
904 0 : pl_enter(pp, &pp->pr_lock);
905 0 : while ((ph = TAILQ_FIRST(&pl)) != NULL) {
906 0 : TAILQ_REMOVE(&pl, ph, ph_entry);
907 0 : pool_p_insert(pp, ph);
908 : }
909 0 : pl_leave(pp, &pp->pr_lock);
910 :
911 0 : return (0);
912 0 : }
913 :
914 : struct pool_page_header *
915 0 : pool_p_alloc(struct pool *pp, int flags, int *slowdown)
916 : {
917 : struct pool_page_header *ph;
918 : struct pool_item *pi;
919 : caddr_t addr;
920 : unsigned int order;
921 : int o;
922 : int n;
923 :
924 0 : pl_assert_unlocked(pp, &pp->pr_lock);
925 0 : KASSERT(pp->pr_size >= sizeof(*pi));
926 :
927 0 : addr = pool_allocator_alloc(pp, flags, slowdown);
928 0 : if (addr == NULL)
929 0 : return (NULL);
930 :
931 0 : if (POOL_INPGHDR(pp))
932 0 : ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
933 : else {
934 0 : ph = pool_get(&phpool, flags);
935 0 : if (ph == NULL) {
936 0 : pool_allocator_free(pp, addr);
937 0 : return (NULL);
938 : }
939 : }
940 :
941 0 : XSIMPLEQ_INIT(&ph->ph_items);
942 0 : ph->ph_page = addr;
943 0 : addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
944 0 : ph->ph_colored = addr;
945 0 : ph->ph_nmissing = 0;
946 0 : arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
947 : #ifdef DIAGNOSTIC
948 : /* use a bit in ph_magic to record if we poison page items */
949 0 : if (pool_debug)
950 0 : SET(ph->ph_magic, POOL_MAGICBIT);
951 : else
952 0 : CLR(ph->ph_magic, POOL_MAGICBIT);
953 : #endif /* DIAGNOSTIC */
954 :
955 0 : n = pp->pr_itemsperpage;
956 : o = 32;
957 0 : while (n--) {
958 0 : pi = (struct pool_item *)addr;
959 0 : pi->pi_magic = POOL_IMAGIC(ph, pi);
960 :
961 0 : if (o == 32) {
962 0 : order = arc4random();
963 : o = 0;
964 0 : }
965 0 : if (ISSET(order, 1 << o++))
966 0 : XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
967 : else
968 0 : XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
969 :
970 : #ifdef DIAGNOSTIC
971 0 : if (POOL_PHPOISON(ph))
972 0 : poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
973 : #endif /* DIAGNOSTIC */
974 :
975 0 : addr += pp->pr_size;
976 : }
977 :
978 0 : return (ph);
979 0 : }
980 :
981 : void
982 0 : pool_p_free(struct pool *pp, struct pool_page_header *ph)
983 : {
984 : struct pool_item *pi;
985 :
986 0 : pl_assert_unlocked(pp, &pp->pr_lock);
987 0 : KASSERT(ph->ph_nmissing == 0);
988 :
989 0 : XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
990 0 : if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
991 0 : panic("%s: %s free list modified: "
992 : "page %p; item addr %p; offset 0x%x=0x%lx",
993 0 : __func__, pp->pr_wchan, ph->ph_page, pi,
994 : 0, pi->pi_magic);
995 : }
996 :
997 : #ifdef DIAGNOSTIC
998 0 : if (POOL_PHPOISON(ph)) {
999 0 : size_t pidx;
1000 0 : uint32_t pval;
1001 0 : if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
1002 : &pidx, &pval)) {
1003 0 : int *ip = (int *)(pi + 1);
1004 0 : panic("%s: %s free list modified: "
1005 : "page %p; item addr %p; offset 0x%zx=0x%x",
1006 0 : __func__, pp->pr_wchan, ph->ph_page, pi,
1007 0 : pidx * sizeof(int), ip[pidx]);
1008 : }
1009 0 : }
1010 : #endif
1011 : }
1012 :
1013 0 : pool_allocator_free(pp, ph->ph_page);
1014 :
1015 0 : if (!POOL_INPGHDR(pp))
1016 0 : pool_put(&phpool, ph);
1017 0 : }
1018 :
1019 : void
1020 0 : pool_p_insert(struct pool *pp, struct pool_page_header *ph)
1021 : {
1022 0 : pl_assert_locked(pp, &pp->pr_lock);
1023 :
1024 : /* If the pool was depleted, point at the new page */
1025 0 : if (pp->pr_curpage == NULL)
1026 0 : pp->pr_curpage = ph;
1027 :
1028 0 : TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
1029 0 : if (!POOL_INPGHDR(pp))
1030 0 : RBT_INSERT(phtree, &pp->pr_phtree, ph);
1031 :
1032 0 : pp->pr_nitems += pp->pr_itemsperpage;
1033 0 : pp->pr_nidle++;
1034 :
1035 0 : pp->pr_npagealloc++;
1036 0 : if (++pp->pr_npages > pp->pr_hiwat)
1037 0 : pp->pr_hiwat = pp->pr_npages;
1038 0 : }
1039 :
1040 : void
1041 0 : pool_p_remove(struct pool *pp, struct pool_page_header *ph)
1042 : {
1043 0 : pl_assert_locked(pp, &pp->pr_lock);
1044 :
1045 0 : pp->pr_npagefree++;
1046 0 : pp->pr_npages--;
1047 0 : pp->pr_nidle--;
1048 0 : pp->pr_nitems -= pp->pr_itemsperpage;
1049 :
1050 0 : if (!POOL_INPGHDR(pp))
1051 0 : RBT_REMOVE(phtree, &pp->pr_phtree, ph);
1052 0 : TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
1053 :
1054 0 : pool_update_curpage(pp);
1055 0 : }
1056 :
1057 : void
1058 0 : pool_update_curpage(struct pool *pp)
1059 : {
1060 0 : pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
1061 1 : if (pp->pr_curpage == NULL) {
1062 0 : pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
1063 0 : }
1064 0 : }
1065 :
1066 : void
1067 0 : pool_setlowat(struct pool *pp, int n)
1068 : {
1069 : int prime = 0;
1070 :
1071 0 : pl_enter(pp, &pp->pr_lock);
1072 0 : pp->pr_minitems = n;
1073 0 : pp->pr_minpages = (n == 0)
1074 : ? 0
1075 0 : : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1076 :
1077 0 : if (pp->pr_nitems < n)
1078 0 : prime = n - pp->pr_nitems;
1079 0 : pl_leave(pp, &pp->pr_lock);
1080 :
1081 0 : if (prime > 0)
1082 0 : pool_prime(pp, prime);
1083 0 : }
1084 :
1085 : void
1086 0 : pool_sethiwat(struct pool *pp, int n)
1087 : {
1088 0 : pp->pr_maxpages = (n == 0)
1089 : ? 0
1090 0 : : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1091 0 : }
1092 :
1093 : int
1094 0 : pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
1095 : {
1096 : int error = 0;
1097 :
1098 0 : if (n < pp->pr_nout) {
1099 : error = EINVAL;
1100 0 : goto done;
1101 : }
1102 :
1103 0 : pp->pr_hardlimit = n;
1104 0 : pp->pr_hardlimit_warning = warnmsg;
1105 0 : pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1106 0 : pp->pr_hardlimit_warning_last.tv_sec = 0;
1107 0 : pp->pr_hardlimit_warning_last.tv_usec = 0;
1108 :
1109 : done:
1110 0 : return (error);
1111 : }
1112 :
/*
 * Record mode as the pool's pr_crange.  Within this file pr_crange is
 * passed to km_alloc()/km_free() by the backend page allocators.
 */
void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}
1118 :
1119 : /*
1120 : * Release all complete pages that have not been used recently.
1121 : *
1122 : * Returns non-zero if any pages have been reclaimed.
1123 : */
1124 : int
1125 0 : pool_reclaim(struct pool *pp)
1126 : {
1127 : struct pool_page_header *ph, *phnext;
1128 0 : struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
1129 :
1130 0 : pl_enter(pp, &pp->pr_lock);
1131 0 : for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1132 0 : phnext = TAILQ_NEXT(ph, ph_entry);
1133 :
1134 : /* Check our minimum page claim */
1135 0 : if (pp->pr_npages <= pp->pr_minpages)
1136 : break;
1137 :
1138 : /*
1139 : * If freeing this page would put us below
1140 : * the low water mark, stop now.
1141 : */
1142 0 : if ((pp->pr_nitems - pp->pr_itemsperpage) <
1143 0 : pp->pr_minitems)
1144 : break;
1145 :
1146 0 : pool_p_remove(pp, ph);
1147 0 : TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
1148 : }
1149 0 : pl_leave(pp, &pp->pr_lock);
1150 :
1151 0 : if (TAILQ_EMPTY(&pl))
1152 0 : return (0);
1153 :
1154 0 : while ((ph = TAILQ_FIRST(&pl)) != NULL) {
1155 0 : TAILQ_REMOVE(&pl, ph, ph_entry);
1156 0 : pool_p_free(pp, ph);
1157 : }
1158 :
1159 0 : return (1);
1160 0 : }
1161 :
/*
 * Run pool_reclaim() on every pool on the global pool_head list.
 *
 * pool_lock is held for reading while the list is walked.  The return
 * values of the individual pool_reclaim() calls are discarded.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}
1176 :
1177 : #ifdef DDB
1178 : #include <machine/db_machdep.h>
1179 : #include <ddb/db_output.h>
1180 :
1181 : /*
1182 : * Diagnostic helpers.
1183 : */
/*
 * Print information about pool pp; a direct pass-through to
 * pool_print1() with the same modifier string and output callback.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}
1190 :
1191 : void
1192 0 : pool_print_pagelist(struct pool_pagelist *pl,
1193 : int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1194 : {
1195 : struct pool_page_header *ph;
1196 : struct pool_item *pi;
1197 :
1198 0 : TAILQ_FOREACH(ph, pl, ph_entry) {
1199 0 : (*pr)("\t\tpage %p, color %p, nmissing %d\n",
1200 0 : ph->ph_page, ph->ph_colored, ph->ph_nmissing);
1201 0 : XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1202 0 : if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1203 0 : (*pr)("\t\t\titem %p, magic 0x%lx\n",
1204 : pi, pi->pi_magic);
1205 0 : }
1206 : }
1207 : }
1208 0 : }
1209 :
1210 : void
1211 0 : pool_print1(struct pool *pp, const char *modif,
1212 : int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1213 : {
1214 : struct pool_page_header *ph;
1215 : int print_pagelist = 0;
1216 : char c;
1217 :
1218 0 : while ((c = *modif++) != '\0') {
1219 0 : if (c == 'p')
1220 0 : print_pagelist = 1;
1221 0 : modif++;
1222 : }
1223 :
1224 0 : (*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
1225 0 : pp->pr_maxcolors);
1226 0 : (*pr)("\talloc %p\n", pp->pr_alloc);
1227 0 : (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1228 0 : pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1229 0 : (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1230 0 : pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1231 :
1232 0 : (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1233 0 : pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1234 0 : (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1235 0 : pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1236 :
1237 0 : if (print_pagelist == 0)
1238 0 : return;
1239 :
1240 0 : if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
1241 0 : (*pr)("\n\tempty page list:\n");
1242 0 : pool_print_pagelist(&pp->pr_emptypages, pr);
1243 0 : if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
1244 0 : (*pr)("\n\tfull page list:\n");
1245 0 : pool_print_pagelist(&pp->pr_fullpages, pr);
1246 0 : if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
1247 0 : (*pr)("\n\tpartial-page list:\n");
1248 0 : pool_print_pagelist(&pp->pr_partpages, pr);
1249 :
1250 0 : if (pp->pr_curpage == NULL)
1251 0 : (*pr)("\tno current page\n");
1252 : else
1253 0 : (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1254 0 : }
1255 :
/*
 * Print a table of all pools on pool_head using db_printf().
 *
 * Only the first character of modif is examined:
 *   '\0' - statistics table, one row per pool that has pr_nget != 0,
 *          followed by a pool_chk() of that pool;
 *   'a'  - name, pool address and allocator address of every pool.
 * Any other character prints a usage message and returns.
 *
 * expr, haddr and count are not used by this function.
 */
void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	/* column headings */
	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		/* pools that never served a request are left out */
		if (!pp->pr_nget)
			continue;

		/* a maxpages of UINT_MAX is shown as "inf" */
		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

/*
 * Print one column.  "width" is the nominal column width and "fixed"
 * the number of literal characters in fmt.  "ovflw" carries how far
 * earlier columns ran over their width; that amount is taken out of
 * this column's padding (never below zero) so that later columns can
 * line up again.
 */
#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		/* pool_chk() prints any inconsistencies it finds */
		pool_chk(pp);
	}
}
1329 : #endif /* DDB */
1330 :
1331 : #if defined(POOL_DEBUG) || defined(DDB)
1332 : int
1333 0 : pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
1334 : {
1335 : struct pool_item *pi;
1336 : caddr_t page;
1337 : int n;
1338 0 : const char *label = pp->pr_wchan;
1339 :
1340 0 : page = (caddr_t)((u_long)ph & pp->pr_pgmask);
1341 0 : if (page != ph->ph_page && POOL_INPGHDR(pp)) {
1342 0 : printf("%s: ", label);
1343 0 : printf("pool(%p:%s): page inconsistency: page %p; "
1344 : "at page head addr %p (p %p)\n",
1345 0 : pp, pp->pr_wchan, ph->ph_page, ph, page);
1346 0 : return 1;
1347 : }
1348 :
1349 0 : for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
1350 0 : pi != NULL;
1351 0 : pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
1352 0 : if ((caddr_t)pi < ph->ph_page ||
1353 0 : (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
1354 0 : printf("%s: ", label);
1355 0 : printf("pool(%p:%s): page inconsistency: page %p;"
1356 : " item ordinal %d; addr %p\n", pp,
1357 0 : pp->pr_wchan, ph->ph_page, n, pi);
1358 0 : return (1);
1359 : }
1360 :
1361 0 : if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1362 0 : printf("%s: ", label);
1363 0 : printf("pool(%p:%s): free list modified: "
1364 : "page %p; item ordinal %d; addr %p "
1365 : "(p %p); offset 0x%x=0x%lx\n",
1366 0 : pp, pp->pr_wchan, ph->ph_page, n, pi, page,
1367 0 : 0, pi->pi_magic);
1368 0 : }
1369 :
1370 : #ifdef DIAGNOSTIC
1371 0 : if (POOL_PHPOISON(ph)) {
1372 0 : size_t pidx;
1373 0 : uint32_t pval;
1374 0 : if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
1375 : &pidx, &pval)) {
1376 0 : int *ip = (int *)(pi + 1);
1377 0 : printf("pool(%s): free list modified: "
1378 : "page %p; item ordinal %d; addr %p "
1379 : "(p %p); offset 0x%zx=0x%x\n",
1380 0 : pp->pr_wchan, ph->ph_page, n, pi,
1381 0 : page, pidx * sizeof(int), ip[pidx]);
1382 0 : }
1383 0 : }
1384 : #endif /* DIAGNOSTIC */
1385 : }
1386 0 : if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
1387 0 : printf("pool(%p:%s): page inconsistency: page %p;"
1388 : " %d on list, %d missing, %d items per page\n", pp,
1389 0 : pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1390 : pp->pr_itemsperpage);
1391 0 : return 1;
1392 : }
1393 0 : if (expected >= 0 && n != expected) {
1394 0 : printf("pool(%p:%s): page inconsistency: page %p;"
1395 : " %d on list, %d missing, %d expected\n", pp,
1396 0 : pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1397 : expected);
1398 0 : return 1;
1399 : }
1400 0 : return 0;
1401 0 : }
1402 :
1403 : int
1404 0 : pool_chk(struct pool *pp)
1405 : {
1406 : struct pool_page_header *ph;
1407 : int r = 0;
1408 :
1409 0 : TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
1410 0 : r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1411 0 : TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
1412 0 : r += pool_chk_page(pp, ph, 0);
1413 0 : TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
1414 0 : r += pool_chk_page(pp, ph, -1);
1415 :
1416 0 : return (r);
1417 : }
1418 : #endif /* defined(POOL_DEBUG) || defined(DDB) */
1419 :
1420 : #ifdef DDB
/*
 * Call func(item, full, pr) for items of pool pp that are not on a
 * page's free list.
 *
 * Only pages on pr_fullpages and pr_partpages are visited.  full and
 * pr are handed to func unchanged.
 */
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	    __attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	/*
	 * On these pages the first ph_nmissing slots, starting at
	 * ph_colored and pr_size bytes apart, are reported without
	 * consulting the free list.
	 */
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	/*
	 * Here each slot is looked up on the page's free list first; a
	 * slot that is not found there is reported.  The scan moves on
	 * slot by slot until ph_nmissing slots have been reported.
	 */
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			/* pi != cp here means the search found no match */
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
1460 : #endif
1461 :
1462 : /*
1463 : * We have three different sysctls.
1464 : * kern.pool.npools - the number of pools.
1465 : * kern.pool.pool.<pool#> - the pool struct for the pool#.
1466 : * kern.pool.name.<pool#> - the name for pool#.
1467 : */
1468 : int
1469 0 : sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
1470 : {
1471 0 : struct kinfo_pool pi;
1472 : struct pool *pp;
1473 : int rv = ENOENT;
1474 :
1475 0 : switch (name[0]) {
1476 : case KERN_POOL_NPOOLS:
1477 0 : if (namelen != 1)
1478 0 : return (ENOTDIR);
1479 0 : return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
1480 :
1481 : case KERN_POOL_NAME:
1482 : case KERN_POOL_POOL:
1483 : case KERN_POOL_CACHE:
1484 : case KERN_POOL_CACHE_CPUS:
1485 : break;
1486 : default:
1487 0 : return (EOPNOTSUPP);
1488 : }
1489 :
1490 0 : if (namelen != 2)
1491 0 : return (ENOTDIR);
1492 :
1493 0 : rw_enter_read(&pool_lock);
1494 :
1495 0 : SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1496 0 : if (name[1] == pp->pr_serial)
1497 : break;
1498 : }
1499 :
1500 0 : if (pp == NULL)
1501 : goto done;
1502 :
1503 0 : switch (name[0]) {
1504 : case KERN_POOL_NAME:
1505 0 : rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
1506 0 : break;
1507 : case KERN_POOL_POOL:
1508 0 : memset(&pi, 0, sizeof(pi));
1509 :
1510 0 : pl_enter(pp, &pp->pr_lock);
1511 0 : pi.pr_size = pp->pr_size;
1512 0 : pi.pr_pgsize = pp->pr_pgsize;
1513 0 : pi.pr_itemsperpage = pp->pr_itemsperpage;
1514 0 : pi.pr_npages = pp->pr_npages;
1515 0 : pi.pr_minpages = pp->pr_minpages;
1516 0 : pi.pr_maxpages = pp->pr_maxpages;
1517 0 : pi.pr_hardlimit = pp->pr_hardlimit;
1518 0 : pi.pr_nout = pp->pr_nout;
1519 0 : pi.pr_nitems = pp->pr_nitems;
1520 0 : pi.pr_nget = pp->pr_nget;
1521 0 : pi.pr_nput = pp->pr_nput;
1522 0 : pi.pr_nfail = pp->pr_nfail;
1523 0 : pi.pr_npagealloc = pp->pr_npagealloc;
1524 0 : pi.pr_npagefree = pp->pr_npagefree;
1525 0 : pi.pr_hiwat = pp->pr_hiwat;
1526 0 : pi.pr_nidle = pp->pr_nidle;
1527 0 : pl_leave(pp, &pp->pr_lock);
1528 :
1529 0 : pool_cache_pool_info(pp, &pi);
1530 :
1531 0 : rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
1532 0 : break;
1533 :
1534 : case KERN_POOL_CACHE:
1535 0 : rv = pool_cache_info(pp, oldp, oldlenp);
1536 0 : break;
1537 :
1538 : case KERN_POOL_CACHE_CPUS:
1539 0 : rv = pool_cache_cpus_info(pp, oldp, oldlenp);
1540 0 : break;
1541 : }
1542 :
1543 : done:
1544 0 : rw_exit_read(&pool_lock);
1545 :
1546 0 : return (rv);
1547 0 : }
1548 :
/*
 * Queue pool_gc_task on the systqmp task queue.  The argument is not
 * used.
 */
void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}
1554 :
1555 : void
1556 0 : pool_gc_pages(void *null)
1557 : {
1558 : struct pool *pp;
1559 : struct pool_page_header *ph, *freeph;
1560 : int s;
1561 :
1562 0 : rw_enter_read(&pool_lock);
1563 0 : s = splvm(); /* XXX go to splvm until all pools _setipl properly */
1564 0 : SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1565 : #ifdef MULTIPROCESSOR
1566 0 : if (pp->pr_cache != NULL)
1567 0 : pool_cache_gc(pp);
1568 : #endif
1569 :
1570 0 : if (pp->pr_nidle <= pp->pr_minpages || /* guess */
1571 0 : !pl_enter_try(pp, &pp->pr_lock)) /* try */
1572 : continue;
1573 :
1574 : /* is it time to free a page? */
1575 0 : if (pp->pr_nidle > pp->pr_minpages &&
1576 0 : (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
1577 0 : (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
1578 : freeph = ph;
1579 0 : pool_p_remove(pp, freeph);
1580 0 : } else
1581 : freeph = NULL;
1582 :
1583 0 : pl_leave(pp, &pp->pr_lock);
1584 :
1585 0 : if (freeph != NULL)
1586 0 : pool_p_free(pp, freeph);
1587 : }
1588 0 : splx(s);
1589 0 : rw_exit_read(&pool_lock);
1590 :
1591 0 : timeout_add_sec(&pool_gc_tick, 1);
1592 0 : }
1593 :
1594 : /*
1595 : * Pool backend allocators.
1596 : */
1597 :
1598 : void *
1599 0 : pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
1600 : {
1601 : void *v;
1602 :
1603 0 : v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
1604 :
1605 : #ifdef DIAGNOSTIC
1606 0 : if (v != NULL && POOL_INPGHDR(pp)) {
1607 0 : vaddr_t addr = (vaddr_t)v;
1608 0 : if ((addr & pp->pr_pgmask) != addr) {
1609 0 : panic("%s: %s page address %p isnt aligned to %u",
1610 0 : __func__, pp->pr_wchan, v, pp->pr_pgsize);
1611 : }
1612 0 : }
1613 : #endif
1614 :
1615 0 : return (v);
1616 : }
1617 :
1618 : void
1619 0 : pool_allocator_free(struct pool *pp, void *v)
1620 : {
1621 0 : struct pool_allocator *pa = pp->pr_alloc;
1622 :
1623 0 : (*pa->pa_free)(pp, v);
1624 0 : }
1625 :
/*
 * Backend allocator: get pr_pgsize bytes from km_alloc() using the
 * kv_page mode and the pool's pr_crange.
 *
 * kd_waitok is set from the PR_WAITOK bit of flags and slowdown is
 * passed on as kd_slowdown.  Returns km_alloc()'s result.
 */
void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}
1636 :
/*
 * Backend allocator: release a page obtained by pool_page_alloc(),
 * using the same size and modes for km_free().
 */
void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}
1642 :
/*
 * Backend allocator: get pr_pgsize bytes from km_alloc() using a local
 * copy of the kv_intrsafe mode.
 *
 * When POOL_INPGHDR(pp) is set the copy's kv_align is raised to
 * pr_pgsize.  The km_alloc() call is made at splvm().  Returns
 * km_alloc()'s result.
 */
void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}
1663 :
/*
 * Backend allocator: release a page obtained by pool_multi_alloc().
 * The kmem_va_mode is rebuilt exactly as on allocation and km_free()
 * is called at splvm().
 */
void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}
1677 :
/*
 * Backend allocator: like pool_multi_alloc(), but based on the kv_any
 * mode and with the km_alloc() call wrapped in KERNEL_LOCK() /
 * KERNEL_UNLOCK() instead of splvm().
 * NOTE(review): "_ni" presumably stands for "not interrupt safe";
 * confirm against the allocator definitions.
 */
void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}
1697 :
/*
 * Backend allocator: release a page obtained by pool_multi_alloc_ni().
 * The kmem_va_mode is rebuilt exactly as on allocation and km_free()
 * is called with the kernel lock held.
 */
void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}
1710 :
1711 : #ifdef MULTIPROCESSOR
1712 :
1713 : struct pool pool_caches; /* per cpu cache entries */
1714 :
1715 : void
1716 0 : pool_cache_init(struct pool *pp)
1717 : {
1718 : struct cpumem *cm;
1719 : struct pool_cache *pc;
1720 0 : struct cpumem_iter i;
1721 :
1722 0 : if (pool_caches.pr_size == 0) {
1723 0 : pool_init(&pool_caches, sizeof(struct pool_cache),
1724 : CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK,
1725 : "plcache", NULL);
1726 0 : }
1727 :
1728 : /* must be able to use the pool items as cache list items */
1729 0 : KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));
1730 :
1731 0 : cm = cpumem_get(&pool_caches);
1732 :
1733 0 : pl_init(pp, &pp->pr_cache_lock);
1734 0 : arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
1735 0 : TAILQ_INIT(&pp->pr_cache_lists);
1736 0 : pp->pr_cache_nitems = 0;
1737 0 : pp->pr_cache_tick = ticks;
1738 0 : pp->pr_cache_items = 8;
1739 0 : pp->pr_cache_contention = 0;
1740 0 : pp->pr_cache_ngc = 0;
1741 :
1742 0 : CPUMEM_FOREACH(pc, &i, cm) {
1743 0 : pc->pc_actv = NULL;
1744 0 : pc->pc_nactv = 0;
1745 0 : pc->pc_prev = NULL;
1746 :
1747 0 : pc->pc_nget = 0;
1748 0 : pc->pc_nfail = 0;
1749 0 : pc->pc_nput = 0;
1750 0 : pc->pc_nlget = 0;
1751 0 : pc->pc_nlfail = 0;
1752 0 : pc->pc_nlput = 0;
1753 0 : pc->pc_nout = 0;
1754 : }
1755 :
1756 0 : membar_producer();
1757 :
1758 0 : pp->pr_cache = cm;
1759 0 : }
1760 :
/*
 * Stamp cache item ci with two check words derived from the pool's
 * pr_cache_magic values: the first is XORed with the item's own
 * address, the second with its ci_next pointer.  The words are written
 * over the storage that starts at ci_nextl, so this is only done while
 * the item is not linked on a list through ci_nextl.
 * pool_cache_item_magic_check() verifies them.
 */
static inline void
pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
{
	unsigned long *entry = (unsigned long *)&ci->ci_nextl;

	entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
	entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
}
1769 :
1770 : static inline void
1771 0 : pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
1772 : {
1773 : unsigned long *entry;
1774 : unsigned long val;
1775 :
1776 0 : entry = (unsigned long *)&ci->ci_nextl;
1777 0 : val = pp->pr_cache_magic[0] ^ (u_long)ci;
1778 0 : if (*entry != val)
1779 : goto fail;
1780 :
1781 0 : entry++;
1782 0 : val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1783 0 : if (*entry != val)
1784 : goto fail;
1785 :
1786 : return;
1787 :
1788 : fail:
1789 0 : panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
1790 0 : __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
1791 0 : *entry, val);
1792 0 : }
1793 :
1794 : static inline void
1795 0 : pool_list_enter(struct pool *pp)
1796 : {
1797 0 : if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) {
1798 0 : pl_enter(pp, &pp->pr_cache_lock);
1799 0 : pp->pr_cache_contention++;
1800 0 : }
1801 0 : }
1802 :
/*
 * Release the pool's cache list lock taken by pool_list_enter().
 */
static inline void
pool_list_leave(struct pool *pp)
{
	pl_leave(pp, &pp->pr_cache_lock);
}
1808 :
1809 : static inline struct pool_cache_item *
1810 0 : pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
1811 : {
1812 : struct pool_cache_item *pl;
1813 :
1814 0 : pool_list_enter(pp);
1815 0 : pl = TAILQ_FIRST(&pp->pr_cache_lists);
1816 0 : if (pl != NULL) {
1817 0 : TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
1818 0 : pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
1819 :
1820 0 : pool_cache_item_magic(pp, pl);
1821 :
1822 0 : pc->pc_nlget++;
1823 0 : } else
1824 0 : pc->pc_nlfail++;
1825 :
1826 : /* fold this cpus nout into the global while we have the lock */
1827 0 : pp->pr_cache_nout += pc->pc_nout;
1828 0 : pc->pc_nout = 0;
1829 0 : pool_list_leave(pp);
1830 :
1831 0 : return (pl);
1832 : }
1833 :
1834 : static inline void
1835 0 : pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
1836 : struct pool_cache_item *ci)
1837 : {
1838 0 : pool_list_enter(pp);
1839 0 : if (TAILQ_EMPTY(&pp->pr_cache_lists))
1840 0 : pp->pr_cache_tick = ticks;
1841 :
1842 0 : pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
1843 0 : TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
1844 :
1845 0 : pc->pc_nlput++;
1846 :
1847 : /* fold this cpus nout into the global while we have the lock */
1848 0 : pp->pr_cache_nout += pc->pc_nout;
1849 0 : pc->pc_nout = 0;
1850 0 : pool_list_leave(pp);
1851 0 : }
1852 :
/*
 * Begin an operation on the current CPU's cache of pool pp.
 *
 * Returns the per-CPU pool_cache from cpumem_enter() and stores the
 * previous interrupt level, after raising to pr_ipl, in *s.  pc_gen is
 * incremented here and again in pool_cache_leave(); readers in this
 * file (pool_cache_pool_info(), pool_cache_cpus_info()) wait while it
 * is odd and retry if it changed.
 */
static inline struct pool_cache *
pool_cache_enter(struct pool *pp, int *s)
{
	struct pool_cache *pc;

	pc = cpumem_enter(pp->pr_cache);
	*s = splraise(pp->pr_ipl);
	pc->pc_gen++;

	return (pc);
}
1864 :
/*
 * End an operation started with pool_cache_enter(): bump pc_gen again,
 * restore the interrupt level saved in s and release the per-CPU cache
 * with cpumem_leave().
 */
static inline void
pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
{
	pc->pc_gen++;
	splx(s);
	cpumem_leave(pp->pr_cache, pc);
}
1872 :
/*
 * Try to satisfy an allocation from the current CPU's cache.
 *
 * Items are taken from the active list, then from the previous list,
 * then from a list fetched from the pool's global cache lists.
 * Returns the item, or NULL (counting the miss in pc_nfail) if all
 * three are empty.  The item's check words, and under DIAGNOSTIC its
 * poison pattern if it was poisoned, are verified first; a mismatch
 * panics.
 */
void *
pool_cache_get(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci;
	int s;

	pc = pool_cache_enter(pp, &s);

	if (pc->pc_actv != NULL) {
		ci = pc->pc_actv;
	} else if (pc->pc_prev != NULL) {
		/* the previous list becomes the active one (see below) */
		ci = pc->pc_prev;
		pc->pc_prev = NULL;
	} else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
		pc->pc_nfail++;
		goto done;
	}

	pool_cache_item_magic_check(pp, ci);
#ifdef DIAGNOSTIC
	if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
		size_t pidx;
		uint32_t pval;

		if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
		    &pidx, &pval)) {
			int *ip = (int *)(ci + 1);
			ip += pidx;

			panic("%s: %s cpu free list modified: "
			    "item addr %p+%zu 0x%x!=0x%x",
			    __func__, pp->pr_wchan, ci,
			    (caddr_t)ip - (caddr_t)ci, *ip, pval);
		}
	}
#endif

	/* pop ci; its successor heads the active list with one item less */
	pc->pc_actv = ci->ci_next;
	pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
	pc->pc_nget++;
	pc->pc_nout++;

done:
	pool_cache_leave(pp, pc, s);

	return (ci);
}
1921 :
/*
 * Return item v to the current CPU's cache.
 *
 * The item is pushed onto the active list.  If the active list already
 * holds pr_cache_items items, it first becomes the previous list, and
 * any existing previous list is handed to the pool's global cache
 * lists.  Under DIAGNOSTIC with pool_debug set, the part of the item
 * after the pool_cache_item header is poisoned (when the item is larger
 * than that header) and the item is flagged as poisoned.
 */
void
pool_cache_put(struct pool *pp, void *v)
{
	struct pool_cache *pc;
	struct pool_cache_item *ci = v;
	unsigned long nitems;
	int s;
#ifdef DIAGNOSTIC
	int poison = pool_debug && pp->pr_size > sizeof(*ci);

	if (poison)
		poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
#endif

	pc = pool_cache_enter(pp, &s);

	nitems = pc->pc_nactv;
	if (nitems >= pp->pr_cache_items) {
		/* active list is full: rotate it into the previous slot */
		if (pc->pc_prev != NULL)
			pool_cache_list_free(pp, pc, pc->pc_prev);

		pc->pc_prev = pc->pc_actv;

		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		nitems = 0;
	}

	/* the head item records the length of the list it heads */
	ci->ci_next = pc->pc_actv;
	ci->ci_nitems = ++nitems;
#ifdef DIAGNOSTIC
	ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
#endif
	pool_cache_item_magic(pp, ci);

	pc->pc_actv = ci;
	pc->pc_nactv = nitems;

	pc->pc_nput++;
	pc->pc_nout--;

	pool_cache_leave(pp, pc, s);
}
1965 :
1966 : struct pool_cache_item *
1967 0 : pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
1968 : {
1969 : struct pool_cache_item *rpl, *next;
1970 :
1971 0 : if (pl == NULL)
1972 0 : return (NULL);
1973 :
1974 0 : rpl = TAILQ_NEXT(pl, ci_nextl);
1975 :
1976 0 : pl_enter(pp, &pp->pr_lock);
1977 0 : do {
1978 0 : next = pl->ci_next;
1979 0 : pool_do_put(pp, pl);
1980 : pl = next;
1981 0 : } while (pl != NULL);
1982 0 : pl_leave(pp, &pp->pr_lock);
1983 :
1984 0 : return (rpl);
1985 0 : }
1986 :
1987 : void
1988 0 : pool_cache_destroy(struct pool *pp)
1989 : {
1990 : struct pool_cache *pc;
1991 : struct pool_cache_item *pl;
1992 0 : struct cpumem_iter i;
1993 : struct cpumem *cm;
1994 :
1995 0 : rw_enter_write(&pool_lock); /* serialise with the gc */
1996 0 : cm = pp->pr_cache;
1997 0 : pp->pr_cache = NULL; /* make pool_put avoid the cache */
1998 0 : rw_exit_write(&pool_lock);
1999 :
2000 0 : CPUMEM_FOREACH(pc, &i, cm) {
2001 0 : pool_cache_list_put(pp, pc->pc_actv);
2002 0 : pool_cache_list_put(pp, pc->pc_prev);
2003 : }
2004 :
2005 0 : cpumem_put(&pool_caches, cm);
2006 :
2007 0 : pl = TAILQ_FIRST(&pp->pr_cache_lists);
2008 0 : while (pl != NULL)
2009 0 : pl = pool_cache_list_put(pp, pl);
2010 0 : }
2011 :
2012 : void
2013 0 : pool_cache_gc(struct pool *pp)
2014 : {
2015 : unsigned int contention, delta;
2016 :
2017 0 : if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) &&
2018 0 : !TAILQ_EMPTY(&pp->pr_cache_lists) &&
2019 0 : pl_enter_try(pp, &pp->pr_cache_lock)) {
2020 : struct pool_cache_item *pl = NULL;
2021 :
2022 0 : pl = TAILQ_FIRST(&pp->pr_cache_lists);
2023 0 : if (pl != NULL) {
2024 0 : TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
2025 0 : pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
2026 0 : pp->pr_cache_tick = ticks;
2027 :
2028 0 : pp->pr_cache_ngc++;
2029 0 : }
2030 :
2031 0 : pl_leave(pp, &pp->pr_cache_lock);
2032 :
2033 0 : pool_cache_list_put(pp, pl);
2034 0 : }
2035 :
2036 : /*
2037 : * if there's a lot of contention on the pr_cache_mtx then consider
2038 : * growing the length of the list to reduce the need to access the
2039 : * global pool.
2040 : */
2041 :
2042 0 : contention = pp->pr_cache_contention;
2043 0 : delta = contention - pp->pr_cache_contention_prev;
2044 0 : if (delta > 8 /* magic */) {
2045 0 : if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
2046 0 : pp->pr_cache_items += 8;
2047 0 : } else if (delta == 0) {
2048 0 : if (pp->pr_cache_items > 8)
2049 0 : pp->pr_cache_items--;
2050 : }
2051 0 : pp->pr_cache_contention_prev = contention;
2052 0 : }
2053 :
/*
 * Add the per-CPU cache statistics of pool pp to the kinfo_pool pi.
 *
 * Does nothing if the pool has no cache.  Otherwise every CPU's
 * pc_nget and pc_nput are added to pi->pr_nget and pi->pr_nput, and
 * every CPU's pc_nout plus the pool's pr_cache_nout are added to
 * pi->pr_nout.
 */
void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pp->pr_cache == NULL)
		return;

	/* loop through the caches twice to collect stats */

	/* once without the lock so we can yield while reading nget/nput */
	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen, nget, nput;

		/*
		 * pc_gen is bumped on entry to and exit from a cache
		 * operation: wait while it is odd, and read again if it
		 * changed while the counters were being copied.
		 */
		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			nget = pc->pc_nget;
			nput = pc->pc_nput;
		} while (gen != pc->pc_gen);

		pi->pr_nget += nget;
		pi->pr_nput += nput;
	}

	/* and once with the mtx so we can get consistent nout values */
	pl_enter(pp, &pp->pr_cache_lock);
	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
		pi->pr_nout += pc->pc_nout;

	pi->pr_nout += pp->pr_cache_nout;
	pl_leave(pp, &pp->pr_cache_lock);
}
2089 :
/*
 * sysctl helper: copy out a struct kinfo_pool_cache describing the
 * cache of pool pp.
 *
 * Returns EOPNOTSUPP if the pool has no cache, otherwise the result of
 * sysctl_rdstruct().  The fields are sampled under pr_cache_lock.
 */
int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct kinfo_pool_cache kpc;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);

	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */

	pl_enter(pp, &pp->pr_cache_lock);
	kpc.pr_ngc = pp->pr_cache_ngc;
	kpc.pr_len = pp->pr_cache_items;
	kpc.pr_nitems = pp->pr_cache_nitems;
	kpc.pr_contention = pp->pr_cache_contention;
	pl_leave(pp, &pp->pr_cache_lock);

	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
}
2109 :
2110 : int
2111 0 : pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
2112 : {
2113 : struct pool_cache *pc;
2114 : struct kinfo_pool_cache_cpu *kpcc, *info;
2115 : unsigned int cpu = 0;
2116 0 : struct cpumem_iter i;
2117 : int error = 0;
2118 : size_t len;
2119 :
2120 0 : if (pp->pr_cache == NULL)
2121 0 : return (EOPNOTSUPP);
2122 0 : if (*oldlenp % sizeof(*kpcc))
2123 0 : return (EINVAL);
2124 :
2125 0 : kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
2126 : M_WAITOK|M_CANFAIL|M_ZERO);
2127 0 : if (kpcc == NULL)
2128 0 : return (EIO);
2129 :
2130 0 : len = ncpusfound * sizeof(*kpcc);
2131 :
2132 0 : CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
2133 : uint64_t gen;
2134 :
2135 0 : if (cpu >= ncpusfound) {
2136 : error = EIO;
2137 0 : goto err;
2138 : }
2139 :
2140 0 : info = &kpcc[cpu];
2141 0 : info->pr_cpu = cpu;
2142 :
2143 0 : do {
2144 0 : while ((gen = pc->pc_gen) & 1)
2145 0 : yield();
2146 :
2147 0 : info->pr_nget = pc->pc_nget;
2148 0 : info->pr_nfail = pc->pc_nfail;
2149 0 : info->pr_nput = pc->pc_nput;
2150 0 : info->pr_nlget = pc->pc_nlget;
2151 0 : info->pr_nlfail = pc->pc_nlfail;
2152 0 : info->pr_nlput = pc->pc_nlput;
2153 0 : } while (gen != pc->pc_gen);
2154 :
2155 0 : cpu++;
2156 0 : }
2157 :
2158 0 : error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
2159 : err:
2160 0 : free(kpcc, M_TEMP, len);
2161 :
2162 0 : return (error);
2163 0 : }
2164 : #else /* MULTIPROCESSOR */
/*
 * Variant of pool_cache_init() built when MULTIPROCESSOR is not defined:
 * it does nothing with pp.
 */
void
pool_cache_init(struct pool *pp)
{
	/* nop: intentionally empty in this configuration */
}
2170 :
/*
 * Variant of pool_cache_pool_info() built when MULTIPROCESSOR is not
 * defined: pi is left untouched.
 */
void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop: intentionally empty in this configuration */
}
2176 :
2177 : int
2178 : pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
2179 : {
2180 : return (EOPNOTSUPP);
2181 : }
2182 :
2183 : int
2184 : pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
2185 : {
2186 : return (EOPNOTSUPP);
2187 : }
2188 : #endif /* MULTIPROCESSOR */
2189 :
2190 :
2191 : void
2192 0 : pool_lock_mtx_init(struct pool *pp, union pool_lock *lock,
2193 : const struct lock_type *type)
2194 : {
2195 0 : _mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type);
2196 0 : }
2197 :
2198 : void
2199 0 : pool_lock_mtx_enter(union pool_lock *lock LOCK_FL_VARS)
2200 : {
2201 230 : _mtx_enter(&lock->prl_mtx LOCK_FL_ARGS);
2202 0 : }
2203 :
2204 : int
2205 0 : pool_lock_mtx_enter_try(union pool_lock *lock LOCK_FL_VARS)
2206 : {
2207 0 : return (_mtx_enter_try(&lock->prl_mtx LOCK_FL_ARGS));
2208 : }
2209 :
2210 : void
2211 0 : pool_lock_mtx_leave(union pool_lock *lock LOCK_FL_VARS)
2212 : {
2213 214 : _mtx_leave(&lock->prl_mtx LOCK_FL_ARGS);
2214 0 : }
2215 :
/*
 * Mutex flavour of the pool lock "assert locked" operation: applies
 * MUTEX_ASSERT_LOCKED() to the mutex member of lock.
 */
void
pool_lock_mtx_assert_locked(union pool_lock *lock)
{
	MUTEX_ASSERT_LOCKED(&lock->prl_mtx);
}
2221 :
/*
 * Mutex flavour of the pool lock "assert unlocked" operation: applies
 * MUTEX_ASSERT_UNLOCKED() to the mutex member of lock.
 */
void
pool_lock_mtx_assert_unlocked(union pool_lock *lock)
{
	MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx);
}
2227 :
2228 : int
2229 0 : pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority,
2230 : const char *wmesg, int timo)
2231 : {
2232 0 : return msleep(ident, &lock->prl_mtx, priority, wmesg, timo);
2233 : }
2234 :
2235 : static const struct pool_lock_ops pool_lock_ops_mtx = {
2236 : pool_lock_mtx_init,
2237 : pool_lock_mtx_enter,
2238 : pool_lock_mtx_enter_try,
2239 : pool_lock_mtx_leave,
2240 : pool_lock_mtx_assert_locked,
2241 : pool_lock_mtx_assert_unlocked,
2242 : pool_lock_mtx_sleep,
2243 : };
2244 :
2245 : void
2246 0 : pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
2247 : const struct lock_type *type)
2248 : {
2249 0 : _rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
2250 0 : }
2251 :
2252 : void
2253 0 : pool_lock_rw_enter(union pool_lock *lock LOCK_FL_VARS)
2254 : {
2255 0 : _rw_enter_write(&lock->prl_rwlock LOCK_FL_ARGS);
2256 0 : }
2257 :
2258 : int
2259 0 : pool_lock_rw_enter_try(union pool_lock *lock LOCK_FL_VARS)
2260 : {
2261 0 : return (_rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP
2262 0 : LOCK_FL_ARGS) == 0);
2263 : }
2264 :
2265 : void
2266 0 : pool_lock_rw_leave(union pool_lock *lock LOCK_FL_VARS)
2267 : {
2268 0 : _rw_exit_write(&lock->prl_rwlock LOCK_FL_ARGS);
2269 0 : }
2270 :
/*
 * rwlock flavour of the pool lock "assert locked" operation: applies
 * rw_assert_wrlock() to the rwlock member of lock.
 */
void
pool_lock_rw_assert_locked(union pool_lock *lock)
{
	rw_assert_wrlock(&lock->prl_rwlock);
}
2276 :
/*
 * rwlock flavour of the pool lock "assert unlocked" operation: asserts
 * that rw_status() of the rwlock member of lock is not RW_WRITE.  Any
 * other status passes the check.
 */
void
pool_lock_rw_assert_unlocked(union pool_lock *lock)
{
	KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE);
}
2282 :
2283 : int
2284 0 : pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority,
2285 : const char *wmesg, int timo)
2286 : {
2287 0 : return rwsleep(ident, &lock->prl_rwlock, priority, wmesg, timo);
2288 : }
2289 :
2290 : static const struct pool_lock_ops pool_lock_ops_rw = {
2291 : pool_lock_rw_init,
2292 : pool_lock_rw_enter,
2293 : pool_lock_rw_enter_try,
2294 : pool_lock_rw_leave,
2295 : pool_lock_rw_assert_locked,
2296 : pool_lock_rw_assert_unlocked,
2297 : pool_lock_rw_sleep,
2298 : };
|