/*	$OpenBSD: art.c,v 1.27 2017/02/28 09:50:13 mpi Exp $ */

/*
 * Copyright (c) 2015 Martin Pieuchot
 * Copyright (c) 2001 Yoichi Hariguchi
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Allotment Routing Table (ART).
 *
 * Yoichi Hariguchi's paper can be found at:
 *	http://www.hariguchi.org/art/art.pdf
 */

#ifndef _KERNEL
#include "kern_compat.h"
#else
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/task.h>
#include <sys/socket.h>
#endif

#include <net/art.h>

#define ISLEAF(e)	(((unsigned long)(e) & 1) == 0)
#define SUBTABLE(e)	((struct art_table *)((unsigned long)(e) & ~1))
#define ASNODE(t)	((struct art_node *)((unsigned long)(t) | 1))
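
/*
 * Heap slots hold either a leaf or a subtable, distinguished by the low
 * pointer bit: subtables are stored with bit 0 set, leaves (art_node
 * pointers and NULL) with bit 0 clear.  A minimal decoding sketch,
 * illustrative only and not part of the original file:
 *
 *	void *entry;				entry read from at_heap[]
 *
 *	if (ISLEAF(entry)) {
 *		struct art_node *an = entry;	possibly NULL
 *		.. use the route entry ..
 *	} else {
 *		struct art_table *child = SUBTABLE(entry);
 *		.. descend one level ..
 *	}
 *
 * ASNODE() performs the reverse tagging when a subtable pointer is stored
 * back into a heap slot, as done in art_insert().
 */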
43 :
44 : /*
45 : * Allotment Table.
46 : */
47 : struct art_table {
48 : struct art_table *at_parent; /* Parent table */
49 : uint32_t at_index; /* Index in the parent table */
50 : uint32_t at_minfringe; /* Index that fringe begins */
51 : uint32_t at_level; /* Level of the table */
52 : uint8_t at_bits; /* Stride length of the table */
53 : uint8_t at_offset; /* Sum of parents' stride len */
54 :
55 : /*
56 : * Items stored in the heap are pointers to nodes, in the leaf
57 : * case, or tables otherwise. One exception is index 0 which
58 : * is a route counter.
59 : */
60 : union {
61 : struct srp node;
62 : unsigned long count;
63 : } *at_heap; /* Array of 2^(slen+1) items */
64 : };
65 : #define at_refcnt at_heap[0].count/* Refcounter (1 per different route) */
66 : #define at_default at_heap[1].node /* Default route (was in parent heap) */
67 :
68 : /* Heap size for an ART table of stride length ``slen''. */
69 : #define AT_HEAPSIZE(slen) ((1 << ((slen) + 1)) * sizeof(void *))
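
/*
 * For a stride of ``slen'' bits the heap is a complete binary tree kept
 * in an array of 2^(slen+1) slots: slot 0 holds the reference counter,
 * slot 1 the default route, slots 2 .. 2^slen - 1 the non-fringe (shorter
 * prefix) entries and slots 2^slen .. 2^(slen+1) - 1 the fringe entries.
 * A worked size example, assuming a 64-bit platform (illustrative only):
 *
 *	AT_HEAPSIZE(4) = (1 << 5) * sizeof(void *) =  32 * 8 =  256 bytes
 *	AT_HEAPSIZE(8) = (1 << 9) * sizeof(void *) = 512 * 8 = 4096 bytes
 *
 * which is why art_init() below backs the 8-bit heaps with the single
 * page allocator.
 */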

int			 art_bindex(struct art_table *, uint8_t *, int);
void			 art_allot(struct art_table *at, int, struct art_node *,
			     struct art_node *);
struct art_table	*art_table_get(struct art_root *, struct art_table *,
			     int);
struct art_table	*art_table_put(struct art_root *, struct art_table *);
struct art_node		*art_table_insert(struct art_root *, struct art_table *,
			     int, struct art_node *);
struct art_node		*art_table_delete(struct art_root *, struct art_table *,
			     int, struct art_node *);
struct art_table	*art_table_ref(struct art_root *, struct art_table *);
int			 art_table_free(struct art_root *, struct art_table *);
int			 art_table_walk(struct art_root *, struct art_table *,
			     int (*f)(struct art_node *, void *), void *);
int			 art_walk_apply(struct art_root *,
			     struct art_node *, struct art_node *,
			     int (*f)(struct art_node *, void *), void *);
void			 art_table_gc(void *);
void			 art_gc(void *);

struct pool		 an_pool, at_pool, at_heap_4_pool, at_heap_8_pool;

struct art_table	*art_table_gc_list = NULL;
struct mutex		 art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct task		 art_table_gc_task =
			     TASK_INITIALIZER(art_table_gc, NULL);

struct art_node		*art_node_gc_list = NULL;
struct mutex		 art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct task		 art_node_gc_task = TASK_INITIALIZER(art_gc, NULL);

void
art_init(void)
{
	pool_init(&an_pool, sizeof(struct art_node), 0, IPL_SOFTNET, 0,
	    "art_node", NULL);
	pool_init(&at_pool, sizeof(struct art_table), 0, IPL_SOFTNET, 0,
	    "art_table", NULL);
	pool_init(&at_heap_4_pool, AT_HEAPSIZE(4), 0, IPL_SOFTNET, 0,
	    "art_heap4", NULL);
	pool_init(&at_heap_8_pool, AT_HEAPSIZE(8), 0, IPL_SOFTNET, 0,
	    "art_heap8", &pool_allocator_single);
}

/*
 * Per routing table initialization API function.
 */
struct art_root *
art_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
	struct art_root		*ar;
	int			 i;

	ar = malloc(sizeof(*ar), M_RTABLE, M_NOWAIT|M_ZERO);
	if (ar == NULL)
		return (NULL);

	switch (alen) {
	case 32:
		ar->ar_alen = 32;
		ar->ar_nlvl = 7;
		ar->ar_bits[0] = 8;
		for (i = 1; i < ar->ar_nlvl; i++)
			ar->ar_bits[i] = 4;
		break;
	case 128:
		ar->ar_alen = 128;
		ar->ar_nlvl = 32;
		for (i = 0; i < ar->ar_nlvl; i++)
			ar->ar_bits[i] = 4;
		break;
	default:
		printf("%s: incorrect address length %u\n", __func__, alen);
		free(ar, M_RTABLE, sizeof(*ar));
		return (NULL);
	}

	ar->ar_off = off;
	ar->ar_rtableid = rtableid;
	rw_init(&ar->ar_lock, "art");

	return (ar);
}
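
/*
 * Stride configuration resulting from art_alloc(): an IPv4 table uses
 * 7 levels (8 + 6*4 = 32 bits) and an IPv6 table 32 levels of 4 bits
 * (32*4 = 128 bits).  A minimal allocation sketch, illustrative only and
 * assuming the caller passes the byte offset of the address inside its
 * sockaddr as ``off'':
 *
 *	struct art_root *ar4, *ar6;
 *
 *	ar4 = art_alloc(rtableid, 32,
 *	    offsetof(struct sockaddr_in, sin_addr));
 *	ar6 = art_alloc(rtableid, 128,
 *	    offsetof(struct sockaddr_in6, sin6_addr));
 *	if (ar4 == NULL || ar6 == NULL)
 *		return (ENOMEM);
 */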

/*
 * Return 1 if ``old'' and ``new'' are identical, 0 otherwise.
 */
static inline int
art_check_duplicate(struct art_root *ar, struct art_node *old,
    struct art_node *new)
{
	if (old == NULL)
		return (0);

	if (old->an_plen == new->an_plen)
		return (1);

	return (0);
}

/*
 * Return the base index of the part of ``addr'' and ``plen''
 * corresponding to the range covered by the table ``at''.
 *
 * In other words, this function takes the multi-level (complete)
 * address ``addr'' and prefix length ``plen'' and returns the
 * single level base index for the table ``at''.
 *
 * For example, with an address size of 32 bits divided into four
 * 8-bit-long tables, there is a maximum of 4 base indexes if the
 * prefix length is > 24.
 */
int
art_bindex(struct art_table *at, uint8_t *addr, int plen)
{
	uint8_t			boff, bend;
	uint32_t		k;

	if (plen < at->at_offset || plen > (at->at_offset + at->at_bits))
		return (-1);

	/*
	 * We are only interested in the part of the prefix length
	 * corresponding to the range of this table.
	 */
	plen -= at->at_offset;

	/*
	 * Jump to the first byte of the address containing bits
	 * covered by this table.
	 */
	addr += (at->at_offset / 8);

	/* ``at'' covers the bit range between ``boff'' & ``bend''. */
	boff = (at->at_offset % 8);
	bend = (at->at_bits + boff);

	KASSERT(bend <= 32);

	if (bend > 24) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] << (bend - 16);
		k |= addr[2] << (bend - 24);
		k |= addr[3] >> (32 - bend);
	} else if (bend > 16) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] << (bend - 16);
		k |= addr[2] >> (24 - bend);
	} else if (bend > 8) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] >> (16 - bend);
	} else {
		k = (addr[0] >> (8 - bend)) & ((1 << at->at_bits) - 1);
	}

	/*
	 * Single level base index formula:
	 */
	return ((k >> (at->at_bits - plen)) + (1 << plen));
}
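
/*
 * A worked example of the base index computation (illustrative only):
 * for the top IPv4 table (at_offset = 0, at_bits = 8) and the address
 * 192.0.2.0, the stride bits are k = 192 and
 *
 *	plen = 8:  bindex = (192 >> (8 - 8)) + (1 << 8) = 192 + 256 = 448
 *	plen = 4:  bindex = (192 >> (8 - 4)) + (1 << 4) =  12 +  16 =  28
 *
 * so the /8 lands on a fringe index (>= at_minfringe = 256) while the /4
 * lands on a non-fringe index covering several fringe slots.  art_findex()
 * below is simply art_bindex() with the full stride length, so it always
 * returns a fringe index.
 */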

/*
 * Single level lookup function.
 *
 * Return the fringe index of the part of ``addr''
 * corresponding to the range covered by the table ``at''.
 */
static inline int
art_findex(struct art_table *at, uint8_t *addr)
{
	return art_bindex(at, addr, (at->at_offset + at->at_bits));
}

/*
 * (Non-perfect) lookup API function.
 *
 * Return the best existing match for a destination.
 */
struct art_node *
art_match(struct art_root *ar, void *addr, struct srp_ref *nsr)
{
	struct srp_ref		 dsr, ndsr;
	void			*entry;
	struct art_table	*at;
	struct art_node		*dflt, *ndflt;
	int			 j;

	entry = srp_enter(nsr, &ar->ar_root);
	at = entry;

	if (at == NULL)
		goto done;

	/*
	 * Remember the default route of each table we visit in case
	 * we do not find a better matching route.
	 */
	dflt = srp_enter(&dsr, &at->at_default);

	/*
	 * Iterate until we find a leaf.
	 */
	while (1) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		entry = srp_follow(nsr, &at->at_heap[j].node);

		/* If this is a leaf (NULL is a leaf) we're done. */
		if (ISLEAF(entry))
			break;

		at = SUBTABLE(entry);

		ndflt = srp_enter(&ndsr, &at->at_default);
		if (ndflt != NULL) {
			srp_leave(&dsr);
			dsr = ndsr;
			dflt = ndflt;
		} else
			srp_leave(&ndsr);
	}

	if (entry == NULL) {
		srp_leave(nsr);
		*nsr = dsr;
		KASSERT(ISLEAF(dflt));
		return (dflt);
	}

	srp_leave(&dsr);
done:
	KASSERT(ISLEAF(entry));
	return (entry);
}
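
/*
 * A minimal caller sketch for the lookup above (illustrative only; the
 * caller-side names are assumptions, not part of this file).  The caller
 * keeps the SRP reference until it is done with the returned node:
 *
 *	struct srp_ref nsr;
 *	struct art_node *an;
 *
 *	an = art_match(ar, &sin->sin_addr, &nsr);
 *	if (an != NULL) {
 *		.. use the route entry attached to ``an'' ..
 *	}
 *	srp_leave(&nsr);
 *
 * srp_leave() is called even when no route was found, since art_match()
 * always leaves ``nsr'' entered.
 */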

/*
 * Perfect lookup API function.
 *
 * Return a perfect match for a destination/prefix-length pair or NULL if
 * it does not exist.
 */
struct art_node *
art_lookup(struct art_root *ar, void *addr, int plen, struct srp_ref *nsr)
{
	void			*entry;
	struct art_table	*at;
	int			 i, j;

	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	entry = srp_enter(nsr, &ar->ar_root);
	at = entry;

	if (at == NULL)
		goto done;

	/* Default route */
	if (plen == 0) {
		entry = srp_follow(nsr, &at->at_default);
		goto done;
	}

	/*
	 * If the prefix length is smaller than the sum of
	 * the stride lengths up to this level, the entry must
	 * be in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		entry = srp_follow(nsr, &at->at_heap[j].node);

		/* A leaf is a match, but not a perfect one, or NULL */
		if (ISLEAF(entry))
			return (NULL);

		at = SUBTABLE(entry);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	entry = srp_follow(nsr, &at->at_heap[i].node);
	if (!ISLEAF(entry))
		entry = srp_follow(nsr, &SUBTABLE(entry)->at_default);

done:
	KASSERT(ISLEAF(entry));
	return (entry);
}
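
/*
 * Sketch of the difference with art_match() (illustrative only): the
 * perfect lookup answers "is this exact prefix installed?", e.g.
 *
 *	struct srp_ref nsr;
 *	struct art_node *an;
 *
 *	an = art_lookup(ar, addr, 24, &nsr);	exact /24 or NULL
 *	.. inspect ``an'' ..
 *	srp_leave(&nsr);
 *
 * whereas art_match() walks down to the longest prefix covering the
 * destination, falling back to the per-table default routes it recorded
 * on the way.
 */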


/*
 * Insertion API function.
 *
 * Insert the given node or return an existing one if a node with the
 * same destination/mask pair is already present.
 */
struct art_node *
art_insert(struct art_root *ar, struct art_node *an, void *addr, int plen)
{
	struct art_table	*at, *child;
	struct art_node		*node;
	int			 i, j;

	rw_assert_wrlock(&ar->ar_lock);
	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	at = srp_get_locked(&ar->ar_root);
	if (at == NULL) {
		at = art_table_get(ar, NULL, -1);
		if (at == NULL)
			return (NULL);

		srp_swap_locked(&ar->ar_root, at);
	}

	/* Default route */
	if (plen == 0) {
		node = srp_get_locked(&at->at_default);
		if (node != NULL)
			return (node);

		art_table_ref(ar, at);
		srp_swap_locked(&at->at_default, an);
		return (an);
	}

	/*
	 * If the prefix length is smaller than the sum of
	 * the stride lengths up to this level, the entry must
	 * be in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		node = srp_get_locked(&at->at_heap[j].node);

		/*
		 * If the node corresponding to the fringe index is
		 * a leaf we need to allocate a subtable.  The route
		 * entry of this node will then become the default
		 * route of the subtable.
		 */
		if (ISLEAF(node)) {
			child = art_table_get(ar, at, j);
			if (child == NULL)
				return (NULL);

			art_table_ref(ar, at);
			srp_swap_locked(&at->at_heap[j].node, ASNODE(child));
			at = child;
		} else
			at = SUBTABLE(node);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	return (art_table_insert(ar, at, i, an));
}
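
/*
 * A minimal insertion sketch (illustrative only; error codes and the
 * surrounding logic are assumptions about the caller, not part of this
 * file).  Callers hold the write lock and check whether their node was
 * actually linked or a duplicate already existed:
 *
 *	struct art_node *an, *prev;
 *
 *	an = art_get(dst, plen);
 *	if (an == NULL)
 *		return (ENOBUFS);
 *
 *	rw_enter_write(&ar->ar_lock);
 *	prev = art_insert(ar, an, addr, plen);
 *	rw_exit_write(&ar->ar_lock);
 *
 *	if (prev != an) {
 *		art_put(an);		duplicate or allocation failure
 *		return (prev == NULL ? ENOMEM : EEXIST);
 *	}
 */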

/*
 * Single level insertion.
 */
struct art_node *
art_table_insert(struct art_root *ar, struct art_table *at, int i,
    struct art_node *an)
{
	struct art_node		*prev, *node;

	node = srp_get_locked(&at->at_heap[i].node);
	if (!ISLEAF(node))
		prev = srp_get_locked(&SUBTABLE(node)->at_default);
	else
		prev = node;

	if (art_check_duplicate(ar, prev, an))
		return (prev);

	art_table_ref(ar, at);

	/*
	 * If the index `i' of the route that we are inserting is not
	 * a fringe index, we need to allot this new route pointer to
	 * all the corresponding fringe indices.
	 */
	if (i < at->at_minfringe)
		art_allot(at, i, prev, an);
	else if (!ISLEAF(node))
		srp_swap_locked(&SUBTABLE(node)->at_default, an);
	else
		srp_swap_locked(&at->at_heap[i].node, an);

	return (an);
}


/*
 * Deletion API function.
 */
struct art_node *
art_delete(struct art_root *ar, struct art_node *an, void *addr, int plen)
{
	struct art_table	*at;
	struct art_node		*node;
	int			 i, j;

	rw_assert_wrlock(&ar->ar_lock);
	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	at = srp_get_locked(&ar->ar_root);
	if (at == NULL)
		return (NULL);

	/* Default route */
	if (plen == 0) {
		node = srp_get_locked(&at->at_default);
		srp_swap_locked(&at->at_default, NULL);
		art_table_free(ar, at);
		return (node);
	}

	/*
	 * If the prefix length is smaller than the sum of
	 * the stride lengths up to this level, the entry must
	 * be in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		node = srp_get_locked(&at->at_heap[j].node);

		/* If this is a leaf, there is no route to delete. */
		if (ISLEAF(node))
			return (NULL);

		at = SUBTABLE(node);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	return (art_table_delete(ar, at, i, an));
}
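
/*
 * Deletion mirrors insertion (illustrative sketch; the caller-side flow
 * is an assumption): the node is unlinked under the write lock and only
 * released afterwards.
 *
 *	rw_enter_write(&ar->ar_lock);
 *	node = art_delete(ar, an, addr, plen);
 *	rw_exit_write(&ar->ar_lock);
 *
 *	if (node != NULL)
 *		art_put(node);		actually freed later by art_gc()
 */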

/*
 * Single level deletion.
 */
struct art_node *
art_table_delete(struct art_root *ar, struct art_table *at, int i,
    struct art_node *an)
{
	struct art_node		*next, *node;

#ifdef DIAGNOSTIC
	struct art_node		*prev;
#endif

	node = srp_get_locked(&at->at_heap[i].node);
#ifdef DIAGNOSTIC
	if (!ISLEAF(node))
		prev = srp_get_locked(&SUBTABLE(node)->at_default);
	else
		prev = node;

	KASSERT(prev == an);
#endif

	/* Get the next most specific route for the index `i'. */
	if ((i >> 1) > 1)
		next = srp_get_locked(&at->at_heap[i >> 1].node);
	else
		next = NULL;

	/*
	 * If the index `i' of the route that we are removing is not
	 * a fringe index, we need to allot the next most specific
	 * route pointer to all the corresponding fringe indices.
	 */
	if (i < at->at_minfringe)
		art_allot(at, i, an, next);
	else if (!ISLEAF(node))
		srp_swap_locked(&SUBTABLE(node)->at_default, next);
	else
		srp_swap_locked(&at->at_heap[i].node, next);

	/* We have removed an entry from this table. */
	art_table_free(ar, at);

	return (an);
}

struct art_table *
art_table_ref(struct art_root *ar, struct art_table *at)
{
	at->at_refcnt++;
	return (at);
}

static inline int
art_table_rele(struct art_table *at)
{
	if (at == NULL)
		return (0);

	return (--at->at_refcnt == 0);
}

int
art_table_free(struct art_root *ar, struct art_table *at)
{
	if (art_table_rele(at)) {
		/*
		 * Garbage collect this table and all its parents
		 * that are empty.
		 */
		do {
			at = art_table_put(ar, at);
		} while (art_table_rele(at));

		return (1);
	}

	return (0);
}

/*
 * Iteration API function.
 */
int
art_walk(struct art_root *ar, int (*f)(struct art_node *, void *), void *arg)
{
	struct srp_ref		 sr;
	struct art_table	*at;
	struct art_node		*node;
	int			 error = 0;

	rw_enter_write(&ar->ar_lock);
	at = srp_get_locked(&ar->ar_root);
	if (at != NULL) {
		art_table_ref(ar, at);

		/*
		 * The default route should be processed here because the root
		 * table does not have a parent.
		 */
		node = srp_enter(&sr, &at->at_default);
		error = art_walk_apply(ar, node, NULL, f, arg);
		srp_leave(&sr);

		if (error == 0)
			error = art_table_walk(ar, at, f, arg);

		art_table_free(ar, at);
	}
	rw_exit_write(&ar->ar_lock);

	return (error);
}
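
/*
 * A minimal walker sketch (illustrative only; the callback name and its
 * argument are assumptions).  The callback runs with the table lock
 * released, since art_walk_apply() below drops and re-takes ``ar_lock''
 * around each invocation:
 *
 *	int
 *	print_node(struct art_node *an, void *arg)
 *	{
 *		printf("prefix length %u\n", an->an_plen);
 *		return (0);		non-zero aborts the walk
 *	}
 *
 *	error = art_walk(ar, print_node, NULL);
 */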

int
art_table_walk(struct art_root *ar, struct art_table *at,
    int (*f)(struct art_node *, void *), void *arg)
{
	struct srp_ref		 sr;
	struct art_node		*node, *next;
	struct art_table	*nat;
	int			 i, j, error = 0;
	uint32_t		 maxfringe = (at->at_minfringe << 1);

	/*
	 * Iterate non-fringe nodes in ``natural'' order.
	 */
	for (j = 1; j < at->at_minfringe; j += 2) {
		/*
		 * The default route (index 1) is processed by the
		 * parent table (where it belongs) otherwise it could
		 * be processed more than once.
		 */
		for (i = max(j, 2); i < at->at_minfringe; i <<= 1) {
			next = srp_get_locked(&at->at_heap[i >> 1].node);

			node = srp_enter(&sr, &at->at_heap[i].node);
			error = art_walk_apply(ar, node, next, f, arg);
			srp_leave(&sr);

			if (error != 0)
				return (error);
		}
	}

	/*
	 * Iterate fringe nodes.
	 */
	for (i = at->at_minfringe; i < maxfringe; i++) {
		next = srp_get_locked(&at->at_heap[i >> 1].node);

		node = srp_enter(&sr, &at->at_heap[i].node);
		if (!ISLEAF(node)) {
			nat = art_table_ref(ar, SUBTABLE(node));
			node = srp_follow(&sr, &nat->at_default);
		} else
			nat = NULL;

		error = art_walk_apply(ar, node, next, f, arg);
		srp_leave(&sr);

		if (error != 0) {
			art_table_free(ar, nat);
			return (error);
		}

		if (nat != NULL) {
			error = art_table_walk(ar, nat, f, arg);
			art_table_free(ar, nat);
			if (error != 0)
				return (error);
		}
	}

	return (0);
}

int
art_walk_apply(struct art_root *ar,
    struct art_node *an, struct art_node *next,
    int (*f)(struct art_node *, void *), void *arg)
{
	int error = 0;

	if ((an != NULL) && (an != next)) {
		rw_exit_write(&ar->ar_lock);
		error = (*f)(an, arg);
		rw_enter_write(&ar->ar_lock);
	}

	return (error);
}


/*
 * Create a table and use the given index to set its default route.
 *
 * Note: This function does not modify the root or the parent.
 */
struct art_table *
art_table_get(struct art_root *ar, struct art_table *parent, int j)
{
	struct art_table	*at;
	struct art_node		*node;
	void			*at_heap;
	uint32_t		 lvl;

	KASSERT(j != 0 && j != 1);
	KASSERT(parent != NULL || j == -1);

	if (parent != NULL)
		lvl = parent->at_level + 1;
	else
		lvl = 0;

	KASSERT(lvl < ar->ar_nlvl);

	at = pool_get(&at_pool, PR_NOWAIT|PR_ZERO);
	if (at == NULL)
		return (NULL);

	switch (AT_HEAPSIZE(ar->ar_bits[lvl])) {
	case AT_HEAPSIZE(4):
		at_heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO);
		break;
	case AT_HEAPSIZE(8):
		at_heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO);
		break;
	default:
		panic("incorrect stride length %u", ar->ar_bits[lvl]);
	}

	if (at_heap == NULL) {
		pool_put(&at_pool, at);
		return (NULL);
	}

	at->at_parent = parent;
	at->at_index = j;
	at->at_minfringe = (1 << ar->ar_bits[lvl]);
	at->at_level = lvl;
	at->at_bits = ar->ar_bits[lvl];
	at->at_heap = at_heap;
	at->at_refcnt = 0;

	if (parent != NULL) {
		node = srp_get_locked(&parent->at_heap[j].node);
		/* node isn't being deleted, no srp_finalize needed */
		srp_swap_locked(&at->at_default, node);
		at->at_offset = (parent->at_offset + parent->at_bits);
	}

	return (at);
}


/*
 * Delete a table and use its index to restore its parent's default route.
 *
 * Note: Modify its parent to unlink the table from it.
 */
struct art_table *
art_table_put(struct art_root *ar, struct art_table *at)
{
	struct art_table	*parent = at->at_parent;
	struct art_node		*node;
	uint32_t		 j = at->at_index;

	KASSERT(at->at_refcnt == 0);
	KASSERT(j != 0 && j != 1);

	if (parent != NULL) {
		KASSERT(j != -1);
		KASSERT(at->at_level == parent->at_level + 1);
		KASSERT(parent->at_refcnt >= 1);

		/* Give the route back to its parent. */
		node = srp_get_locked(&at->at_default);
		srp_swap_locked(&parent->at_heap[j].node, node);
	} else {
		KASSERT(j == -1);
		KASSERT(at->at_level == 0);
		srp_swap_locked(&ar->ar_root, NULL);
	}

	mtx_enter(&art_table_gc_mtx);
	at->at_parent = art_table_gc_list;
	art_table_gc_list = at;
	mtx_leave(&art_table_gc_mtx);

	task_add(systqmp, &art_table_gc_task);

	return (parent);
}

void
art_table_gc(void *null)
{
	struct art_table *at, *next;

	mtx_enter(&art_table_gc_mtx);
	at = art_table_gc_list;
	art_table_gc_list = NULL;
	mtx_leave(&art_table_gc_mtx);

	while (at != NULL) {
		next = at->at_parent;

		if (at->at_level == 0)
			srp_finalize(at, "arttfini");
		else
			srp_finalize(ASNODE(at), "arttfini");

		switch (AT_HEAPSIZE(at->at_bits)) {
		case AT_HEAPSIZE(4):
			pool_put(&at_heap_4_pool, at->at_heap);
			break;
		case AT_HEAPSIZE(8):
			pool_put(&at_heap_8_pool, at->at_heap);
			break;
		default:
			panic("incorrect stride length %u", at->at_bits);
		}

		pool_put(&at_pool, at);

		at = next;
	}
}

/*
 * Substitute a node by another in the subtree whose root index is given.
 *
 * This function iterates on the table ``at'' at index ``i'' until no
 * more ``old'' nodes can be replaced by ``new''.
 *
 * This function was originally written by Don Knuth in CWEB. The
 * complicated ``goto''s are the result of expansion of the two
 * following recursions:
 *
 *	art_allot(at, i, old, new)
 *	{
 *		int k = i;
 *		if (at->at_heap[k] == old)
 *			at->at_heap[k] = new;
 *		if (k >= at->at_minfringe)
 *			return;
 *		k <<= 1;
 *		art_allot(at, k, old, new);
 *		k++;
 *		art_allot(at, k, old, new);
 *	}
 */
void
art_allot(struct art_table *at, int i, struct art_node *old,
    struct art_node *new)
{
	struct art_node		*node, *dflt;
	int			 k = i;

	KASSERT(i < at->at_minfringe);

again:
	k <<= 1;
	if (k < at->at_minfringe)
		goto nonfringe;

	/* Change fringe nodes. */
	while (1) {
		node = srp_get_locked(&at->at_heap[k].node);
		if (!ISLEAF(node)) {
			dflt = srp_get_locked(&SUBTABLE(node)->at_default);
			if (dflt == old) {
				srp_swap_locked(&SUBTABLE(node)->at_default,
				    new);
			}
		} else if (node == old) {
			srp_swap_locked(&at->at_heap[k].node, new);
		}
		if (k % 2)
			goto moveup;
		k++;
	}

nonfringe:
	node = srp_get_locked(&at->at_heap[k].node);
	if (node == old)
		goto again;
moveon:
	if (k % 2)
		goto moveup;
	k++;
	goto nonfringe;
moveup:
	k >>= 1;
	srp_swap_locked(&at->at_heap[k].node, new);

	/* Change non-fringe node. */
	if (k != i)
		goto moveon;
}
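
/*
 * A worked allotment example (illustrative only): in a 4-bit table
 * (at_minfringe = 16), inserting a prefix whose base index is i = 5
 * (a 2-bit prefix covering the second quarter of the stride) walks the
 * subtree rooted at 5 and visits heap slots
 *
 *	5 -> 10, 11 -> 20, 21, 22, 23
 *
 * replacing ``old'' by ``new'' in the fringe slots 20-23 (and in the
 * default route of any subtable hanging off them), while leaving alone
 * any slot already owned by a more specific route.
 */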

struct art_node *
art_get(void *dst, uint8_t plen)
{
	struct art_node		*an;

	an = pool_get(&an_pool, PR_NOWAIT | PR_ZERO);
	if (an == NULL)
		return (NULL);

	an->an_plen = plen;
	SRPL_INIT(&an->an_rtlist);

	return (an);
}

void
art_put(struct art_node *an)
{
	KASSERT(SRPL_EMPTY_LOCKED(&an->an_rtlist));

	mtx_enter(&art_node_gc_mtx);
	an->an_gc = art_node_gc_list;
	art_node_gc_list = an;
	mtx_leave(&art_node_gc_mtx);

	task_add(systqmp, &art_node_gc_task);
}

void
art_gc(void *null)
{
	struct art_node		*an, *next;

	mtx_enter(&art_node_gc_mtx);
	an = art_node_gc_list;
	art_node_gc_list = NULL;
	mtx_leave(&art_node_gc_mtx);

	while (an != NULL) {
		next = an->an_gc;

		srp_finalize(an, "artnfini");

		pool_put(&an_pool, an);

		an = next;
	}
}