GCC Code Coverage Report
Directory: ./
File:      usr.sbin/bgpd/rde_rib.c
Date:      2016-12-06

           Exec  Total  Coverage
Lines:        0    523     0.0 %
Branches:     0    575     0.0 %
/*	$OpenBSD: rde_rib.c,v 1.142 2015/03/14 03:52:42 claudio Exp $ */

/*
 * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/queue.h>

#include <stdlib.h>
#include <string.h>
#include <siphash.h>

#include "bgpd.h"
#include "rde.h"

/*
 * BGP RIB -- Routing Information Base
 *
 * The RIB is built with one aspect in mind: speed -- update speed, to be
 * precise. Therefore one thing must be avoided at all costs: long table
 * walks. This is achieved by heavily linking the different parts together.
 */
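
/*
 * Illustrative sketch (not part of the original source): the linkage the
 * comment above refers to. Field names follow this file; the exact
 * struct layouts live in rde.h.
 *
 *   struct rib --(RB tree)--> struct rib_entry --(prefix_h)--> struct prefix
 *   struct prefix --(aspath)--> struct rde_aspath --(nexthop)--> struct nexthop
 *   struct prefix --(prefix)--> struct pt_entry
 *   struct rde_aspath --(peer)--> struct rde_peer
 *
 * Every prefix points back at its rib_entry, aspath and pt_entry, so an
 * update can patch all affected objects without a table walk.
 */
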
u_int16_t rib_size;
struct rib *ribs;

LIST_HEAD(, rib_context) rib_dump_h = LIST_HEAD_INITIALIZER(rib_dump_h);

struct rib_entry *rib_add(struct rib *, struct bgpd_addr *, int);
int rib_compare(const struct rib_entry *, const struct rib_entry *);
void rib_remove(struct rib_entry *);
int rib_empty(struct rib_entry *);
struct rib_entry *rib_restart(struct rib_context *);

RB_PROTOTYPE(rib_tree, rib_entry, rib_e, rib_compare);
RB_GENERATE(rib_tree, rib_entry, rib_e, rib_compare);


/* RIB specific functions */
u_int16_t
rib_new(char *name, u_int rtableid, u_int16_t flags)
{
	struct rib	*xribs;
	u_int16_t	id;

	for (id = 0; id < rib_size; id++) {
		if (*ribs[id].name == '\0')
			break;
	}

	if (id == RIB_FAILED)
		fatalx("rib_new: trying to use reserved id");

	if (id >= rib_size) {
		if ((xribs = reallocarray(ribs, id + 1,
		    sizeof(struct rib))) == NULL) {
			/* XXX this is not clever */
			fatal("rib_new");
		}
		ribs = xribs;
		rib_size = id + 1;
	}

	bzero(&ribs[id], sizeof(struct rib));
	strlcpy(ribs[id].name, name, sizeof(ribs[id].name));
	RB_INIT(&ribs[id].rib);
	ribs[id].state = RECONF_REINIT;
	ribs[id].id = id;
	ribs[id].flags = flags;
	ribs[id].rtableid = rtableid;

	ribs[id].in_rules = calloc(1, sizeof(struct filter_head));
	if (ribs[id].in_rules == NULL)
		fatal(NULL);
	TAILQ_INIT(ribs[id].in_rules);

	return (id);
}

u_int16_t
rib_find(char *name)
{
	u_int16_t id;

	if (name == NULL || *name == '\0')
		return (1);	/* no name returns the Loc-RIB */

	for (id = 0; id < rib_size; id++) {
		if (!strcmp(ribs[id].name, name))
			return (id);
	}

	return (RIB_FAILED);
}

void
rib_free(struct rib *rib)
{
	struct rib_context *ctx, *next;
	struct rib_entry *re, *xre;
	struct prefix *p, *np;

	/* abort pending rib_dumps */
	for (ctx = LIST_FIRST(&rib_dump_h); ctx != NULL; ctx = next) {
		next = LIST_NEXT(ctx, entry);
		if (ctx->ctx_rib == rib) {
			re = ctx->ctx_re;
			re->flags &= ~F_RIB_ENTRYLOCK;
			LIST_REMOVE(ctx, entry);
			if (ctx->ctx_done)
				ctx->ctx_done(ctx->ctx_arg);
			else
				free(ctx);
		}
	}

	for (re = RB_MIN(rib_tree, &rib->rib); re != NULL; re = xre) {
		xre = RB_NEXT(rib_tree, &rib->rib, re);

		/*
		 * Removing the prefixes is tricky because the last one
		 * will remove the rib_entry as well, and because
		 * prefix_destroy() does an empty check it is not possible
		 * to use the default for loop here.
		 */
		while ((p = LIST_FIRST(&re->prefix_h))) {
			np = LIST_NEXT(p, rib_l);
			if (p->aspath->pftableid) {
				struct bgpd_addr addr;

				pt_getaddr(p->prefix, &addr);
				/* Commit is done in peer_down() */
				rde_send_pftable(p->aspath->pftableid, &addr,
				    p->prefix->prefixlen, 1);
			}
			prefix_destroy(p);
			if (np == NULL)
				break;
		}
	}
	filterlist_free(rib->in_rules_tmp);
	filterlist_free(rib->in_rules);
	bzero(rib, sizeof(struct rib));
}

int
rib_compare(const struct rib_entry *a, const struct rib_entry *b)
{
	return (pt_prefix_cmp(a->prefix, b->prefix));
}

struct rib_entry *
rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
{
	struct rib_entry xre;
	struct pt_entry	*pte;

	pte = pt_fill(prefix, prefixlen);
	bzero(&xre, sizeof(xre));
	xre.prefix = pte;

	return (RB_FIND(rib_tree, &rib->rib, &xre));
}

struct rib_entry *
rib_lookup(struct rib *rib, struct bgpd_addr *addr)
{
	struct rib_entry *re;
	int		 i;

	switch (addr->aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		for (i = 32; i >= 0; i--) {
			re = rib_get(rib, addr, i);
			if (re != NULL)
				return (re);
		}
		break;
	case AID_INET6:
		for (i = 128; i >= 0; i--) {
			re = rib_get(rib, addr, i);
			if (re != NULL)
				return (re);
		}
		break;
	default:
		fatalx("rib_lookup: unknown af");
	}
	return (NULL);
}
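
/*
 * Illustrative note (not part of the original source): rib_lookup() does
 * a longest-prefix match by probing every prefix length from the host
 * route down to the default route. For the IPv4 address 192.0.2.1 it
 * asks rib_get() for 192.0.2.1/32, then /31, /30, ... and returns the
 * first entry found, so a default route (/0) matches only as a last
 * resort.
 */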


struct rib_entry *
rib_add(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
{
	struct pt_entry	*pte;
	struct rib_entry *re;

	pte = pt_get(prefix, prefixlen);
	if (pte == NULL)
		pte = pt_add(prefix, prefixlen);

	if ((re = calloc(1, sizeof(*re))) == NULL)
		fatal("rib_add");

	LIST_INIT(&re->prefix_h);
	re->prefix = pte;
	re->flags = rib->flags;
	re->ribid = rib->id;

	if (RB_INSERT(rib_tree, &rib->rib, re) != NULL) {
		log_warnx("rib_add: insert failed");
		free(re);
		return (NULL);
	}

	pt_ref(pte);

	rdemem.rib_cnt++;

	return (re);
}

void
rib_remove(struct rib_entry *re)
{
	if (!rib_empty(re))
		fatalx("rib_remove: entry not empty");

	if (re->flags & F_RIB_ENTRYLOCK)
		/* entry is locked, don't free it. */
		return;

	pt_unref(re->prefix);
	if (pt_empty(re->prefix))
		pt_remove(re->prefix);

	if (RB_REMOVE(rib_tree, &ribs[re->ribid].rib, re) == NULL)
		log_warnx("rib_remove: remove failed.");

	free(re);
	rdemem.rib_cnt--;
}

int
rib_empty(struct rib_entry *re)
{
	return LIST_EMPTY(&re->prefix_h);
}

void
rib_dump(struct rib *rib, void (*upcall)(struct rib_entry *, void *),
    void *arg, u_int8_t aid)
{
	struct rib_context	*ctx;

	if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
		fatal("rib_dump");
	ctx->ctx_rib = rib;
	ctx->ctx_upcall = upcall;
	ctx->ctx_arg = arg;
	ctx->ctx_aid = aid;
	rib_dump_r(ctx);
}

void
rib_dump_r(struct rib_context *ctx)
{
	struct rib_entry	*re;
	unsigned int		 i;

	if (ctx->ctx_re == NULL) {
		re = RB_MIN(rib_tree, &ctx->ctx_rib->rib);
		LIST_INSERT_HEAD(&rib_dump_h, ctx, entry);
	} else
		re = rib_restart(ctx);

	for (i = 0; re != NULL; re = RB_NEXT(rib_tree, unused, re)) {
		if (ctx->ctx_aid != AID_UNSPEC &&
		    ctx->ctx_aid != re->prefix->aid)
			continue;
		if (ctx->ctx_count && i++ >= ctx->ctx_count &&
		    (re->flags & F_RIB_ENTRYLOCK) == 0) {
			/* store and lock last element */
			ctx->ctx_re = re;
			re->flags |= F_RIB_ENTRYLOCK;
			return;
		}
		ctx->ctx_upcall(re, ctx->ctx_arg);
	}

	LIST_REMOVE(ctx, entry);
	if (ctx->ctx_done)
		ctx->ctx_done(ctx->ctx_arg);
	else
		free(ctx);
}
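
/*
 * Illustrative note (not part of the original source): rib_dump_r() is a
 * resumable table walk. Once ctx_count entries have been visited it
 * stores the current entry in ctx_re, pins it with F_RIB_ENTRYLOCK so
 * rib_remove() will not free it underneath the walker, and returns;
 * a later call resumes from that entry via rib_restart().
 */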

struct rib_entry *
rib_restart(struct rib_context *ctx)
{
	struct rib_entry *re;

	re = ctx->ctx_re;
	re->flags &= ~F_RIB_ENTRYLOCK;

	/* find first non-empty element */
	while (re && rib_empty(re))
		re = RB_NEXT(rib_tree, unused, re);

	/* free the previously locked rib element if empty */
	if (rib_empty(ctx->ctx_re))
		rib_remove(ctx->ctx_re);
	ctx->ctx_re = NULL;
	return (re);
}

void
rib_dump_runner(void)
{
	struct rib_context	*ctx, *next;

	for (ctx = LIST_FIRST(&rib_dump_h); ctx != NULL; ctx = next) {
		next = LIST_NEXT(ctx, entry);
		rib_dump_r(ctx);
	}
}

int
rib_dump_pending(void)
{
	return (!LIST_EMPTY(&rib_dump_h));
}
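
/*
 * Hypothetical usage sketch (not from the original source): a caller's
 * event loop would run one batch of pending dumps per iteration, e.g.
 *
 *	if (rib_dump_pending())
 *		rib_dump_runner();
 *
 * keeping the RDE responsive between batches.
 */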

/* used to bump correct prefix counters */
#define PREFIX_COUNT(x, op)			\
	do {					\
		(x)->prefix_cnt += (op);	\
	} while (0)
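
/*
 * Note (added for clarity, not in the original source): the
 * do { ... } while (0) wrapper makes the macro expand to a single
 * statement, so "if (cond) PREFIX_COUNT(asp, 1); else ..." parses as
 * intended.
 */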

/* path specific functions */

static void	path_link(struct rde_aspath *, struct rde_peer *);

struct path_table pathtable;

SIPHASH_KEY pathtablekey;

/* XXX the hash should also include communities and the other attrs */
#define PATH_HASH(x)				\
	&pathtable.path_hashtbl[SipHash24(&pathtablekey, (x)->data, (x)->len) & \
	    pathtable.path_hashmask]

void
path_init(u_int32_t hashsize)
{
	u_int32_t	hs, i;

	for (hs = 1; hs < hashsize; hs <<= 1)
		;
	pathtable.path_hashtbl = calloc(hs, sizeof(struct aspath_head));
	if (pathtable.path_hashtbl == NULL)
		fatal("path_init");

	for (i = 0; i < hs; i++)
		LIST_INIT(&pathtable.path_hashtbl[i]);

	pathtable.path_hashmask = hs - 1;
	arc4random_buf(&pathtablekey, sizeof(pathtablekey));
}
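
/*
 * Worked example (not in the original source): the round-up loop picks
 * the next power of two, e.g. hashsize 1000 gives hs = 1024, so
 * path_hashmask = 1023 and "hash & path_hashmask" selects a bucket
 * without a modulo.
 */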

void
path_shutdown(void)
{
	u_int32_t	i;

	for (i = 0; i <= pathtable.path_hashmask; i++)
		if (!LIST_EMPTY(&pathtable.path_hashtbl[i]))
			log_warnx("path_free: free non-free table");

	free(pathtable.path_hashtbl);
}

int
path_update(struct rib *rib, struct rde_peer *peer, struct rde_aspath *nasp,
    struct bgpd_addr *prefix, int prefixlen)
{
	struct rde_aspath	*asp;
	struct prefix		*p;

	if (nasp->pftableid) {
		rde_send_pftable(nasp->pftableid, prefix, prefixlen, 0);
		rde_send_pftable_commit();
	}

	/*
	 * First try to find a prefix in the specified RIB.
	 */
	if ((p = prefix_get(rib, peer, prefix, prefixlen, 0)) != NULL) {
		if (path_compare(nasp, p->aspath) == 0) {
			/* no change, update last change */
			p->lastchange = time(NULL);
			return (0);
		}
	}

	/*
	 * Either the prefix does not exist or the path changed.
	 * In both cases look up the new aspath to make sure it is not
	 * already in the RIB.
	 */
	if ((asp = path_lookup(nasp, peer)) == NULL) {
		/* Path not available, create and link a new one. */
		asp = path_copy(nasp);
		path_link(asp, peer);
	}

	/* If the prefix was found, move it; else add it to the aspath. */
	if (p != NULL)
		prefix_move(asp, p);
	else
		return (prefix_add(rib, asp, prefix, prefixlen));
	return (0);
}

int
path_compare(struct rde_aspath *a, struct rde_aspath *b)
{
	int		 r;

	if (a->origin > b->origin)
		return (1);
	if (a->origin < b->origin)
		return (-1);
	if ((a->flags & ~F_ATTR_LINKED) > (b->flags & ~F_ATTR_LINKED))
		return (1);
	if ((a->flags & ~F_ATTR_LINKED) < (b->flags & ~F_ATTR_LINKED))
		return (-1);
	if (a->med > b->med)
		return (1);
	if (a->med < b->med)
		return (-1);
	if (a->lpref > b->lpref)
		return (1);
	if (a->lpref < b->lpref)
		return (-1);
	if (a->weight > b->weight)
		return (1);
	if (a->weight < b->weight)
		return (-1);
	if (a->rtlabelid > b->rtlabelid)
		return (1);
	if (a->rtlabelid < b->rtlabelid)
		return (-1);
	if (a->pftableid > b->pftableid)
		return (1);
	if (a->pftableid < b->pftableid)
		return (-1);

	r = aspath_compare(a->aspath, b->aspath);
	if (r == 0)
		r = nexthop_compare(a->nexthop, b->nexthop);
	if (r > 0)
		return (1);
	if (r < 0)
		return (-1);

	return (attr_compare(a, b));
}
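
/*
 * Note (added for clarity, not in the original source): path_compare()
 * imposes an arbitrary but total order over all path attributes so that
 * path_lookup() can test for exact equality. It is unrelated to the BGP
 * decision process, which is driven through prefix_evaluate().
 */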

struct rde_aspath *
path_lookup(struct rde_aspath *aspath, struct rde_peer *peer)
{
	struct aspath_head	*head;
	struct rde_aspath	*asp;

	head = PATH_HASH(aspath->aspath);

	LIST_FOREACH(asp, head, path_l) {
		if (peer == asp->peer && path_compare(aspath, asp) == 0)
			return (asp);
	}
	return (NULL);
}

void
path_remove(struct rde_aspath *asp)
{
	struct prefix	*p, *np;

	for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = np) {
		np = LIST_NEXT(p, path_l);
		if (asp->pftableid) {
			struct bgpd_addr addr;

			pt_getaddr(p->prefix, &addr);
			/* Commit is done in peer_down() */
			rde_send_pftable(p->aspath->pftableid, &addr,
			    p->prefix->prefixlen, 1);
		}
		prefix_destroy(p);
	}
}

/*
 * Remove all stale routes, or, if staletime is 0, remove all routes for
 * the specified AID.
 */
u_int32_t
path_remove_stale(struct rde_aspath *asp, u_int8_t aid)
{
	struct prefix	*p, *np;
	time_t		 staletime;
	u_int32_t	 rprefixes;

	rprefixes = 0;
	staletime = asp->peer->staletime[aid];
	for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = np) {
		np = LIST_NEXT(p, path_l);
		if (p->prefix->aid != aid)
			continue;

		if (staletime && p->lastchange > staletime)
			continue;

		if (asp->pftableid) {
			struct bgpd_addr addr;

			pt_getaddr(p->prefix, &addr);
			/* Commit is done in peer_flush() */
			rde_send_pftable(p->aspath->pftableid, &addr,
			    p->prefix->prefixlen, 1);
		}

		/* only count Adj-RIB-In */
		if (p->rib->ribid == 0)
			rprefixes++;

		prefix_destroy(p);
	}
	return (rprefixes);
}


/* this function is only called by prefix_remove and path_remove */
void
path_destroy(struct rde_aspath *asp)
{
	/* path_destroy can only unlink and free empty rde_aspath */
	if (asp->prefix_cnt != 0 || asp->active_cnt != 0)
		log_warnx("path_destroy: prefix count out of sync");

	nexthop_unlink(asp);
	LIST_REMOVE(asp, path_l);
	LIST_REMOVE(asp, peer_l);
	asp->peer = NULL;
	asp->nexthop = NULL;
	asp->flags &= ~F_ATTR_LINKED;

	path_put(asp);
}

int
path_empty(struct rde_aspath *asp)
{
	return LIST_EMPTY(&asp->prefix_h);
}

/*
 * The path object is linked into multiple lists for fast access.
 * These are peer_l, path_l and nexthop_l.
 * peer_l: list of all aspaths that belong to that peer
 * path_l: hash list to find paths quickly
 * nexthop_l: list of all aspaths with an equal exit nexthop
 */
static void
path_link(struct rde_aspath *asp, struct rde_peer *peer)
{
	struct aspath_head	*head;

	head = PATH_HASH(asp->aspath);

	LIST_INSERT_HEAD(head, asp, path_l);
	LIST_INSERT_HEAD(&peer->path_h, asp, peer_l);
	asp->peer = peer;
	nexthop_link(asp);
	asp->flags |= F_ATTR_LINKED;
}

/*
 * Copy asp to a new UNLINKED one, mainly for filtering.
 */
struct rde_aspath *
path_copy(struct rde_aspath *asp)
{
	struct rde_aspath *nasp;

	nasp = path_get();
	nasp->aspath = asp->aspath;
	if (nasp->aspath != NULL) {
		nasp->aspath->refcnt++;
		rdemem.aspath_refs++;
	}
	nasp->nexthop = asp->nexthop;
	nasp->med = asp->med;
	nasp->lpref = asp->lpref;
	nasp->weight = asp->weight;
	nasp->origin = asp->origin;
	nasp->rtlabelid = asp->rtlabelid;
	rtlabel_ref(nasp->rtlabelid);
	nasp->pftableid = asp->pftableid;
	pftable_ref(nasp->pftableid);

	nasp->flags = asp->flags & ~F_ATTR_LINKED;
	attr_copy(nasp, asp);

	return (nasp);
}

/* alloc and initialize new entry. May not fail. */
struct rde_aspath *
path_get(void)
{
	struct rde_aspath *asp;

	asp = calloc(1, sizeof(*asp));
	if (asp == NULL)
		fatal("path_alloc");
	rdemem.path_cnt++;

	LIST_INIT(&asp->prefix_h);
	asp->origin = ORIGIN_INCOMPLETE;
	asp->lpref = DEFAULT_LPREF;
	/* med = 0 */
	/* weight = 0 */
	/* rtlabel = 0 */

	return (asp);
}

/* free an unlinked element */
void
path_put(struct rde_aspath *asp)
{
	if (asp == NULL)
		return;

	if (asp->flags & F_ATTR_LINKED)
		fatalx("path_put: linked object");

	rtlabel_unref(asp->rtlabelid);
	pftable_unref(asp->pftableid);
	aspath_put(asp->aspath);
	attr_freeall(asp);
	rdemem.path_cnt--;
	free(asp);
}

/* prefix specific functions */

static struct prefix	*prefix_alloc(void);
static void		 prefix_free(struct prefix *);
static void		 prefix_link(struct prefix *, struct rib_entry *,
			     struct rde_aspath *);
static void		 prefix_unlink(struct prefix *);

/*
 * Search for the specified prefix of a peer. Returns NULL if not found.
 */
struct prefix *
prefix_get(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix,
    int prefixlen, u_int32_t flags)
{
	struct rib_entry	*re;

	re = rib_get(rib, prefix, prefixlen);
	if (re == NULL)
		return (NULL);
	return (prefix_bypeer(re, peer, flags));
}

/*
 * Adds or updates a prefix.
 */
int
prefix_add(struct rib *rib, struct rde_aspath *asp, struct bgpd_addr *prefix,
    int prefixlen)
{
	struct prefix		*p;
	struct rib_entry	*re;

	re = rib_get(rib, prefix, prefixlen);
	if (re == NULL)
		re = rib_add(rib, prefix, prefixlen);

	p = prefix_bypeer(re, asp->peer, asp->flags);
	if (p == NULL) {
		p = prefix_alloc();
		prefix_link(p, re, asp);
		return (1);
	} else {
		if (p->aspath != asp) {
			/* prefix belongs to a different aspath so move */
			prefix_move(asp, p);
		} else
			p->lastchange = time(NULL);
		return (0);
	}
}

/*
 * Move the prefix to the specified AS path, removing the old asp if needed.
 */
void
prefix_move(struct rde_aspath *asp, struct prefix *p)
{
	struct prefix		*np;
	struct rde_aspath	*oasp;

	if (asp->peer != p->aspath->peer)
		fatalx("prefix_move: cross peer move");

	/* create new prefix node */
	np = prefix_alloc();
	np->aspath = asp;
	/* peer and prefix pointers are still equal */
	np->prefix = p->prefix;
	np->rib = p->rib;
	np->lastchange = time(NULL);

	/* add to new AS path */
	LIST_INSERT_HEAD(&asp->prefix_h, np, path_l);
	PREFIX_COUNT(asp, 1);
	/*
	 * no need to update the peer prefix count because we are only moving
	 * the prefix without changing the peer.
	 */

	/*
	 * First kick the old prefix node out of the prefix list,
	 * afterwards run the route decision for the new prefix node.
	 * Because of this only one update is generated if the prefix
	 * was active.
	 * This is safe because we create a new prefix and so the change
	 * is noticed by prefix_evaluate().
	 */
	LIST_REMOVE(p, rib_l);
	prefix_evaluate(np, np->rib);

	/* remove old prefix node */
	oasp = p->aspath;
	LIST_REMOVE(p, path_l);
	PREFIX_COUNT(oasp, -1);
	/* as before, the peer count needs no update because of the move */

	/* destroy all references to other objects and free the old prefix */
	p->aspath = NULL;
	p->prefix = NULL;
	p->rib = NULL;
	prefix_free(p);

	/* destroy old path if empty */
	if (path_empty(oasp))
		path_destroy(oasp);
}

/*
 * Removes a prefix from all lists. If the parent objects -- path or
 * pt_entry -- become empty remove them too.
 */
int
prefix_remove(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix,
    int prefixlen, u_int32_t flags)
{
	struct prefix		*p;
	struct rib_entry	*re;
	struct rde_aspath	*asp;

	re = rib_get(rib, prefix, prefixlen);
	if (re == NULL)	/* Got a dummy withdrawn request */
		return (0);

	p = prefix_bypeer(re, peer, flags);
	if (p == NULL)		/* Got a dummy withdrawn request. */
		return (0);

	asp = p->aspath;

	if (asp->pftableid) {
		/* only prefixes in the local RIB were pushed into pf */
		rde_send_pftable(asp->pftableid, prefix, prefixlen, 1);
		rde_send_pftable_commit();
	}

	prefix_destroy(p);

	return (1);
}

/* dump a prefix into the specified buffer */
int
prefix_write(u_char *buf, int len, struct bgpd_addr *prefix, u_int8_t plen)
{
	int	totlen;

	switch (prefix->aid) {
	case AID_INET:
	case AID_INET6:
		totlen = PREFIX_SIZE(plen);

		if (totlen > len)
			return (-1);
		*buf++ = plen;
		memcpy(buf, &prefix->ba, totlen - 1);
		return (totlen);
	case AID_VPN_IPv4:
		totlen = PREFIX_SIZE(plen) + sizeof(prefix->vpn4.rd) +
		    prefix->vpn4.labellen;
		plen += (sizeof(prefix->vpn4.rd) + prefix->vpn4.labellen) * 8;

		if (totlen > len)
			return (-1);
		*buf++ = plen;
		memcpy(buf, &prefix->vpn4.labelstack, prefix->vpn4.labellen);
		buf += prefix->vpn4.labellen;
		memcpy(buf, &prefix->vpn4.rd, sizeof(prefix->vpn4.rd));
		buf += sizeof(prefix->vpn4.rd);
		memcpy(buf, &prefix->vpn4.addr, PREFIX_SIZE(plen) - 1);
		return (totlen);
	default:
		return (-1);
	}
}
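
/*
 * Worked example (not in the original source; assumes PREFIX_SIZE(x) is
 * ((x) + 7) / 8 + 1 as in bgpd.h): for 10.0.0.0/8, totlen is 2 and the
 * encoding is the bytes 0x08 0x0a -- the prefix length followed by only
 * the significant address bytes, as the BGP NLRI format requires.
 */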

int
prefix_writebuf(struct ibuf *buf, struct bgpd_addr *prefix, u_int8_t plen)
{
	int	 totlen;
	void	*bptr;

	switch (prefix->aid) {
	case AID_INET:
	case AID_INET6:
		totlen = PREFIX_SIZE(plen);
		break;
	case AID_VPN_IPv4:
		totlen = PREFIX_SIZE(plen) + sizeof(prefix->vpn4.rd) +
		    prefix->vpn4.labellen;
		break;
	default:
		return (-1);
	}

	if ((bptr = ibuf_reserve(buf, totlen)) == NULL)
		return (-1);
	if (prefix_write(bptr, totlen, prefix, plen) == -1)
		return (-1);
	return (0);
}

/*
 * Searches the prefix list of the specified pt_entry for a prefix entry
 * belonging to the given peer. Returns NULL if no match is found.
 */
struct prefix *
prefix_bypeer(struct rib_entry *re, struct rde_peer *peer, u_int32_t flags)
{
	struct prefix	*p;

	LIST_FOREACH(p, &re->prefix_h, rib_l) {
		if (p->aspath->peer != peer)
			continue;
		if (p->aspath->flags & flags &&
		    (flags & F_ANN_DYNAMIC) !=
		    (p->aspath->flags & F_ANN_DYNAMIC))
			continue;
		return (p);
	}
	return (NULL);
}

void
prefix_updateall(struct rde_aspath *asp, enum nexthop_state state,
    enum nexthop_state oldstate)
{
	struct prefix	*p;

	LIST_FOREACH(p, &asp->prefix_h, path_l) {
		/*
		 * Skip non-local RIBs or RIBs that are flagged as noeval.
		 */
		if (p->rib->flags & F_RIB_NOEVALUATE)
			continue;

		if (oldstate == state && state == NEXTHOP_REACH) {
			/*
			 * The state of the nexthop did not change. The only
			 * thing that may have changed is the true_nexthop
			 * or other internal info. This will not change
			 * the routing decision so shortcut here.
			 */
			if ((p->rib->flags & F_RIB_NOFIB) == 0 &&
			    p == p->rib->active)
				rde_send_kroute(p, NULL, p->rib->ribid);
			continue;
		}

		/* redo the route decision */
		LIST_REMOVE(p, rib_l);
		/*
		 * If the prefix is the active one, remove it first;
		 * this has to be done because we cannot detect when
		 * the active prefix changes its state. In this case
		 * we know that this is a withdrawal and so the second
		 * prefix_evaluate() will generate no update because
		 * the nexthop is unreachable or ineligible.
		 */
		if (p == p->rib->active)
			prefix_evaluate(NULL, p->rib);
		prefix_evaluate(p, p->rib);
	}
}

/* kill a prefix. */
void
prefix_destroy(struct prefix *p)
{
	struct rde_aspath	*asp;

	asp = p->aspath;
	prefix_unlink(p);
	prefix_free(p);

	if (path_empty(asp))
		path_destroy(asp);
}

/*
 * helper function to clean up the connected networks after a reload
 */
void
prefix_network_clean(struct rde_peer *peer, time_t reloadtime, u_int32_t flags)
{
	struct rde_aspath	*asp, *xasp;
	struct prefix		*p, *xp;

	for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = xasp) {
		xasp = LIST_NEXT(asp, peer_l);
		if ((asp->flags & F_ANN_DYNAMIC) != flags)
			continue;
		for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = xp) {
			xp = LIST_NEXT(p, path_l);
			if (reloadtime > p->lastchange) {
				prefix_unlink(p);
				prefix_free(p);
			}
		}
		if (path_empty(asp))
			path_destroy(asp);
	}
}

/*
 * Link a prefix into the different parent objects.
 */
static void
prefix_link(struct prefix *pref, struct rib_entry *re, struct rde_aspath *asp)
{
	LIST_INSERT_HEAD(&asp->prefix_h, pref, path_l);
	PREFIX_COUNT(asp, 1);

	pref->aspath = asp;
	pref->rib = re;
	pref->prefix = re->prefix;
	pt_ref(pref->prefix);
	pref->lastchange = time(NULL);

	/* make route decision */
	prefix_evaluate(pref, re);
}

/*
 * Unlink a prefix from the different parent objects.
 */
static void
prefix_unlink(struct prefix *pref)
{
	struct rib_entry	*re = pref->rib;

	/* make route decision */
	LIST_REMOVE(pref, rib_l);
	prefix_evaluate(NULL, re);

	LIST_REMOVE(pref, path_l);
	PREFIX_COUNT(pref->aspath, -1);

	pt_unref(pref->prefix);
	if (pt_empty(pref->prefix))
		pt_remove(pref->prefix);
	if (rib_empty(re))
		rib_remove(re);

	/* destroy all references to other objects */
	pref->aspath = NULL;
	pref->prefix = NULL;
	pref->rib = NULL;

	/*
	 * It's the caller's duty to remove empty aspath structures.
	 * Also freeing the unlinked prefix is the caller's duty.
	 */
}

/* alloc and bzero new entry. May not fail. */
static struct prefix *
prefix_alloc(void)
{
	struct prefix *p;

	p = calloc(1, sizeof(*p));
	if (p == NULL)
		fatal("prefix_alloc");
	rdemem.prefix_cnt++;
	return p;
}

/* free an unlinked entry */
static void
prefix_free(struct prefix *pref)
{
	rdemem.prefix_cnt--;
	free(pref);
}

/*
 * nexthop functions
 */
struct nexthop_head	*nexthop_hash(struct bgpd_addr *);
struct nexthop		*nexthop_lookup(struct bgpd_addr *);

/*
 * In BGP there exist two nexthops: the exit nexthop which was announced via
 * BGP and the true nexthop which is used in the FIB -- the forwarding
 * information base, a.k.a. the kernel routing table. When sending updates
 * it is even more confusing. In IBGP we pass the unmodified exit nexthop
 * to the neighbors while in EBGP normally the address of the router is
 * sent. The exit nexthop may be passed to the external neighbor if the
 * neighbor and the exit nexthop reside in the same subnet -- directly
 * connected.
 */
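
/*
 * Illustrative example (not from the original source): a route learned
 * via IBGP may carry the exit nexthop 192.0.2.1 while the kernel reaches
 * that address through the gateway 10.0.0.1; the latter is the true
 * nexthop that ends up in the FIB.
 */
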
struct nexthop_table {
	LIST_HEAD(nexthop_head, nexthop)	*nexthop_hashtbl;
	u_int32_t				 nexthop_hashmask;
} nexthoptable;

SIPHASH_KEY nexthoptablekey;

void
nexthop_init(u_int32_t hashsize)
{
	u_int32_t	 hs, i;

	for (hs = 1; hs < hashsize; hs <<= 1)
		;
	nexthoptable.nexthop_hashtbl = calloc(hs, sizeof(struct nexthop_head));
	if (nexthoptable.nexthop_hashtbl == NULL)
		fatal("nexthop_init");

	for (i = 0; i < hs; i++)
		LIST_INIT(&nexthoptable.nexthop_hashtbl[i]);
	arc4random_buf(&nexthoptablekey, sizeof(nexthoptablekey));

	nexthoptable.nexthop_hashmask = hs - 1;
}

void
nexthop_shutdown(void)
{
	u_int32_t		 i;
	struct nexthop		*nh, *nnh;

	for (i = 0; i <= nexthoptable.nexthop_hashmask; i++) {
		for (nh = LIST_FIRST(&nexthoptable.nexthop_hashtbl[i]);
		    nh != NULL; nh = nnh) {
			nnh = LIST_NEXT(nh, nexthop_l);
			nh->state = NEXTHOP_UNREACH;
			(void)nexthop_delete(nh);
		}
		if (!LIST_EMPTY(&nexthoptable.nexthop_hashtbl[i]))
			log_warnx("nexthop_shutdown: non-free table");
	}

	free(nexthoptable.nexthop_hashtbl);
}

void
nexthop_update(struct kroute_nexthop *msg)
{
	struct nexthop		*nh;
	struct rde_aspath	*asp;
	enum nexthop_state	 oldstate;

	nh = nexthop_lookup(&msg->nexthop);
	if (nh == NULL) {
		log_warnx("nexthop_update: non-existent nexthop %s",
		    log_addr(&msg->nexthop));
		return;
	}

	oldstate = nh->state;
	if (msg->valid)
		nh->state = NEXTHOP_REACH;
	else
		nh->state = NEXTHOP_UNREACH;

	if (msg->connected) {
		nh->flags |= NEXTHOP_CONNECTED;
		memcpy(&nh->true_nexthop, &nh->exit_nexthop,
		    sizeof(nh->true_nexthop));
	} else
		memcpy(&nh->true_nexthop, &msg->gateway,
		    sizeof(nh->true_nexthop));

	memcpy(&nh->nexthop_net, &msg->net,
	    sizeof(nh->nexthop_net));
	nh->nexthop_netlen = msg->netlen;

	if (nexthop_delete(nh))
		/* nexthop no longer used */
		return;

	if (rde_noevaluate())
		/*
		 * if the decision process is turned off there is no need
		 * for the aspath list walk.
		 */
		return;

	LIST_FOREACH(asp, &nh->path_h, nexthop_l) {
		prefix_updateall(asp, nh->state, oldstate);
	}
}

void
nexthop_modify(struct rde_aspath *asp, struct bgpd_addr *nexthop,
    enum action_types type, u_int8_t aid)
{
	struct nexthop	*nh;

	if (type == ACTION_SET_NEXTHOP && aid != nexthop->aid)
		return;

	asp->flags &= ~F_NEXTHOP_MASK;
	switch (type) {
	case ACTION_SET_NEXTHOP_REJECT:
		asp->flags |= F_NEXTHOP_REJECT;
		break;
	case ACTION_SET_NEXTHOP_BLACKHOLE:
		asp->flags |= F_NEXTHOP_BLACKHOLE;
		break;
	case ACTION_SET_NEXTHOP_NOMODIFY:
		asp->flags |= F_NEXTHOP_NOMODIFY;
		break;
	case ACTION_SET_NEXTHOP_SELF:
		asp->flags |= F_NEXTHOP_SELF;
		break;
	case ACTION_SET_NEXTHOP:
		nh = nexthop_get(nexthop);
		if (asp->flags & F_ATTR_LINKED)
			nexthop_unlink(asp);
		asp->nexthop = nh;
		if (asp->flags & F_ATTR_LINKED)
			nexthop_link(asp);
		break;
	default:
		break;
	}
}

void
nexthop_link(struct rde_aspath *asp)
{
	if (asp->nexthop == NULL)
		return;

	LIST_INSERT_HEAD(&asp->nexthop->path_h, asp, nexthop_l);
}

void
nexthop_unlink(struct rde_aspath *asp)
{
	struct nexthop	*nh;

	if (asp->nexthop == NULL)
		return;

	LIST_REMOVE(asp, nexthop_l);

	/* see if list is empty */
	nh = asp->nexthop;
	asp->nexthop = NULL;

	(void)nexthop_delete(nh);
}

int
nexthop_delete(struct nexthop *nh)
{
	/* nexthop still used by some other aspath */
	if (!LIST_EMPTY(&nh->path_h))
		return (0);

	/* either pinned or in a state where it may not be deleted */
	if (nh->refcnt > 0 || nh->state == NEXTHOP_LOOKUP)
		return (0);

	LIST_REMOVE(nh, nexthop_l);
	rde_send_nexthop(&nh->exit_nexthop, 0);

	rdemem.nexthop_cnt--;
	free(nh);
	return (1);
}

struct nexthop *
nexthop_get(struct bgpd_addr *nexthop)
{
	struct nexthop	*nh;

	nh = nexthop_lookup(nexthop);
	if (nh == NULL) {
		nh = calloc(1, sizeof(*nh));
		if (nh == NULL)
			fatal("nexthop_alloc");
		rdemem.nexthop_cnt++;

		LIST_INIT(&nh->path_h);
		nh->state = NEXTHOP_LOOKUP;
		nh->exit_nexthop = *nexthop;
		LIST_INSERT_HEAD(nexthop_hash(nexthop), nh,
		    nexthop_l);

		rde_send_nexthop(&nh->exit_nexthop, 1);
	}

	return (nh);
}

int
nexthop_compare(struct nexthop *na, struct nexthop *nb)
{
	struct bgpd_addr	*a, *b;

	if (na == nb)
		return (0);
	if (na == NULL)
		return (-1);
	if (nb == NULL)
		return (1);

	a = &na->exit_nexthop;
	b = &nb->exit_nexthop;

	if (a->aid != b->aid)
		return (a->aid - b->aid);

	switch (a->aid) {
	case AID_INET:
		if (ntohl(a->v4.s_addr) > ntohl(b->v4.s_addr))
			return (1);
		if (ntohl(a->v4.s_addr) < ntohl(b->v4.s_addr))
			return (-1);
		return (0);
	case AID_INET6:
		return (memcmp(&a->v6, &b->v6, sizeof(struct in6_addr)));
	default:
		fatalx("nexthop_cmp: unknown af");
	}
	return (-1);
}

struct nexthop *
nexthop_lookup(struct bgpd_addr *nexthop)
{
	struct nexthop	*nh;

	LIST_FOREACH(nh, nexthop_hash(nexthop), nexthop_l) {
		if (memcmp(&nh->exit_nexthop, nexthop,
		    sizeof(struct bgpd_addr)) == 0)
			return (nh);
	}
	return (NULL);
}

struct nexthop_head *
nexthop_hash(struct bgpd_addr *nexthop)
{
	u_int32_t	 h = 0;

	switch (nexthop->aid) {
	case AID_INET:
		h = SipHash24(&nexthoptablekey, &nexthop->v4.s_addr,
		    sizeof(nexthop->v4.s_addr));
		break;
	case AID_INET6:
		h = SipHash24(&nexthoptablekey, &nexthop->v6,
		    sizeof(struct in6_addr));
		break;
	default:
		fatalx("nexthop_hash: unsupported AF");
	}
	return (&nexthoptable.nexthop_hashtbl[h & nexthoptable.nexthop_hashmask]);
}
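
/*
 * Note (added for clarity, not in the original source): both hash tables
 * in this file key SipHash24 with random bytes from arc4random_buf(), so
 * peers cannot craft AS paths or nexthops that all land in one bucket --
 * a defense against algorithmic-complexity attacks on the hash chains.
 */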