GCC Code Coverage Report
File:      usr.sbin/bgpd/rde_rib.c
Directory: ./
Date:      2017-11-07
Lines:     0 of 514 executed (0.0 %)
Branches:  0 of 526 executed (0.0 %)

/*	$OpenBSD: rde_rib.c,v 1.154 2017/05/28 12:21:36 claudio Exp $ */

/*
 * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/queue.h>

#include <stdlib.h>
#include <string.h>
#include <siphash.h>
#include <time.h>

#include "bgpd.h"
#include "rde.h"
#include "log.h"

/*
 * BGP RIB -- Routing Information Base
 *
 * The RIB is built with one aspect in mind: speed -- specifically, update
 * speed.  Therefore one thing needs to be absolutely avoided: long table
 * walks.  This is achieved by heavily linking the different parts together.
 */
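
/*
 * Editor's sketch of that linking, inferred from the lists used in this
 * file (not part of the original source):
 *
 *	rib (RB tree) --> rib_entry (one per prefix) --> prefix_h list of
 *	struct prefix, each pointing at its rde_aspath; every rde_aspath
 *	sits on three lists at once -- path_l (global hash), peer_l (per
 *	peer) and nexthop_l (per nexthop) -- so each object can reach its
 *	neighbors directly, without a table walk.
 */
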
u_int16_t rib_size;
struct rib_desc *ribs;

struct rib_entry *rib_add(struct rib *, struct bgpd_addr *, int);
int rib_compare(const struct rib_entry *, const struct rib_entry *);
void rib_remove(struct rib_entry *);
int rib_empty(struct rib_entry *);
struct rib_entry *rib_restart(struct rib_context *);

RB_PROTOTYPE(rib_tree, rib_entry, rib_e, rib_compare);
RB_GENERATE(rib_tree, rib_entry, rib_e, rib_compare);

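/*
 * The helpers below stash a lock flag in bit 0 of the rib_entry's rib
 * pointer, which is free because the pointer is at least 2-byte aligned
 * (editor's note; the re_rib() accessor used elsewhere in this file is
 * assumed to mask that bit off again).
 */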
static inline void
re_lock(struct rib_entry *re)
{
	re->__rib = (struct rib *)((intptr_t)re->__rib | 1);
}

static inline void
re_unlock(struct rib_entry *re)
{
	re->__rib = (struct rib *)((intptr_t)re->__rib & ~1);
}

static inline int
re_is_locked(struct rib_entry *re)
{
	return ((intptr_t)re->__rib & 1);
}

static inline struct rib_tree *
rib_tree(struct rib *rib)
{
	return (&rib->tree);
}

/* RIB specific functions */
struct rib *
rib_new(char *name, u_int rtableid, u_int16_t flags)
{
	struct rib_desc	*xribs;
	u_int16_t	id;

	for (id = 0; id < rib_size; id++) {
		if (*ribs[id].name == '\0')
			break;
	}

	if (id >= rib_size) {
		if ((xribs = reallocarray(ribs, id + 1,
		    sizeof(struct rib_desc))) == NULL) {
			/* XXX this is not clever */
			fatal("rib_add");
		}
		ribs = xribs;
		rib_size = id + 1;
	}

	bzero(&ribs[id], sizeof(struct rib_desc));
	strlcpy(ribs[id].name, name, sizeof(ribs[id].name));
	RB_INIT(rib_tree(&ribs[id].rib));
	ribs[id].state = RECONF_REINIT;
	ribs[id].rib.id = id;
	ribs[id].rib.flags = flags;
	ribs[id].rib.rtableid = rtableid;

	ribs[id].in_rules = calloc(1, sizeof(struct filter_head));
	if (ribs[id].in_rules == NULL)
		fatal(NULL);
	TAILQ_INIT(ribs[id].in_rules);

	return (&ribs[id].rib);
}
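
/*
 * Minimal usage sketch (editor's illustration, not part of the original
 * source; the flag is taken from the F_RIB_* set referenced in this file):
 *
 *	struct rib *r = rib_new("extra", 0, F_RIB_NOEVALUATE);
 *	if (rib_find("extra") != r)
 *		fatalx("rib lookup mismatch");
 */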

struct rib *
rib_find(char *name)
{
	u_int16_t id;

	if (name == NULL || *name == '\0')
		return (&ribs[1].rib);	/* no name returns the Loc-RIB */

	for (id = 0; id < rib_size; id++) {
		if (!strcmp(ribs[id].name, name))
			return (&ribs[id].rib);
	}

	return (NULL);
}

struct rib_desc *
rib_desc(struct rib *rib)
{
	return (&ribs[rib->id]);
}

void
rib_free(struct rib *rib)
{
	struct rib_desc *rd;
	struct rib_entry *re, *xre;
	struct prefix *p, *np;

	for (re = RB_MIN(rib_tree, rib_tree(rib)); re != NULL; re = xre) {
		xre = RB_NEXT(rib_tree, rib_tree(rib), re);

		/*
		 * Removing the prefixes is tricky: the last one also
		 * removes the rib_entry, and because prefix_destroy()
		 * does an empty check, the default for loop cannot be
		 * used here.
		 */
		while ((p = LIST_FIRST(&re->prefix_h))) {
			np = LIST_NEXT(p, rib_l);
			if (p->aspath->pftableid) {
				struct bgpd_addr addr;

				pt_getaddr(p->prefix, &addr);
				/* Commit is done in peer_down() */
				rde_send_pftable(p->aspath->pftableid, &addr,
				    p->prefix->prefixlen, 1);
			}
			prefix_destroy(p);
			if (np == NULL)
				break;
		}
	}
	rd = &ribs[rib->id];
	filterlist_free(rd->in_rules_tmp);
	filterlist_free(rd->in_rules);
	bzero(rib, sizeof(struct rib_desc));
}

int
rib_compare(const struct rib_entry *a, const struct rib_entry *b)
{
	return (pt_prefix_cmp(a->prefix, b->prefix));
}

struct rib_entry *
rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
{
	struct rib_entry xre;
	struct pt_entry	*pte;

	pte = pt_fill(prefix, prefixlen);
	bzero(&xre, sizeof(xre));
	xre.prefix = pte;

	return (RB_FIND(rib_tree, rib_tree(rib), &xre));
}

struct rib_entry *
rib_lookup(struct rib *rib, struct bgpd_addr *addr)
{
	struct rib_entry *re;
	int		 i;

	switch (addr->aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		for (i = 32; i >= 0; i--) {
			re = rib_get(rib, addr, i);
			if (re != NULL)
				return (re);
		}
		break;
	case AID_INET6:
		for (i = 128; i >= 0; i--) {
			re = rib_get(rib, addr, i);
			if (re != NULL)
				return (re);
		}
		break;
	default:
		fatalx("rib_lookup: unknown af");
	}
	return (NULL);
}
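
/*
 * rib_lookup() above is a longest-prefix match: it probes rib_get()
 * from the maximum prefix length of the address family down to 0 and
 * returns the first hit.  Caller sketch (editor's illustration; addr
 * is filled in elsewhere):
 *
 *	struct rib_entry *re = rib_lookup(rib, &addr);
 *	if (re == NULL)
 *		log_warnx("no covering route");
 */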


struct rib_entry *
rib_add(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
{
	struct pt_entry	*pte;
	struct rib_entry *re;

	pte = pt_get(prefix, prefixlen);
	if (pte == NULL)
		pte = pt_add(prefix, prefixlen);

	if ((re = calloc(1, sizeof(*re))) == NULL)
		fatal("rib_add");

	LIST_INIT(&re->prefix_h);
	re->prefix = pte;
	re->__rib = rib;

	if (RB_INSERT(rib_tree, rib_tree(rib), re) != NULL) {
		log_warnx("rib_add: insert failed");
		free(re);
		return (NULL);
	}

	pt_ref(pte);

	rdemem.rib_cnt++;

	return (re);
}

void
rib_remove(struct rib_entry *re)
{
	if (!rib_empty(re))
		fatalx("rib_remove: entry not empty");

	if (re_is_locked(re))
		/* entry is locked, don't free it. */
		return;

	pt_unref(re->prefix);
	if (pt_empty(re->prefix))
		pt_remove(re->prefix);

	if (RB_REMOVE(rib_tree, rib_tree(re_rib(re)), re) == NULL)
		log_warnx("rib_remove: remove failed.");

	free(re);
	rdemem.rib_cnt--;
}

int
rib_empty(struct rib_entry *re)
{
	return LIST_EMPTY(&re->prefix_h);
}

void
rib_dump(struct rib *rib, void (*upcall)(struct rib_entry *, void *),
    void *arg, u_int8_t aid)
{
	struct rib_context	*ctx;

	if ((ctx = calloc(1, sizeof(*ctx))) == NULL)
		fatal("rib_dump");
	ctx->ctx_rib = rib;
	ctx->ctx_upcall = upcall;
	ctx->ctx_arg = arg;
	ctx->ctx_aid = aid;
	rib_dump_r(ctx);
}

void
rib_dump_r(struct rib_context *ctx)
{
	struct rib_entry	*re;
	unsigned int		 i;

	if (ctx->ctx_re == NULL)
		re = RB_MIN(rib_tree, rib_tree(ctx->ctx_rib));
	else
		re = rib_restart(ctx);

	for (i = 0; re != NULL; re = RB_NEXT(rib_tree, unused, re)) {
		if (ctx->ctx_aid != AID_UNSPEC &&
		    ctx->ctx_aid != re->prefix->aid)
			continue;
		if (ctx->ctx_count && i++ >= ctx->ctx_count &&
		    !re_is_locked(re)) {
			/* store and lock last element */
			ctx->ctx_re = re;
			re_lock(re);
			return;
		}
		ctx->ctx_upcall(re, ctx->ctx_arg);
	}

	if (ctx->ctx_done)
		ctx->ctx_done(ctx->ctx_arg);
	else
		free(ctx);
}
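
/*
 * rib_dump_r() is restartable: once ctx_count entries have been visited
 * it locks the current rib_entry and returns; the next call resumes via
 * rib_restart().  Sketch of a bounded dump (editor's illustration;
 * upcall and arg are hypothetical):
 *
 *	struct rib_context *ctx = calloc(1, sizeof(*ctx));
 *	ctx->ctx_rib = rib;
 *	ctx->ctx_upcall = upcall;
 *	ctx->ctx_arg = arg;
 *	ctx->ctx_aid = AID_UNSPEC;
 *	ctx->ctx_count = 100;		-- at most 100 entries per call
 *	rib_dump_r(ctx);		-- call again until ctx is freed
 */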

struct rib_entry *
rib_restart(struct rib_context *ctx)
{
	struct rib_entry *re;

	re = ctx->ctx_re;
	re_unlock(re);

	/* find first non-empty element */
	while (re && rib_empty(re))
		re = RB_NEXT(rib_tree, unused, re);

	/* free the previously locked rib element if empty */
	if (rib_empty(ctx->ctx_re))
		rib_remove(ctx->ctx_re);
	ctx->ctx_re = NULL;
	return (re);
}

/* used to bump correct prefix counters */
#define PREFIX_COUNT(x, op)			\
	do {					\
		(x)->prefix_cnt += (op);	\
	} while (0)

/* path specific functions */

static void	path_link(struct rde_aspath *, struct rde_peer *);

struct path_table pathtable;

SIPHASH_KEY pathtablekey;

/* XXX the hash should also include communities and the other attrs */
#define PATH_HASH(x)				\
	&pathtable.path_hashtbl[SipHash24(&pathtablekey, (x)->data, (x)->len) & \
	    pathtable.path_hashmask]

void
path_init(u_int32_t hashsize)
{
	u_int32_t	hs, i;

	for (hs = 1; hs < hashsize; hs <<= 1)
		;
	pathtable.path_hashtbl = calloc(hs, sizeof(struct aspath_head));
	if (pathtable.path_hashtbl == NULL)
		fatal("path_init");

	for (i = 0; i < hs; i++)
		LIST_INIT(&pathtable.path_hashtbl[i]);

	pathtable.path_hashmask = hs - 1;
	arc4random_buf(&pathtablekey, sizeof(pathtablekey));
}
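
/*
 * The loop above rounds hashsize up to a power of two so that the
 * "hash & path_hashmask" in PATH_HASH() can stand in for a modulo
 * (editor's note).  E.g. hashsize 1000 yields hs 1024 and a mask of
 * 1023.
 */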

void
path_shutdown(void)
{
	u_int32_t	i;

	for (i = 0; i <= pathtable.path_hashmask; i++)
		if (!LIST_EMPTY(&pathtable.path_hashtbl[i]))
			log_warnx("path_shutdown: free non-free table");

	free(pathtable.path_hashtbl);
}

int
path_update(struct rib *rib, struct rde_peer *peer, struct rde_aspath *nasp,
    struct bgpd_addr *prefix, int prefixlen)
{
	struct rde_aspath	*asp;
	struct prefix		*p;

	if (nasp->pftableid) {
		rde_send_pftable(nasp->pftableid, prefix, prefixlen, 0);
		rde_send_pftable_commit();
	}

	/*
	 * First try to find a prefix in the specified RIB.
	 */
	if ((p = prefix_get(rib, peer, prefix, prefixlen, 0)) != NULL) {
		if (path_compare(nasp, p->aspath) == 0) {
			/* no change, update last change */
			p->lastchange = time(NULL);
			return (0);
		}
	}

	/*
	 * Either the prefix does not exist or the path changed.
	 * In both cases look up the new aspath to make sure it is not
	 * already in the RIB.
	 */
	if ((asp = path_lookup(nasp, peer)) == NULL) {
		/* Path not available, create and link a new one. */
		asp = path_copy(nasp);
		path_link(asp, peer);
	}

	/* If the prefix was found, move it; otherwise add it to the aspath. */
	if (p != NULL)
		prefix_move(asp, p);
	else
		return (prefix_add(rib, asp, prefix, prefixlen));
	return (0);
}
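
/*
 * Editor's note on the flow above: path_update() deduplicates attribute
 * sets.  If a peer announces many prefixes with identical attributes,
 * path_lookup() finds the shared rde_aspath via PATH_HASH and the new
 * prefix is simply hung off it; a fresh copy is created and linked only
 * when no identical path exists yet.
 */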

int
path_compare(struct rde_aspath *a, struct rde_aspath *b)
{
	int		 r;

	if (a->origin > b->origin)
		return (1);
	if (a->origin < b->origin)
		return (-1);
	if ((a->flags & ~F_ATTR_LINKED) > (b->flags & ~F_ATTR_LINKED))
		return (1);
	if ((a->flags & ~F_ATTR_LINKED) < (b->flags & ~F_ATTR_LINKED))
		return (-1);
	if (a->med > b->med)
		return (1);
	if (a->med < b->med)
		return (-1);
	if (a->lpref > b->lpref)
		return (1);
	if (a->lpref < b->lpref)
		return (-1);
	if (a->weight > b->weight)
		return (1);
	if (a->weight < b->weight)
		return (-1);
	if (a->rtlabelid > b->rtlabelid)
		return (1);
	if (a->rtlabelid < b->rtlabelid)
		return (-1);
	if (a->pftableid > b->pftableid)
		return (1);
	if (a->pftableid < b->pftableid)
		return (-1);

	r = aspath_compare(a->aspath, b->aspath);
	if (r == 0)
		r = nexthop_compare(a->nexthop, b->nexthop);
	if (r > 0)
		return (1);
	if (r < 0)
		return (-1);

	return (attr_compare(a, b));
}
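
/*
 * path_compare() defines a total order over the attributes it checks;
 * what matters here is only that identical paths compare as 0, which is
 * what path_lookup() below and path_update() rely on for deduplication
 * (editor's note).
 */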

struct rde_aspath *
path_lookup(struct rde_aspath *aspath, struct rde_peer *peer)
{
	struct aspath_head	*head;
	struct rde_aspath	*asp;

	head = PATH_HASH(aspath->aspath);

	LIST_FOREACH(asp, head, path_l) {
		if (peer == asp->peer && path_compare(aspath, asp) == 0)
			return (asp);
	}
	return (NULL);
}

void
path_remove(struct rde_aspath *asp)
{
	struct prefix	*p, *np;

	for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = np) {
		np = LIST_NEXT(p, path_l);
		if (asp->pftableid) {
			struct bgpd_addr addr;

			pt_getaddr(p->prefix, &addr);
			/* Commit is done in peer_down() */
			rde_send_pftable(p->aspath->pftableid, &addr,
			    p->prefix->prefixlen, 1);
		}
		prefix_destroy(p);
	}
}

/*
 * Remove all stale routes, or if staletime is 0, remove all routes for
 * the specified AID.
 */
u_int32_t
path_remove_stale(struct rde_aspath *asp, u_int8_t aid)
{
	struct prefix	*p, *np;
	time_t		 staletime;
	u_int32_t	 rprefixes;

	rprefixes = 0;
	staletime = asp->peer->staletime[aid];
	for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = np) {
		np = LIST_NEXT(p, path_l);
		if (p->prefix->aid != aid)
			continue;

		if (staletime && p->lastchange > staletime)
			continue;

		if (asp->pftableid) {
			struct bgpd_addr addr;

			pt_getaddr(p->prefix, &addr);
			/* Commit is done in peer_flush() */
			rde_send_pftable(p->aspath->pftableid, &addr,
			    p->prefix->prefixlen, 1);
		}

		/* only count Adj-RIB-In */
		if (re_rib(p->re) == &ribs[0].rib)
			rprefixes++;

		prefix_destroy(p);
	}
	return (rprefixes);
}


/* this function is only called by prefix_remove and path_remove */
void
path_destroy(struct rde_aspath *asp)
{
	/* path_destroy can only unlink and free empty rde_aspath */
	if (asp->prefix_cnt != 0 || asp->active_cnt != 0)
		log_warnx("path_destroy: prefix count out of sync");

	nexthop_unlink(asp);
	LIST_REMOVE(asp, path_l);
	LIST_REMOVE(asp, peer_l);
	asp->peer = NULL;
	asp->nexthop = NULL;
	asp->flags &= ~F_ATTR_LINKED;

	path_put(asp);
}

int
path_empty(struct rde_aspath *asp)
{
	return LIST_EMPTY(&asp->prefix_h);
}

/*
 * the path object is linked into multiple lists for fast access.
 * These are peer_l, path_l and nexthop_l.
 * peer_l: list of all aspaths that belong to that peer
 * path_l: hash list to find paths quickly
 * nexthop_l: list of all aspaths with an equal exit nexthop
 */
static void
path_link(struct rde_aspath *asp, struct rde_peer *peer)
{
	struct aspath_head	*head;

	head = PATH_HASH(asp->aspath);

	LIST_INSERT_HEAD(head, asp, path_l);
	LIST_INSERT_HEAD(&peer->path_h, asp, peer_l);
	asp->peer = peer;
	nexthop_link(asp);
	asp->flags |= F_ATTR_LINKED;
}

/*
 * copy asp to a new UNLINKED one mainly for filtering
 */
struct rde_aspath *
path_copy(struct rde_aspath *asp)
{
	struct rde_aspath *nasp;

	nasp = path_get();
	nasp->aspath = asp->aspath;
	if (nasp->aspath != NULL) {
		nasp->aspath->refcnt++;
		rdemem.aspath_refs++;
	}
	nasp->nexthop = asp->nexthop;
	nasp->med = asp->med;
	nasp->lpref = asp->lpref;
	nasp->weight = asp->weight;
	nasp->origin = asp->origin;
	nasp->rtlabelid = asp->rtlabelid;
	rtlabel_ref(nasp->rtlabelid);
	nasp->pftableid = asp->pftableid;
	pftable_ref(nasp->pftableid);

	nasp->flags = asp->flags & ~F_ATTR_LINKED;
	attr_copy(nasp, asp);

	return (nasp);
}

/* alloc and initialize new entry. May not fail. */
struct rde_aspath *
path_get(void)
{
	struct rde_aspath *asp;

	asp = calloc(1, sizeof(*asp));
	if (asp == NULL)
		fatal("path_alloc");
	rdemem.path_cnt++;

	LIST_INIT(&asp->prefix_h);
	asp->origin = ORIGIN_INCOMPLETE;
	asp->lpref = DEFAULT_LPREF;
	/* med = 0 */
	/* weight = 0 */
	/* rtlabel = 0 */

	return (asp);
}

/* free an unlinked element */
void
path_put(struct rde_aspath *asp)
{
	if (asp == NULL)
		return;

	if (asp->flags & F_ATTR_LINKED)
		fatalx("path_put: linked object");

	rtlabel_unref(asp->rtlabelid);
	pftable_unref(asp->pftableid);
	aspath_put(asp->aspath);
	attr_freeall(asp);
	rdemem.path_cnt--;
	free(asp);
}

/* prefix specific functions */

static struct prefix	*prefix_alloc(void);
static void		 prefix_free(struct prefix *);
static void		 prefix_link(struct prefix *, struct rib_entry *,
			     struct rde_aspath *);
static void		 prefix_unlink(struct prefix *);

/*
 * Search for the specified prefix of a peer. Returns NULL if not found.
 */
struct prefix *
prefix_get(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix,
    int prefixlen, u_int32_t flags)
{
	struct rib_entry	*re;

	re = rib_get(rib, prefix, prefixlen);
	if (re == NULL)
		return (NULL);
	return (prefix_bypeer(re, peer, flags));
}

/*
 * Adds or updates a prefix.
 */
int
prefix_add(struct rib *rib, struct rde_aspath *asp, struct bgpd_addr *prefix,
    int prefixlen)
{
	struct prefix		*p;
	struct rib_entry	*re;

	re = rib_get(rib, prefix, prefixlen);
	if (re == NULL)
		re = rib_add(rib, prefix, prefixlen);

	p = prefix_bypeer(re, asp->peer, asp->flags);
	if (p == NULL) {
		p = prefix_alloc();
		prefix_link(p, re, asp);
		return (1);
	} else {
		if (p->aspath != asp) {
			/* prefix belongs to a different aspath so move */
			prefix_move(asp, p);
		} else
			p->lastchange = time(NULL);
		return (0);
	}
}

/*
 * Move the prefix to the specified aspath, removing the old asp if it
 * becomes empty.
 */
void
prefix_move(struct rde_aspath *asp, struct prefix *p)
{
	struct prefix		*np;
	struct rde_aspath	*oasp;

	if (asp->peer != p->aspath->peer)
		fatalx("prefix_move: cross peer move");

	/* create new prefix node */
	np = prefix_alloc();
	np->aspath = asp;
	/* peer and prefix pointers are still equal */
	np->prefix = p->prefix;
	np->re = p->re;
	np->lastchange = time(NULL);

	/* add to new as path */
	LIST_INSERT_HEAD(&asp->prefix_h, np, path_l);
	PREFIX_COUNT(asp, 1);
	/*
	 * no need to update the peer prefix count because we are only moving
	 * the prefix without changing the peer.
	 */

	/*
	 * First kick the old prefix node out of the prefix list,
	 * afterwards run the route decision for the new prefix node.
	 * Because of this only one update is generated if the prefix
	 * was active.
	 * This is safe because we create a new prefix and so the change
	 * is noticed by prefix_evaluate().
	 */
	LIST_REMOVE(p, rib_l);
	prefix_evaluate(np, np->re);

	/* remove old prefix node */
	oasp = p->aspath;
	LIST_REMOVE(p, path_l);
	PREFIX_COUNT(oasp, -1);
	/* as before, the peer count needs no update because of the move */

	/* destroy all references to other objects and free the old prefix */
	p->aspath = NULL;
	p->prefix = NULL;
	p->re = NULL;
	prefix_free(p);

	/* destroy old path if empty */
	if (path_empty(oasp))
		path_destroy(oasp);
}

/*
 * Removes a prefix from all lists. If the parent objects -- path or
 * pt_entry -- become empty, remove them too.
 */
int
prefix_remove(struct rib *rib, struct rde_peer *peer, struct bgpd_addr *prefix,
    int prefixlen, u_int32_t flags)
{
	struct prefix		*p;
	struct rib_entry	*re;
	struct rde_aspath	*asp;

	re = rib_get(rib, prefix, prefixlen);
	if (re == NULL)	/* Got a dummy withdrawn request */
		return (0);

	p = prefix_bypeer(re, peer, flags);
	if (p == NULL)		/* Got a dummy withdrawn request. */
		return (0);

	asp = p->aspath;

	if (asp->pftableid) {
		/* only prefixes in the local RIB were pushed into pf */
		rde_send_pftable(asp->pftableid, prefix, prefixlen, 1);
		rde_send_pftable_commit();
	}

	prefix_destroy(p);

	return (1);
}

/* dump a prefix into specified buffer */
int
prefix_write(u_char *buf, int len, struct bgpd_addr *prefix, u_int8_t plen)
{
	int	totlen;

	switch (prefix->aid) {
	case AID_INET:
	case AID_INET6:
		totlen = PREFIX_SIZE(plen);

		if (totlen > len)
			return (-1);
		*buf++ = plen;
		memcpy(buf, &prefix->ba, totlen - 1);
		return (totlen);
	case AID_VPN_IPv4:
		totlen = PREFIX_SIZE(plen) + sizeof(prefix->vpn4.rd) +
		    prefix->vpn4.labellen;
		plen += (sizeof(prefix->vpn4.rd) + prefix->vpn4.labellen) * 8;

		if (totlen > len)
			return (-1);
		*buf++ = plen;
		memcpy(buf, &prefix->vpn4.labelstack, prefix->vpn4.labellen);
		buf += prefix->vpn4.labellen;
		memcpy(buf, &prefix->vpn4.rd, sizeof(prefix->vpn4.rd));
		buf += sizeof(prefix->vpn4.rd);
		memcpy(buf, &prefix->vpn4.addr, PREFIX_SIZE(plen) - 1);
		return (totlen);
	default:
		return (-1);
	}
}
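
/*
 * Wire format produced above for AID_INET/AID_INET6 (editor's note):
 * one prefix-length octet counted in bits, followed by the minimum
 * number of address octets, i.e. PREFIX_SIZE(plen) - 1.  For 10.0.0.0/8
 * that is the two octets 0x08 0x0a.  The AID_VPN_IPv4 case prepends the
 * label stack and the route distinguisher and counts them into plen.
 */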

int
prefix_writebuf(struct ibuf *buf, struct bgpd_addr *prefix, u_int8_t plen)
{
	int	 totlen;
	void	*bptr;

	switch (prefix->aid) {
	case AID_INET:
	case AID_INET6:
		totlen = PREFIX_SIZE(plen);
		break;
	case AID_VPN_IPv4:
		totlen = PREFIX_SIZE(plen) + sizeof(prefix->vpn4.rd) +
		    prefix->vpn4.labellen;
		break;
	default:
		return (-1);
	}

	if ((bptr = ibuf_reserve(buf, totlen)) == NULL)
		return (-1);
	if (prefix_write(bptr, totlen, prefix, plen) == -1)
		return (-1);
	return (0);
}

/*
 * Searches the prefix list of the specified rib_entry for a prefix entry
 * belonging to the given peer. Returns NULL if no match is found.
 */
struct prefix *
prefix_bypeer(struct rib_entry *re, struct rde_peer *peer, u_int32_t flags)
{
	struct prefix	*p;

	LIST_FOREACH(p, &re->prefix_h, rib_l) {
		if (p->aspath->peer != peer)
			continue;
		if (p->aspath->flags & flags &&
		    (flags & F_ANN_DYNAMIC) !=
		    (p->aspath->flags & F_ANN_DYNAMIC))
			continue;
		return (p);
	}
	return (NULL);
}
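
/*
 * Editor's note on the flags check above: when the caller passes
 * announce flags, an entry whose F_ANN_DYNAMIC bit disagrees with the
 * requested one is skipped, keeping static and dynamic announcements
 * of the same prefix apart.
 */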

void
prefix_updateall(struct rde_aspath *asp, enum nexthop_state state,
    enum nexthop_state oldstate)
{
	struct prefix	*p;

	LIST_FOREACH(p, &asp->prefix_h, path_l) {
		/*
		 * skip non local-RIBs or RIBs that are flagged as noeval.
		 */
		if (re_rib(p->re)->flags & F_RIB_NOEVALUATE)
			continue;

		if (oldstate == state && state == NEXTHOP_REACH) {
			/*
			 * The state of the nexthop did not change. The only
			 * thing that may have changed is the true_nexthop
			 * or other internal info. This will not change
			 * the routing decision so shortcut here.
			 */
			if ((re_rib(p->re)->flags & F_RIB_NOFIB) == 0 &&
			    p == p->re->active)
				rde_send_kroute(re_rib(p->re), p, NULL);
			continue;
		}

		/* redo the route decision */
		LIST_REMOVE(p, rib_l);
		/*
		 * If the prefix is the active one, remove it first.
		 * This has to be done because we cannot detect when
		 * the active prefix changes its state. In this case
		 * we know that this is a withdrawal and so the second
		 * prefix_evaluate() will generate no update because
		 * the nexthop is unreachable or ineligible.
		 */
		if (p == p->re->active)
			prefix_evaluate(NULL, p->re);
		prefix_evaluate(p, p->re);
	}
}

/* kill a prefix. */
void
prefix_destroy(struct prefix *p)
{
	struct rde_aspath	*asp;

	asp = p->aspath;
	prefix_unlink(p);
	prefix_free(p);

	if (path_empty(asp))
		path_destroy(asp);
}

/*
 * helper function to clean up the connected networks after a reload
 */
void
prefix_network_clean(struct rde_peer *peer, time_t reloadtime, u_int32_t flags)
{
	struct rde_aspath	*asp, *xasp;
	struct prefix		*p, *xp;

	for (asp = LIST_FIRST(&peer->path_h); asp != NULL; asp = xasp) {
		xasp = LIST_NEXT(asp, peer_l);
		if ((asp->flags & F_ANN_DYNAMIC) != flags)
			continue;
		for (p = LIST_FIRST(&asp->prefix_h); p != NULL; p = xp) {
			xp = LIST_NEXT(p, path_l);
			if (reloadtime > p->lastchange) {
				prefix_unlink(p);
				prefix_free(p);
			}
		}
		if (path_empty(asp))
			path_destroy(asp);
	}
}

/*
 * Link a prefix into the different parent objects.
 */
static void
prefix_link(struct prefix *pref, struct rib_entry *re, struct rde_aspath *asp)
{
	LIST_INSERT_HEAD(&asp->prefix_h, pref, path_l);
	PREFIX_COUNT(asp, 1);

	pref->aspath = asp;
	pref->re = re;
	pref->prefix = re->prefix;
	pt_ref(pref->prefix);
	pref->lastchange = time(NULL);

	/* make route decision */
	prefix_evaluate(pref, re);
}

/*
 * Unlink a prefix from the different parent objects.
 */
static void
prefix_unlink(struct prefix *pref)
{
	struct rib_entry	*re = pref->re;

	/* make route decision */
	LIST_REMOVE(pref, rib_l);
	prefix_evaluate(NULL, re);

	LIST_REMOVE(pref, path_l);
	PREFIX_COUNT(pref->aspath, -1);

	pt_unref(pref->prefix);
	if (pt_empty(pref->prefix))
		pt_remove(pref->prefix);
	if (rib_empty(re))
		rib_remove(re);

	/* destroy all references to other objects */
	pref->aspath = NULL;
	pref->prefix = NULL;
	pref->re = NULL;

	/*
	 * It's the caller's duty to remove empty aspath structures.
	 * Also freeing the unlinked prefix is the caller's duty.
	 */
}

/* alloc and bzero new entry. May not fail. */
static struct prefix *
prefix_alloc(void)
{
	struct prefix *p;

	p = calloc(1, sizeof(*p));
	if (p == NULL)
		fatal("prefix_alloc");
	rdemem.prefix_cnt++;
	return p;
}

/* free an unlinked entry */
static void
prefix_free(struct prefix *pref)
{
	rdemem.prefix_cnt--;
	free(pref);
}

/*
 * nexthop functions
 */
struct nexthop_head	*nexthop_hash(struct bgpd_addr *);
struct nexthop		*nexthop_lookup(struct bgpd_addr *);

/*
 * In BGP there exist two nexthops: the exit nexthop which was announced via
 * BGP and the true nexthop which is used in the FIB -- the forwarding
 * information base, a.k.a. the kernel routing table. When sending updates
 * it is even more confusing. In IBGP we pass the unmodified exit nexthop
 * to the neighbors while in EBGP normally the address of the router is
 * sent. The exit nexthop may be passed to the external neighbor if the
 * neighbor and the exit nexthop reside in the same subnet -- directly
 * connected.
 */
struct nexthop_table {
	LIST_HEAD(nexthop_head, nexthop)	*nexthop_hashtbl;
	u_int32_t				 nexthop_hashmask;
} nexthoptable;

SIPHASH_KEY nexthoptablekey;

void
nexthop_init(u_int32_t hashsize)
{
	u_int32_t	 hs, i;

	for (hs = 1; hs < hashsize; hs <<= 1)
		;
	nexthoptable.nexthop_hashtbl = calloc(hs, sizeof(struct nexthop_head));
	if (nexthoptable.nexthop_hashtbl == NULL)
		fatal("nexthop_init");

	for (i = 0; i < hs; i++)
		LIST_INIT(&nexthoptable.nexthop_hashtbl[i]);
	arc4random_buf(&nexthoptablekey, sizeof(nexthoptablekey));

	nexthoptable.nexthop_hashmask = hs - 1;
}

void
nexthop_shutdown(void)
{
	u_int32_t		 i;
	struct nexthop		*nh, *nnh;

	for (i = 0; i <= nexthoptable.nexthop_hashmask; i++) {
		for (nh = LIST_FIRST(&nexthoptable.nexthop_hashtbl[i]);
		    nh != NULL; nh = nnh) {
			nnh = LIST_NEXT(nh, nexthop_l);
			nh->state = NEXTHOP_UNREACH;
			(void)nexthop_delete(nh);
		}
		if (!LIST_EMPTY(&nexthoptable.nexthop_hashtbl[i]))
			log_warnx("nexthop_shutdown: non-free table");
	}

	free(nexthoptable.nexthop_hashtbl);
}

void
nexthop_update(struct kroute_nexthop *msg)
{
	struct nexthop		*nh;
	struct rde_aspath	*asp;
	enum nexthop_state	 oldstate;

	nh = nexthop_lookup(&msg->nexthop);
	if (nh == NULL) {
		log_warnx("nexthop_update: non-existent nexthop %s",
		    log_addr(&msg->nexthop));
		return;
	}

	oldstate = nh->state;
	if (msg->valid)
		nh->state = NEXTHOP_REACH;
	else
		nh->state = NEXTHOP_UNREACH;

	if (msg->connected) {
		nh->flags |= NEXTHOP_CONNECTED;
		memcpy(&nh->true_nexthop, &nh->exit_nexthop,
		    sizeof(nh->true_nexthop));
	} else
		memcpy(&nh->true_nexthop, &msg->gateway,
		    sizeof(nh->true_nexthop));

	memcpy(&nh->nexthop_net, &msg->net,
	    sizeof(nh->nexthop_net));
	nh->nexthop_netlen = msg->netlen;

	if (nexthop_delete(nh))
		/* nexthop no longer used */
		return;

	if (rde_noevaluate())
		/*
		 * if the decision process is turned off there is no need
		 * for the aspath list walk.
		 */
		return;

	LIST_FOREACH(asp, &nh->path_h, nexthop_l) {
		prefix_updateall(asp, nh->state, oldstate);
	}
}

void
nexthop_modify(struct rde_aspath *asp, struct bgpd_addr *nexthop,
    enum action_types type, u_int8_t aid)
{
	struct nexthop	*nh;

	if (type == ACTION_SET_NEXTHOP && aid != nexthop->aid)
		return;

	asp->flags &= ~F_NEXTHOP_MASK;
	switch (type) {
	case ACTION_SET_NEXTHOP_REJECT:
		asp->flags |= F_NEXTHOP_REJECT;
		break;
	case ACTION_SET_NEXTHOP_BLACKHOLE:
		asp->flags |= F_NEXTHOP_BLACKHOLE;
		break;
	case ACTION_SET_NEXTHOP_NOMODIFY:
		asp->flags |= F_NEXTHOP_NOMODIFY;
		break;
	case ACTION_SET_NEXTHOP_SELF:
		asp->flags |= F_NEXTHOP_SELF;
		break;
	case ACTION_SET_NEXTHOP:
		nh = nexthop_get(nexthop);
		if (asp->flags & F_ATTR_LINKED)
			nexthop_unlink(asp);
		asp->nexthop = nh;
		if (asp->flags & F_ATTR_LINKED)
			nexthop_link(asp);
		break;
	default:
		break;
	}
}
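
/*
 * Editor's note on ACTION_SET_NEXTHOP above: if the aspath is already
 * linked (F_ATTR_LINKED), it is taken off its old nexthop's list before
 * the pointer is swapped and re-linked afterwards, keeping the
 * per-nexthop nexthop_l lists consistent.
 */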

void
nexthop_link(struct rde_aspath *asp)
{
	if (asp->nexthop == NULL)
		return;

	LIST_INSERT_HEAD(&asp->nexthop->path_h, asp, nexthop_l);
}

void
nexthop_unlink(struct rde_aspath *asp)
{
	struct nexthop	*nh;

	if (asp->nexthop == NULL)
		return;

	LIST_REMOVE(asp, nexthop_l);

	/* see if list is empty */
	nh = asp->nexthop;
	asp->nexthop = NULL;

	(void)nexthop_delete(nh);
}

int
nexthop_delete(struct nexthop *nh)
{
	/* nexthop still used by some other aspath */
	if (!LIST_EMPTY(&nh->path_h))
		return (0);

	/* either pinned or in a state where it may not be deleted */
	if (nh->refcnt > 0 || nh->state == NEXTHOP_LOOKUP)
		return (0);

	LIST_REMOVE(nh, nexthop_l);
	rde_send_nexthop(&nh->exit_nexthop, 0);

	rdemem.nexthop_cnt--;
	free(nh);
	return (1);
}

struct nexthop *
nexthop_get(struct bgpd_addr *nexthop)
{
	struct nexthop	*nh;

	nh = nexthop_lookup(nexthop);
	if (nh == NULL) {
		nh = calloc(1, sizeof(*nh));
		if (nh == NULL)
			fatal("nexthop_alloc");
		rdemem.nexthop_cnt++;

		LIST_INIT(&nh->path_h);
		nh->state = NEXTHOP_LOOKUP;
		nh->exit_nexthop = *nexthop;
		LIST_INSERT_HEAD(nexthop_hash(nexthop), nh,
		    nexthop_l);

		rde_send_nexthop(&nh->exit_nexthop, 1);
	}

	return (nh);
}
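
/*
 * Lifecycle sketch (editor's note): nexthop_get() creates an entry in
 * NEXTHOP_LOOKUP state and requests resolution via
 * rde_send_nexthop(..., 1); nexthop_update() later flips the state to
 * NEXTHOP_REACH or NEXTHOP_UNREACH, and nexthop_delete() tears the
 * entry down (announcing rde_send_nexthop(..., 0)) once no aspath
 * references it and it is not pinned by refcnt.
 */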

int
nexthop_compare(struct nexthop *na, struct nexthop *nb)
{
	struct bgpd_addr	*a, *b;

	if (na == nb)
		return (0);
	if (na == NULL)
		return (-1);
	if (nb == NULL)
		return (1);

	a = &na->exit_nexthop;
	b = &nb->exit_nexthop;

	if (a->aid != b->aid)
		return (a->aid - b->aid);

	switch (a->aid) {
	case AID_INET:
		if (ntohl(a->v4.s_addr) > ntohl(b->v4.s_addr))
			return (1);
		if (ntohl(a->v4.s_addr) < ntohl(b->v4.s_addr))
			return (-1);
		return (0);
	case AID_INET6:
		return (memcmp(&a->v6, &b->v6, sizeof(struct in6_addr)));
	default:
		fatalx("nexthop_cmp: unknown af");
	}
	return (-1);
}

struct nexthop *
nexthop_lookup(struct bgpd_addr *nexthop)
{
	struct nexthop	*nh;

	LIST_FOREACH(nh, nexthop_hash(nexthop), nexthop_l) {
		if (memcmp(&nh->exit_nexthop, nexthop,
		    sizeof(struct bgpd_addr)) == 0)
			return (nh);
	}
	return (NULL);
}

struct nexthop_head *
nexthop_hash(struct bgpd_addr *nexthop)
{
	u_int32_t	 h = 0;

	switch (nexthop->aid) {
	case AID_INET:
		h = SipHash24(&nexthoptablekey, &nexthop->v4.s_addr,
		    sizeof(nexthop->v4.s_addr));
		break;
	case AID_INET6:
		h = SipHash24(&nexthoptablekey, &nexthop->v6,
		    sizeof(struct in6_addr));
		break;
	default:
		fatalx("nexthop_hash: unsupported AF");
	}
	return (&nexthoptable.nexthop_hashtbl[h & nexthoptable.nexthop_hashmask]);
}