GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: usr.sbin/bgpd/session.c Lines: 0 1758 0.0 %
Date: 2017-11-13 Branches: 0 1124 0.0 %

Line Branch Exec Source
1
/*	$OpenBSD: session.c,v 1.364 2017/05/29 14:22:51 benno Exp $ */
2
3
/*
4
 * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5
 * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com>
6
 *
7
 * Permission to use, copy, modify, and distribute this software for any
8
 * purpose with or without fee is hereby granted, provided that the above
9
 * copyright notice and this permission notice appear in all copies.
10
 *
11
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
 */
19
20
#include <sys/types.h>
21
22
#include <sys/mman.h>
23
#include <sys/socket.h>
24
#include <sys/time.h>
25
#include <sys/resource.h>
26
#include <sys/un.h>
27
#include <net/if_types.h>
28
#include <netinet/in.h>
29
#include <netinet/ip.h>
30
#include <netinet/tcp.h>
31
#include <arpa/inet.h>
32
#include <limits.h>
33
34
#include <err.h>
35
#include <errno.h>
36
#include <fcntl.h>
37
#include <poll.h>
38
#include <pwd.h>
39
#include <signal.h>
40
#include <stdio.h>
41
#include <stdlib.h>
42
#include <string.h>
43
#include <syslog.h>
44
#include <unistd.h>
45
46
#include "bgpd.h"
47
#include "mrt.h"
48
#include "session.h"
49
#include "log.h"
50
51
/*
 * Fixed slots at the front of the pollfd array built in session_main().
 * The listener sockets follow, starting at index PFD_LISTENERS_START.
 */
#define PFD_PIPE_MAIN		0
52
#define PFD_PIPE_ROUTE		1
53
#define PFD_PIPE_ROUTE_CTL	2
54
#define PFD_SOCK_CTL		3
55
#define PFD_SOCK_RCTL		4
56
#define PFD_SOCK_PFKEY		5
57
#define PFD_LISTENERS_START	6
58
59
void	session_sighdlr(int);
60
int	setup_listeners(u_int *);
61
void	init_conf(struct bgpd_config *);
62
void	init_peer(struct peer *);
63
void	start_timer_holdtime(struct peer *);
64
void	start_timer_keepalive(struct peer *);
65
void	session_close_connection(struct peer *);
66
void	change_state(struct peer *, enum session_state, enum session_events);
67
int	session_setup_socket(struct peer *);
68
void	session_accept(int);
69
int	session_connect(struct peer *);
70
void	session_tcp_established(struct peer *);
71
void	session_capa_ann_none(struct peer *);
72
int	session_capa_add(struct ibuf *, u_int8_t, u_int8_t);
73
int	session_capa_add_mp(struct ibuf *, u_int8_t);
74
int	session_capa_add_gr(struct peer *, struct ibuf *, u_int8_t);
75
struct bgp_msg	*session_newmsg(enum msg_type, u_int16_t);
76
int	session_sendmsg(struct bgp_msg *, struct peer *);
77
void	session_open(struct peer *);
78
void	session_keepalive(struct peer *);
79
void	session_update(u_int32_t, void *, size_t);
80
void	session_notification(struct peer *, u_int8_t, u_int8_t, void *,
81
	    ssize_t);
82
void	session_rrefresh(struct peer *, u_int8_t);
83
int	session_graceful_restart(struct peer *);
84
int	session_graceful_stop(struct peer *);
85
int	session_dispatch_msg(struct pollfd *, struct peer *);
86
int	session_process_msg(struct peer *);
87
int	parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
88
int	parse_open(struct peer *);
89
int	parse_update(struct peer *);
90
int	parse_refresh(struct peer *);
91
int	parse_notification(struct peer *);
92
int	parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *);
93
int	capa_neg_calc(struct peer *);
94
void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
95
void	session_up(struct peer *);
96
void	session_down(struct peer *);
97
void	session_demote(struct peer *, int);
98
int	session_link_state_is_up(int, int, int);
99
100
int		 la_cmp(struct listen_addr *, struct listen_addr *);
101
struct peer	*getpeerbyip(struct sockaddr *);
102
void		 session_template_clone(struct peer *, struct sockaddr *,
103
		    u_int32_t, u_int32_t);
104
int		 session_match_mask(struct peer *, struct bgpd_addr *);
105
struct peer	*getpeerbyid(u_int32_t);
106
107
/*
 * conf is allocated in session_main() and holds the listener list.
 * NOTE(review): nconf is not used in the code visible here; presumably the
 * configuration being loaded during a reconfigure - confirm.
 */
struct bgpd_config	*conf, *nconf;
108
/* system capability flags; no_md5sig and no_pfkey are consulted below */
struct bgpd_sysdep	 sysdep;
109
/*
 * peers is the head of the singly linked peer list (walked via ->next).
 * NOTE(review): npeers is not used in the code visible here - confirm role.
 */
struct peer		*peers, *npeers;
110
/* set to 1 by session_sighdlr() and on fatal errors; ends the main loop */
volatile sig_atomic_t	 session_quit;
111
/* while non-zero, session_main() skips its peer init/reinit/delete pass */
int			 pending_reconf;
112
/* control sockets handed to control_accept() with 0 resp. 1 as 2nd arg */
int			 csock = -1, rcsock = -1;
113
/* incremented in init_peer(), decremented when a peer is deleted */
u_int			 peer_cnt;
114
/* imsg channels polled at PFD_PIPE_ROUTE, PFD_PIPE_ROUTE_CTL, PFD_PIPE_MAIN */
struct imsgbuf		*ibuf_rde;
115
struct imsgbuf		*ibuf_rde_ctl;
116
struct imsgbuf		*ibuf_main;
117
118
/* list of mrt entries; those with queued data are polled for POLLOUT */
struct mrt_head		 mrthead;
119
/*
 * getmonotime() value recorded when accept4() failed with ENFILE/EMFILE;
 * 0 means accepting is not paused.
 */
time_t			 pauseaccept;
120
121
void
122
session_sighdlr(int sig)
123
{
124
	switch (sig) {
125
	case SIGINT:
126
	case SIGTERM:
127
		session_quit = 1;
128
		break;
129
	}
130
}
131
132
int
133
setup_listeners(u_int *la_cnt)
134
{
135
	int			 ttl = 255;
136
	int			 opt;
137
	struct listen_addr	*la;
138
	u_int			 cnt = 0;
139
140
	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
141
		la->reconf = RECONF_NONE;
142
		cnt++;
143
144
		if (la->flags & LISTENER_LISTENING)
145
			continue;
146
147
		if (la->fd == -1) {
148
			log_warn("cannot establish listener on %s: invalid fd",
149
			    log_sockaddr((struct sockaddr *)&la->sa));
150
			continue;
151
		}
152
153
		opt = 1;
154
		if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG,
155
		    &opt, sizeof(opt)) == -1) {
156
			if (errno == ENOPROTOOPT) {	/* system w/o md5sig */
157
				log_warnx("md5sig not available, disabling");
158
				sysdep.no_md5sig = 1;
159
			} else
160
				fatal("setsockopt TCP_MD5SIG");
161
		}
162
163
		/* set ttl to 255 so that ttl-security works */
164
		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
165
		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
166
			log_warn("setup_listeners setsockopt TTL");
167
			continue;
168
		}
169
		if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd,
170
		    IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) {
171
			log_warn("setup_listeners setsockopt hoplimit");
172
			continue;
173
		}
174
175
		if (listen(la->fd, MAX_BACKLOG)) {
176
			close(la->fd);
177
			fatal("listen");
178
		}
179
180
		la->flags |= LISTENER_LISTENING;
181
182
		log_info("listening on %s",
183
		    log_sockaddr((struct sockaddr *)&la->sa));
184
	}
185
186
	*la_cnt = cnt;
187
188
	return (0);
189
}
190
191
/*
 * Main routine of the session engine process; it does not return but ends
 * in exit(0).
 *
 * Startup: initialise logging, chroot into the home directory of
 * BGPD_USER, drop privileges, pledge, install the signal handlers and set
 * up the imsg buffer to the parent plus an empty configuration.
 *
 * Main loop (until session_quit is set): apply pending peer
 * init/reinit/delete actions, size the peer_l, mrt_l and pfd arrays, fill
 * the pollfd array, run due peer timers, poll() and dispatch the results.
 *
 * Shutdown: stop and free all peers, mrt entries and listeners, flush and
 * close the imsg pipes and shut down the control sockets.
 *
 * debug and verbose are handed to log_init() and log_setverbose().
 */
void
192
session_main(int debug, int verbose)
193
{
194
	int			 timeout, pfkeysock;
195
	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
196
	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
197
	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
198
	u_int			 new_cnt;
199
	struct passwd		*pw;
200
	struct peer		*p, **peer_l = NULL, *last, *next;
201
	struct mrt		*m, *xm, **mrt_l = NULL;
202
	struct pollfd		*pfd = NULL;
203
	struct ctl_conn		*ctl_conn;
204
	struct listen_addr	*la;
205
	void			*newp;
206
	short			 events;
207
208
	log_init(debug, LOG_DAEMON);
209
	log_setverbose(verbose);
210
211
	bgpd_process = PROC_SE;
212
	log_procinit(log_procnames[bgpd_process]);
213
214
	if ((pw = getpwnam(BGPD_USER)) == NULL)
215
		fatal(NULL);
216
217
	/* confine the process to the unprivileged user's home directory */
	if (chroot(pw->pw_dir) == -1)
218
		fatal("chroot");
219
	if (chdir("/") == -1)
220
		fatal("chdir(\"/\")");
221
222
	setproctitle("session engine");
223
	/* the pfkey socket is opened before privileges are dropped below */
	pfkeysock = pfkey_init(&sysdep);
224
225
	if (setgroups(1, &pw->pw_gid) ||
226
	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
227
	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
228
		fatal("can't drop privileges");
229
230
	if (pledge("stdio inet recvfd flock rpath cpath wpath", NULL) == -1)
231
		fatal("pledge");
232
233
	signal(SIGTERM, session_sighdlr);
234
	signal(SIGINT, session_sighdlr);
235
	signal(SIGPIPE, SIG_IGN);
236
	signal(SIGHUP, SIG_IGN);
237
	signal(SIGALRM, SIG_IGN);
238
	signal(SIGUSR1, SIG_IGN);
239
240
	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
241
		fatal(NULL);
242
	/* NOTE(review): fd 3 is presumably the pipe inherited from the parent - confirm */
	imsg_init(ibuf_main, 3);
243
244
	TAILQ_INIT(&ctl_conns);
245
	LIST_INIT(&mrthead);
246
	listener_cnt = 0;
247
	peer_cnt = 0;
248
	ctl_cnt = 0;
249
250
	if ((conf = calloc(1, sizeof(struct bgpd_config))) == NULL)
251
		fatal(NULL);
252
	if ((conf->listen_addrs = calloc(1, sizeof(struct listen_addrs))) ==
253
	    NULL)
254
		fatal(NULL);
255
	TAILQ_INIT(conf->listen_addrs);
256
257
	log_info("session engine ready");
258
259
	while (session_quit == 0) {
260
		/* check for peers to be initialized or deleted */
261
		last = NULL;
262
		if (!pending_reconf) {
263
			for (p = peers; p != NULL; p = next) {
264
				/* p may be freed below, so fetch its successor first */
				next = p->next;
265
				/* cloned peer that idled out? */
266
				if (p->template && (p->state == STATE_IDLE ||
267
				    p->state == STATE_ACTIVE) &&
268
				    time(NULL) - p->stats.last_updown >=
269
				    INTERVAL_HOLD_CLONED)
270
					p->conf.reconf_action = RECONF_DELETE;
271
272
				/* new peer that needs init? */
273
				if (p->state == STATE_NONE)
274
					init_peer(p);
275
276
				/* reinit due? */
277
				if (p->conf.reconf_action == RECONF_REINIT) {
278
					session_stop(p, ERR_CEASE_ADMIN_RESET);
279
					if (!p->conf.down)
280
						timer_set(p, Timer_IdleHold, 0);
281
				}
282
283
				/* deletion due? */
284
				if (p->conf.reconf_action == RECONF_DELETE) {
285
					if (p->demoted)
286
						session_demote(p, -1);
287
					p->conf.demote_group[0] = 0;
288
					session_stop(p, ERR_CEASE_PEER_UNCONF);
289
					log_peer_warnx(&p->conf, "removed");
290
					/* unlink p; last is the previous surviving peer */
					if (last != NULL)
291
						last->next = next;
292
					else
293
						peers = next;
294
					timer_remove_all(p);
295
					free(p);
296
					peer_cnt--;
297
					continue;
298
				}
299
				p->conf.reconf_action = RECONF_NONE;
300
				last = p;
301
			}
302
		}
303
304
		/* peer_l, mrt_l and pfd only ever grow and are reused each pass */
		if (peer_cnt > peer_l_elms) {
305
			if ((newp = reallocarray(peer_l, peer_cnt,
306
			    sizeof(struct peer *))) == NULL) {
307
				/* panic for now  */
308
				log_warn("could not resize peer_l from %u -> %u"
309
				    " entries", peer_l_elms, peer_cnt);
310
				fatalx("exiting");
311
			}
312
			peer_l = newp;
313
			peer_l_elms = peer_cnt;
314
		}
315
316
		/* reap removed mrt entries and count those with data to write */
		mrt_cnt = 0;
317
		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
318
			xm = LIST_NEXT(m, entry);
319
			if (m->state == MRT_STATE_REMOVE) {
320
				mrt_clean(m);
321
				LIST_REMOVE(m, entry);
322
				free(m);
323
				continue;
324
			}
325
			if (m->wbuf.queued)
326
				mrt_cnt++;
327
		}
328
329
		if (mrt_cnt > mrt_l_elms) {
330
			if ((newp = reallocarray(mrt_l, mrt_cnt,
331
			    sizeof(struct mrt *))) == NULL) {
332
				/* panic for now  */
333
				log_warn("could not resize mrt_l from %u -> %u"
334
				    " entries", mrt_l_elms, mrt_cnt);
335
				fatalx("exiting");
336
			}
337
			mrt_l = newp;
338
			mrt_l_elms = mrt_cnt;
339
		}
340
341
		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
342
		    ctl_cnt + mrt_cnt;
343
		if (new_cnt > pfd_elms) {
344
			if ((newp = reallocarray(pfd, new_cnt,
345
			    sizeof(struct pollfd))) == NULL) {
346
				/* panic for now  */
347
				log_warn("could not resize pfd from %u -> %u"
348
				    " entries", pfd_elms, new_cnt);
349
				fatalx("exiting");
350
			}
351
			pfd = newp;
352
			pfd_elms = new_cnt;
353
		}
354
355
		/*
		 * pollfd layout: fixed PFD_* slots, listeners, peers,
		 * mrt entries with queued data, control connections.
		 */
		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
356
357
		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
358
		set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde);
359
		set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl);
360
361
		/* while accepting is paused the accepting sockets are not polled */
		if (pauseaccept == 0) {
362
			pfd[PFD_SOCK_CTL].fd = csock;
363
			pfd[PFD_SOCK_CTL].events = POLLIN;
364
			pfd[PFD_SOCK_RCTL].fd = rcsock;
365
			pfd[PFD_SOCK_RCTL].events = POLLIN;
366
		} else {
367
			pfd[PFD_SOCK_CTL].fd = -1;
368
			pfd[PFD_SOCK_RCTL].fd = -1;
369
		}
370
		pfd[PFD_SOCK_PFKEY].fd = pfkeysock;
371
		pfd[PFD_SOCK_PFKEY].events = POLLIN;
372
373
		i = PFD_LISTENERS_START;
374
		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
375
			if (pauseaccept == 0) {
376
				pfd[i].fd = la->fd;
377
				pfd[i].events = POLLIN;
378
			} else
379
				pfd[i].fd = -1;
380
			i++;
381
		}
382
		idx_listeners = i;
383
		timeout = 240;	/* loop every 240s at least */
384
385
		for (p = peers; p != NULL; p = p->next) {
386
			time_t	nextaction;
387
			struct peer_timer *pt;
388
389
			/* check timers */
390
			if ((pt = timer_nextisdue(p)) != NULL) {
391
				switch (pt->type) {
392
				case Timer_Hold:
393
					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
394
					break;
395
				case Timer_ConnectRetry:
396
					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
397
					break;
398
				case Timer_Keepalive:
399
					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
400
					break;
401
				case Timer_IdleHold:
402
					bgp_fsm(p, EVNT_START);
403
					break;
404
				case Timer_IdleHoldReset:
405
					/* decay the idle hold time back towards its initial value */
					p->IdleHoldTime /= 2;
406
					if (p->IdleHoldTime <=
407
					    INTERVAL_IDLE_HOLD_INITIAL) {
408
						p->IdleHoldTime =
409
						    INTERVAL_IDLE_HOLD_INITIAL;
410
						timer_stop(p,
411
						    Timer_IdleHoldReset);
412
						p->errcnt = 0;
413
					} else
414
						timer_set(p,
415
						    Timer_IdleHoldReset,
416
						    p->IdleHoldTime);
417
					break;
418
				case Timer_CarpUndemote:
419
					timer_stop(p, Timer_CarpUndemote);
420
					if (p->demoted &&
421
					    p->state == STATE_ESTABLISHED)
422
						session_demote(p, -1);
423
					break;
424
				case Timer_RestartTimeout:
425
					timer_stop(p, Timer_RestartTimeout);
426
					session_graceful_stop(p);
427
					break;
428
				default:
429
					fatalx("King Bula lost in time");
430
				}
431
			}
432
			/* shorten the poll timeout to the earliest pending timer */
			if ((nextaction = timer_nextduein(p)) != -1 &&
433
			    nextaction < timeout)
434
				timeout = nextaction;
435
436
			/* are we waiting for a write? */
437
			events = POLLIN;
438
			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
439
				events |= POLLOUT;
440
			/* is there still work to do? */
441
			if (p->rbuf && p->rbuf->wpos)
442
				timeout = 0;
443
444
			/* poll events */
445
			if (p->fd != -1 && events != 0) {
446
				pfd[i].fd = p->fd;
447
				pfd[i].events = events;
448
				/* remember which peer owns this pfd slot */
				peer_l[i - idx_listeners] = p;
449
				i++;
450
			}
451
		}
452
453
		idx_peers = i;
454
455
		/*
		 * NOTE(review): mrt_cnt was counted before the timer pass
		 * above, and change_state() calls mrt_dump_state(). If that
		 * queues data on an entry that had none, more slots could be
		 * filled here than mrt_l and pfd were sized for - verify.
		 */
		LIST_FOREACH(m, &mrthead, entry)
456
			if (m->wbuf.queued) {
457
				pfd[i].fd = m->wbuf.fd;
458
				pfd[i].events = POLLOUT;
459
				mrt_l[i - idx_peers] = m;
460
				i++;
461
			}
462
463
		idx_mrts = i;
464
465
		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) {
466
			pfd[i].fd = ctl_conn->ibuf.fd;
467
			pfd[i].events = POLLIN;
468
			if (ctl_conn->ibuf.w.queued > 0)
469
				pfd[i].events |= POLLOUT;
470
			i++;
471
		}
472
473
		/* wake up after a second at most so a pause can be lifted */
		if (pauseaccept && timeout > 1)
474
			timeout = 1;
475
		if (timeout < 0)
476
			timeout = 0;
477
		/* timeout is kept in seconds; poll() takes milliseconds */
		if (poll(pfd, i, timeout * 1000) == -1)
478
			if (errno != EINTR)
479
				fatal("poll error");
480
481
		/*
482
		 * If we previously saw fd exhaustion, we stop accept()
483
		 * for 1 second to throttle the accept() loop.
484
		 */
485
		if (pauseaccept && getmonotime() > pauseaccept + 1)
486
			pauseaccept = 0;
487
488
		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
489
			log_warnx("SE: Lost connection to parent");
490
			session_quit = 1;
491
			continue;
492
		} else
493
			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
494
			    &listener_cnt);
495
496
		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) {
497
			log_warnx("SE: Lost connection to RDE");
498
			msgbuf_clear(&ibuf_rde->w);
499
			free(ibuf_rde);
500
			ibuf_rde = NULL;
501
		} else
502
			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
503
			    &listener_cnt);
504
505
		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) ==
506
		    -1) {
507
			log_warnx("SE: Lost connection to RDE control");
508
			msgbuf_clear(&ibuf_rde_ctl->w);
509
			free(ibuf_rde_ctl);
510
			ibuf_rde_ctl = NULL;
511
		} else
512
			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
513
			    &listener_cnt);
514
515
		if (pfd[PFD_SOCK_CTL].revents & POLLIN)
516
			ctl_cnt += control_accept(csock, 0);
517
518
		if (pfd[PFD_SOCK_RCTL].revents & POLLIN)
519
			ctl_cnt += control_accept(rcsock, 1);
520
521
		if (pfd[PFD_SOCK_PFKEY].revents & POLLIN) {
522
			if (pfkey_read(pfkeysock, NULL) == -1) {
523
				log_warnx("pfkey_read failed, exiting...");
524
				session_quit = 1;
525
			}
526
		}
527
528
		/* j walks the pfd array in the order it was filled in above */
		for (j = PFD_LISTENERS_START; j < idx_listeners; j++)
529
			if (pfd[j].revents & POLLIN)
530
				session_accept(pfd[j].fd);
531
532
		for (; j < idx_peers; j++)
533
			session_dispatch_msg(&pfd[j],
534
			    peer_l[j - idx_listeners]);
535
536
		/* process whatever data is sitting in the peers' read buffers */
		for (p = peers; p != NULL; p = p->next)
537
			if (p->rbuf && p->rbuf->wpos)
538
				session_process_msg(p);
539
540
		for (; j < idx_mrts; j++)
541
			if (pfd[j].revents & POLLOUT)
542
				mrt_write(mrt_l[j - idx_peers]);
543
544
		for (; j < i; j++)
545
			control_dispatch_msg(&pfd[j], &ctl_cnt);
546
	}
547
548
	/* shutdown: administratively stop and free every peer */
	while ((p = peers) != NULL) {
549
		peers = p->next;
550
		strlcpy(p->conf.shutcomm,
551
		    "bgpd shutting down",
552
		    sizeof(p->conf.shutcomm));
553
		session_stop(p, ERR_CEASE_ADMIN_DOWN);
554
		pfkey_remove(p);
555
		free(p);
556
	}
557
558
	while ((m = LIST_FIRST(&mrthead)) != NULL) {
559
		mrt_clean(m);
560
		LIST_REMOVE(m, entry);
561
		free(m);
562
	}
563
564
	while ((la = TAILQ_FIRST(conf->listen_addrs)) != NULL) {
565
		TAILQ_REMOVE(conf->listen_addrs, la, entry);
566
		free(la);
567
	}
568
	free(conf->listen_addrs);
569
	free(peer_l);
570
	free(mrt_l);
571
	free(pfd);
572
573
	/* close pipes */
574
	if (ibuf_rde) {
575
		msgbuf_write(&ibuf_rde->w);
576
		msgbuf_clear(&ibuf_rde->w);
577
		close(ibuf_rde->fd);
578
		free(ibuf_rde);
579
	}
580
	if (ibuf_rde_ctl) {
581
		msgbuf_clear(&ibuf_rde_ctl->w);
582
		close(ibuf_rde_ctl->fd);
583
		free(ibuf_rde_ctl);
584
	}
585
	msgbuf_write(&ibuf_main->w);
586
	msgbuf_clear(&ibuf_main->w);
587
	close(ibuf_main->fd);
588
	free(ibuf_main);
589
590
	control_shutdown(csock);
591
	control_shutdown(rcsock);
592
	log_info("session engine exiting");
593
	exit(0);
594
}
595
596
void
597
init_conf(struct bgpd_config *c)
598
{
599
	if (!c->holdtime)
600
		c->holdtime = INTERVAL_HOLD;
601
	if (!c->connectretry)
602
		c->connectretry = INTERVAL_CONNECTRETRY;
603
}
604
605
void
606
init_peer(struct peer *p)
607
{
608
	TAILQ_INIT(&p->timers);
609
	p->fd = p->wbuf.fd = -1;
610
611
	if (p->conf.if_depend[0])
612
		imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1,
613
		    p->conf.if_depend, sizeof(p->conf.if_depend));
614
	else
615
		p->depend_ok = 1;
616
617
	peer_cnt++;
618
619
	change_state(p, STATE_IDLE, EVNT_NONE);
620
	if (p->conf.down)
621
		timer_stop(p, Timer_IdleHold);		/* no autostart */
622
	else
623
		timer_set(p, Timer_IdleHold, 0);	/* start ASAP */
624
625
	/*
626
	 * on startup, demote if requested.
627
	 * do not handle new peers. they must reach ESTABLISHED beforehands.
628
	 * peers added at runtime have reconf_action set to RECONF_REINIT.
629
	 */
630
	if (p->conf.reconf_action != RECONF_REINIT && p->conf.demote_group[0])
631
		session_demote(p, +1);
632
}
633
634
/*
 * BGP finite state machine: handle `event` for `peer` according to the
 * peer's current state.  The outer switch selects the state, the inner
 * one the event; state transitions themselves are carried out by
 * change_state().
 *
 * In CONNECT and ACTIVE any event without an explicit case sends the peer
 * back to IDLE.  From OPENSENT onwards such an event additionally sends an
 * ERR_FSM notification first.  In IDLE only EVNT_START is acted upon, and
 * STATE_NONE ignores everything.
 */
void
635
bgp_fsm(struct peer *peer, enum session_events event)
636
{
637
	switch (peer->state) {
638
	case STATE_NONE:
639
		/* nothing */
640
		break;
641
	case STATE_IDLE:
642
		switch (event) {
643
		case EVNT_START:
644
			timer_stop(peer, Timer_Hold);
645
			timer_stop(peer, Timer_Keepalive);
646
			timer_stop(peer, Timer_IdleHold);
647
648
			/* allocate read buffer */
649
			/*
			 * NOTE(review): the peer stays in IDLE when pfkey
			 * setup fails or depend_ok is unset; a later
			 * EVNT_START then appears to overwrite rbuf without
			 * freeing it - verify.
			 */
			peer->rbuf = calloc(1, sizeof(struct ibuf_read));
650
			if (peer->rbuf == NULL)
651
				fatal(NULL);
652
653
			/* init write buffer */
654
			msgbuf_init(&peer->wbuf);
655
656
			/* init pfkey - remove old if any, load new ones */
657
			pfkey_remove(peer);
658
			if (pfkey_establish(peer) == -1) {
659
				log_peer_warnx(&peer->conf,
660
				    "pfkey setup failed");
661
				return;
662
			}
663
664
			peer->stats.last_sent_errcode = 0;
665
			peer->stats.last_sent_suberr = 0;
666
667
			/*
			 * Three outcomes: stay idle while the dependency is
			 * not met, wait passively in ACTIVE, or connect
			 * actively.
			 */
			if (!peer->depend_ok)
668
				timer_stop(peer, Timer_ConnectRetry);
669
			else if (peer->passive || peer->conf.passive ||
670
			    peer->conf.template) {
671
				change_state(peer, STATE_ACTIVE, event);
672
				timer_stop(peer, Timer_ConnectRetry);
673
			} else {
674
				change_state(peer, STATE_CONNECT, event);
675
				timer_set(peer, Timer_ConnectRetry,
676
				    conf->connectretry);
677
				session_connect(peer);
678
			}
679
			/* the one-shot passive flag is consumed here */
			peer->passive = 0;
680
			break;
681
		default:
682
			/* ignore */
683
			break;
684
		}
685
		break;
686
	case STATE_CONNECT:
687
		switch (event) {
688
		case EVNT_START:
689
			/* ignore */
690
			break;
691
		case EVNT_CON_OPEN:
692
			session_tcp_established(peer);
693
			session_open(peer);
694
			timer_stop(peer, Timer_ConnectRetry);
695
			peer->holdtime = INTERVAL_HOLD_INITIAL;
696
			start_timer_holdtime(peer);
697
			change_state(peer, STATE_OPENSENT, event);
698
			break;
699
		case EVNT_CON_OPENFAIL:
700
			timer_set(peer, Timer_ConnectRetry,
701
			    conf->connectretry);
702
			session_close_connection(peer);
703
			change_state(peer, STATE_ACTIVE, event);
704
			break;
705
		case EVNT_TIMER_CONNRETRY:
706
			timer_set(peer, Timer_ConnectRetry,
707
			    conf->connectretry);
708
			session_connect(peer);
709
			break;
710
		default:
711
			change_state(peer, STATE_IDLE, event);
712
			break;
713
		}
714
		break;
715
	case STATE_ACTIVE:
716
		switch (event) {
717
		case EVNT_START:
718
			/* ignore */
719
			break;
720
		case EVNT_CON_OPEN:
721
			session_tcp_established(peer);
722
			session_open(peer);
723
			timer_stop(peer, Timer_ConnectRetry);
724
			peer->holdtime = INTERVAL_HOLD_INITIAL;
725
			start_timer_holdtime(peer);
726
			change_state(peer, STATE_OPENSENT, event);
727
			break;
728
		case EVNT_CON_OPENFAIL:
729
			timer_set(peer, Timer_ConnectRetry,
730
			    conf->connectretry);
731
			session_close_connection(peer);
732
			change_state(peer, STATE_ACTIVE, event);
733
			break;
734
		case EVNT_TIMER_CONNRETRY:
735
			/*
			 * NOTE(review): uses peer->holdtime here, while the
			 * other ConnectRetry restarts use conf->connectretry -
			 * confirm this is intended.
			 */
			timer_set(peer, Timer_ConnectRetry,
736
			    peer->holdtime);
737
			change_state(peer, STATE_CONNECT, event);
738
			session_connect(peer);
739
			break;
740
		default:
741
			change_state(peer, STATE_IDLE, event);
742
			break;
743
		}
744
		break;
745
	case STATE_OPENSENT:
746
		switch (event) {
747
		case EVNT_START:
748
			/* ignore */
749
			break;
750
		case EVNT_STOP:
751
			change_state(peer, STATE_IDLE, event);
752
			break;
753
		case EVNT_CON_CLOSED:
754
			session_close_connection(peer);
755
			timer_set(peer, Timer_ConnectRetry,
756
			    conf->connectretry);
757
			change_state(peer, STATE_ACTIVE, event);
758
			break;
759
		case EVNT_CON_FATAL:
760
			change_state(peer, STATE_IDLE, event);
761
			break;
762
		case EVNT_TIMER_HOLDTIME:
763
			session_notification(peer, ERR_HOLDTIMEREXPIRED,
764
			    0, NULL, 0);
765
			change_state(peer, STATE_IDLE, event);
766
			break;
767
		case EVNT_RCVD_OPEN:
768
			/* parse_open calls change_state itself on failure */
769
			if (parse_open(peer))
770
				break;
771
			session_keepalive(peer);
772
			change_state(peer, STATE_OPENCONFIRM, event);
773
			break;
774
		case EVNT_RCVD_NOTIFICATION:
775
			if (parse_notification(peer)) {
776
				change_state(peer, STATE_IDLE, event);
777
				/* don't punish, capa negotiation */
778
				timer_set(peer, Timer_IdleHold, 0);
779
				peer->IdleHoldTime /= 2;
780
			} else
781
				change_state(peer, STATE_IDLE, event);
782
			break;
783
		default:
784
			session_notification(peer,
785
			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL, 0);
786
			change_state(peer, STATE_IDLE, event);
787
			break;
788
		}
789
		break;
790
	case STATE_OPENCONFIRM:
791
		switch (event) {
792
		case EVNT_START:
793
			/* ignore */
794
			break;
795
		case EVNT_STOP:
796
			change_state(peer, STATE_IDLE, event);
797
			break;
798
		case EVNT_CON_CLOSED:
799
		case EVNT_CON_FATAL:
800
			change_state(peer, STATE_IDLE, event);
801
			break;
802
		case EVNT_TIMER_HOLDTIME:
803
			session_notification(peer, ERR_HOLDTIMEREXPIRED,
804
			    0, NULL, 0);
805
			change_state(peer, STATE_IDLE, event);
806
			break;
807
		case EVNT_TIMER_KEEPALIVE:
808
			session_keepalive(peer);
809
			break;
810
		case EVNT_RCVD_KEEPALIVE:
811
			start_timer_holdtime(peer);
812
			change_state(peer, STATE_ESTABLISHED, event);
813
			break;
814
		case EVNT_RCVD_NOTIFICATION:
815
			parse_notification(peer);
816
			change_state(peer, STATE_IDLE, event);
817
			break;
818
		default:
819
			session_notification(peer,
820
			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL, 0);
821
			change_state(peer, STATE_IDLE, event);
822
			break;
823
		}
824
		break;
825
	case STATE_ESTABLISHED:
826
		switch (event) {
827
		case EVNT_START:
828
			/* ignore */
829
			break;
830
		case EVNT_STOP:
831
			change_state(peer, STATE_IDLE, event);
832
			break;
833
		case EVNT_CON_CLOSED:
834
		case EVNT_CON_FATAL:
835
			change_state(peer, STATE_IDLE, event);
836
			break;
837
		case EVNT_TIMER_HOLDTIME:
838
			session_notification(peer, ERR_HOLDTIMEREXPIRED,
839
			    0, NULL, 0);
840
			change_state(peer, STATE_IDLE, event);
841
			break;
842
		case EVNT_TIMER_KEEPALIVE:
843
			session_keepalive(peer);
844
			break;
845
		case EVNT_RCVD_KEEPALIVE:
846
			start_timer_holdtime(peer);
847
			break;
848
		case EVNT_RCVD_UPDATE:
849
			/* the hold timer is restarted before and after parsing */
			start_timer_holdtime(peer);
850
			if (parse_update(peer))
851
				change_state(peer, STATE_IDLE, event);
852
			else
853
				start_timer_holdtime(peer);
854
			break;
855
		case EVNT_RCVD_NOTIFICATION:
856
			parse_notification(peer);
857
			change_state(peer, STATE_IDLE, event);
858
			break;
859
		default:
860
			session_notification(peer,
861
			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL, 0);
862
			change_state(peer, STATE_IDLE, event);
863
			break;
864
		}
865
		break;
866
	}
867
}
868
869
void
870
start_timer_holdtime(struct peer *peer)
871
{
872
	if (peer->holdtime > 0)
873
		timer_set(peer, Timer_Hold, peer->holdtime);
874
	else
875
		timer_stop(peer, Timer_Hold);
876
}
877
878
void
879
start_timer_keepalive(struct peer *peer)
880
{
881
	if (peer->holdtime > 0)
882
		timer_set(peer, Timer_Keepalive, peer->holdtime / 3);
883
	else
884
		timer_stop(peer, Timer_Keepalive);
885
}
886
887
void
888
session_close_connection(struct peer *peer)
889
{
890
	if (peer->fd != -1) {
891
		close(peer->fd);
892
		pauseaccept = 0;
893
	}
894
	peer->fd = peer->wbuf.fd = -1;
895
}
896
897
/*
 * Move `peer` to `state` because of `event`.
 *
 * The switch performs the side effects that belong to entering the new
 * state; it still sees the old state in peer->state.  Afterwards the
 * change is logged, written to every matching MRT_ALL_IN / MRT_ALL_OUT
 * dump, and only then are peer->prev_state and peer->state updated.
 *
 * Entering STATE_IDLE tears the session down: timers are stopped, the
 * connection is closed, buffers are released and the idle hold timer is
 * armed unless the event is EVNT_STOP.
 */
void
898
change_state(struct peer *peer, enum session_state state,
899
    enum session_events event)
900
{
901
	struct mrt	*mrt;
902
903
	switch (state) {
904
	case STATE_IDLE:
905
		/* carp demotion first. new peers handled in init_peer */
906
		if (peer->state == STATE_ESTABLISHED &&
907
		    peer->conf.demote_group[0] && !peer->demoted)
908
			session_demote(peer, +1);
909
910
		/*
911
		 * try to write out what's buffered (maybe a notification),
912
		 * don't bother if it fails
913
		 */
914
		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
915
			msgbuf_write(&peer->wbuf);
916
917
		/*
918
		 * we must start the timer for the next EVNT_START
919
		 * if we are coming here due to an error and the
920
		 * session was not established successfully before, the
921
		 * starttimerinterval needs to be exponentially increased
922
		 */
923
		if (peer->IdleHoldTime == 0)
924
			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
925
		peer->holdtime = INTERVAL_HOLD_INITIAL;
926
		timer_stop(peer, Timer_ConnectRetry);
927
		timer_stop(peer, Timer_Keepalive);
928
		timer_stop(peer, Timer_Hold);
929
		timer_stop(peer, Timer_IdleHold);
930
		timer_stop(peer, Timer_IdleHoldReset);
931
		session_close_connection(peer);
932
		msgbuf_clear(&peer->wbuf);
933
		free(peer->rbuf);
934
		peer->rbuf = NULL;
935
		bzero(&peer->capa.peer, sizeof(peer->capa.peer));
936
937
		/* an administrative stop does not schedule a restart */
		if (event != EVNT_STOP) {
938
			timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
939
			/* double the next hold time, bounded by MAX_IDLE_HOLD */
			if (event != EVNT_NONE &&
940
			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
941
				peer->IdleHoldTime *= 2;
942
		}
943
		if (peer->state == STATE_ESTABLISHED) {
944
			if (peer->capa.neg.grestart.restart == 2 &&
945
			    (event == EVNT_CON_CLOSED ||
946
			    event == EVNT_CON_FATAL)) {
947
				/* don't punish graceful restart */
948
				timer_set(peer, Timer_IdleHold, 0);
949
				peer->IdleHoldTime /= 2;
950
				session_graceful_restart(peer);
951
			} else
952
				session_down(peer);
953
		}
954
		if (peer->state == STATE_NONE ||
955
		    peer->state == STATE_ESTABLISHED) {
956
			/* initialize capability negotiation structures */
957
			memcpy(&peer->capa.ann, &peer->conf.capabilities,
958
			    sizeof(peer->capa.ann));
959
			if (!peer->conf.announce_capa)
960
				session_capa_ann_none(peer);
961
		}
962
		break;
963
	case STATE_CONNECT:
964
		if (peer->state == STATE_ESTABLISHED &&
965
		    peer->capa.neg.grestart.restart == 2) {
966
			/* do the graceful restart dance */
967
			session_graceful_restart(peer);
968
			peer->holdtime = INTERVAL_HOLD_INITIAL;
969
			timer_stop(peer, Timer_ConnectRetry);
970
			timer_stop(peer, Timer_Keepalive);
971
			timer_stop(peer, Timer_Hold);
972
			timer_stop(peer, Timer_IdleHold);
973
			timer_stop(peer, Timer_IdleHoldReset);
974
			session_close_connection(peer);
975
			msgbuf_clear(&peer->wbuf);
976
			bzero(&peer->capa.peer, sizeof(peer->capa.peer));
977
		}
978
		break;
979
	case STATE_ACTIVE:
980
		break;
981
	case STATE_OPENSENT:
982
		break;
983
	case STATE_OPENCONFIRM:
984
		break;
985
	case STATE_ESTABLISHED:
986
		timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime);
987
		if (peer->demoted)
988
			timer_set(peer, Timer_CarpUndemote,
989
			    INTERVAL_HOLD_DEMOTED);
990
		session_up(peer);
991
		break;
992
	default:		/* unknown target state: no side effects */
993
		break;
994
	}
995
996
	log_statechange(peer, state, event);
997
	/* report the transition to each state dump that covers this peer */
	LIST_FOREACH(mrt, &mrthead, entry) {
998
		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
999
			continue;
1000
		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1001
		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
1002
		    mrt->group_id == peer->conf.groupid))
1003
			mrt_dump_state(mrt, peer->state, state, peer);
1004
	}
1005
	peer->prev_state = peer->state;
1006
	peer->state = state;
1007
}
1008
1009
void
1010
session_accept(int listenfd)
1011
{
1012
	int			 connfd;
1013
	int			 opt;
1014
	socklen_t		 len;
1015
	struct sockaddr_storage	 cliaddr;
1016
	struct peer		*p = NULL;
1017
1018
	len = sizeof(cliaddr);
1019
	if ((connfd = accept4(listenfd,
1020
	    (struct sockaddr *)&cliaddr, &len,
1021
	    SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) {
1022
		if (errno == ENFILE || errno == EMFILE)
1023
			pauseaccept = getmonotime();
1024
		else if (errno != EWOULDBLOCK && errno != EINTR &&
1025
		    errno != ECONNABORTED)
1026
			log_warn("accept");
1027
		return;
1028
	}
1029
1030
	p = getpeerbyip((struct sockaddr *)&cliaddr);
1031
1032
	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
1033
		if (timer_running(p, Timer_IdleHold, NULL)) {
1034
			/* fast reconnect after clear */
1035
			p->passive = 1;
1036
			bgp_fsm(p, EVNT_START);
1037
		}
1038
	}
1039
1040
	if (p != NULL &&
1041
	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1042
		if (p->fd != -1) {
1043
			if (p->state == STATE_CONNECT)
1044
				session_close_connection(p);
1045
			else {
1046
				close(connfd);
1047
				return;
1048
			}
1049
		}
1050
1051
open:
1052
		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1053
			log_peer_warnx(&p->conf,
1054
			    "ipsec or md5sig configured but not available");
1055
			close(connfd);
1056
			return;
1057
		}
1058
1059
		if (p->conf.auth.method == AUTH_MD5SIG) {
1060
			if (sysdep.no_md5sig) {
1061
				log_peer_warnx(&p->conf,
1062
				    "md5sig configured but not available");
1063
				close(connfd);
1064
				return;
1065
			}
1066
			len = sizeof(opt);
1067
			if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG,
1068
			    &opt, &len) == -1)
1069
				fatal("getsockopt TCP_MD5SIG");
1070
			if (!opt) {	/* non-md5'd connection! */
1071
				log_peer_warnx(&p->conf,
1072
				    "connection attempt without md5 signature");
1073
				close(connfd);
1074
				return;
1075
			}
1076
		}
1077
		p->fd = p->wbuf.fd = connfd;
1078
		if (session_setup_socket(p)) {
1079
			close(connfd);
1080
			return;
1081
		}
1082
		bgp_fsm(p, EVNT_CON_OPEN);
1083
		return;
1084
	} else if (p != NULL && p->state == STATE_ESTABLISHED &&
1085
	    p->capa.neg.grestart.restart == 2) {
1086
		/* first do the graceful restart dance */
1087
		change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
1088
		/* then do part of the open dance */
1089
		goto open;
1090
	} else {
1091
		log_conn_attempt(p, (struct sockaddr *)&cliaddr);
1092
		close(connfd);
1093
	}
1094
}
1095
1096
int
1097
session_connect(struct peer *peer)
1098
{
1099
	int			 opt = 1;
1100
	struct sockaddr		*sa;
1101
1102
	/*
1103
	 * we do not need the overcomplicated collision detection RFC 1771
1104
	 * describes; we simply make sure there is only ever one concurrent
1105
	 * tcp connection per peer.
1106
	 */
1107
	if (peer->fd != -1)
1108
		return (-1);
1109
1110
	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid),
1111
	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) {
1112
		log_peer_warn(&peer->conf, "session_connect socket");
1113
		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1114
		return (-1);
1115
	}
1116
1117
	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1118
		log_peer_warnx(&peer->conf,
1119
		    "ipsec or md5sig configured but not available");
1120
		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1121
		return (-1);
1122
	}
1123
1124
	if (peer->conf.auth.method == AUTH_MD5SIG) {
1125
		if (sysdep.no_md5sig) {
1126
			log_peer_warnx(&peer->conf,
1127
			    "md5sig configured but not available");
1128
			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1129
			return (-1);
1130
		}
1131
		if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG,
1132
		    &opt, sizeof(opt)) == -1) {
1133
			log_peer_warn(&peer->conf, "setsockopt md5sig");
1134
			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1135
			return (-1);
1136
		}
1137
	}
1138
	peer->wbuf.fd = peer->fd;
1139
1140
	/* if update source is set we need to bind() */
1141
	if ((sa = addr2sa(&peer->conf.local_addr, 0)) != NULL) {
1142
		if (bind(peer->fd, sa, sa->sa_len) == -1) {
1143
			log_peer_warn(&peer->conf, "session_connect bind");
1144
			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1145
			return (-1);
1146
		}
1147
	}
1148
1149
	if (session_setup_socket(peer)) {
1150
		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1151
		return (-1);
1152
	}
1153
1154
	sa = addr2sa(&peer->conf.remote_addr, BGP_PORT);
1155
	if (connect(peer->fd, sa, sa->sa_len) == -1) {
1156
		if (errno != EINPROGRESS) {
1157
			if (errno != peer->lasterr)
1158
				log_peer_warn(&peer->conf, "connect");
1159
			peer->lasterr = errno;
1160
			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1161
			return (-1);
1162
		}
1163
	} else
1164
		bgp_fsm(peer, EVNT_CON_OPEN);
1165
1166
	return (0);
1167
}
1168
1169
int
1170
session_setup_socket(struct peer *p)
1171
{
1172
	int	ttl = p->conf.distance;
1173
	int	pre = IPTOS_PREC_INTERNETCONTROL;
1174
	int	nodelay = 1;
1175
	int	bsize;
1176
1177
	switch (p->conf.remote_addr.aid) {
1178
	case AID_INET:
1179
		/* set precedence, see RFC 1771 appendix 5 */
1180
		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1181
		    -1) {
1182
			log_peer_warn(&p->conf,
1183
			    "session_setup_socket setsockopt TOS");
1184
			return (-1);
1185
		}
1186
1187
		if (p->conf.ebgp) {
1188
			/* set TTL to foreign router's distance
1189
			   1=direct n=multihop with ttlsec, we always use 255 */
1190
			if (p->conf.ttlsec) {
1191
				ttl = 256 - p->conf.distance;
1192
				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1193
				    &ttl, sizeof(ttl)) == -1) {
1194
					log_peer_warn(&p->conf,
1195
					    "session_setup_socket: "
1196
					    "setsockopt MINTTL");
1197
					return (-1);
1198
				}
1199
				ttl = 255;
1200
			}
1201
1202
			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1203
			    sizeof(ttl)) == -1) {
1204
				log_peer_warn(&p->conf,
1205
				    "session_setup_socket setsockopt TTL");
1206
				return (-1);
1207
			}
1208
		}
1209
		break;
1210
	case AID_INET6:
1211
		if (p->conf.ebgp) {
1212
			/* set hoplimit to foreign router's distance
1213
			   1=direct n=multihop with ttlsec, we always use 255 */
1214
			if (p->conf.ttlsec) {
1215
				ttl = 256 - p->conf.distance;
1216
				if (setsockopt(p->fd, IPPROTO_IPV6,
1217
				    IPV6_MINHOPCOUNT, &ttl, sizeof(ttl))
1218
				    == -1) {
1219
					log_peer_warn(&p->conf,
1220
					    "session_setup_socket: "
1221
					    "setsockopt MINHOPCOUNT");
1222
					return (-1);
1223
				}
1224
				ttl = 255;
1225
			}
1226
			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1227
			    &ttl, sizeof(ttl)) == -1) {
1228
				log_peer_warn(&p->conf,
1229
				    "session_setup_socket setsockopt hoplimit");
1230
				return (-1);
1231
			}
1232
		}
1233
		break;
1234
	}
1235
1236
	/* set TCP_NODELAY */
1237
	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1238
	    sizeof(nodelay)) == -1) {
1239
		log_peer_warn(&p->conf,
1240
		    "session_setup_socket setsockopt TCP_NODELAY");
1241
		return (-1);
1242
	}
1243
1244
	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1245
	if (p->conf.auth.method != AUTH_NONE) {
1246
		/* try to increase bufsize. no biggie if it fails */
1247
		bsize = 65535;
1248
		while (bsize > 8192 &&
1249
		    setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1250
		    sizeof(bsize)) == -1 && errno != EINVAL)
1251
			bsize /= 2;
1252
		bsize = 65535;
1253
		while (bsize > 8192 &&
1254
		    setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1255
		    sizeof(bsize)) == -1 && errno != EINVAL)
1256
			bsize /= 2;
1257
	}
1258
1259
	return (0);
1260
}
1261
1262
void
1263
session_tcp_established(struct peer *peer)
1264
{
1265
	socklen_t	len;
1266
1267
	len = sizeof(peer->sa_local);
1268
	if (getsockname(peer->fd, (struct sockaddr *)&peer->sa_local,
1269
	    &len) == -1)
1270
		log_warn("getsockname");
1271
	len = sizeof(peer->sa_remote);
1272
	if (getpeername(peer->fd, (struct sockaddr *)&peer->sa_remote,
1273
	    &len) == -1)
1274
		log_warn("getpeername");
1275
}
1276
1277
void
1278
session_capa_ann_none(struct peer *peer)
1279
{
1280
	bzero(&peer->capa.ann, sizeof(peer->capa.ann));
1281
}
1282
1283
/*
 * Append a capability header (one byte code, one byte length) to opb.
 * Returns the sum of the ibuf_add() results, i.e. 0 when both succeeded.
 */
int
session_capa_add(struct ibuf *opb, u_int8_t capa_code, u_int8_t capa_len)
{
	int	failed;

	failed = ibuf_add(opb, &capa_code, sizeof(capa_code));
	failed += ibuf_add(opb, &capa_len, sizeof(capa_len));

	return (failed);
}
1292
1293
/*
 * Append the 4-byte multiprotocol capability value for aid to buf:
 * AFI (network byte order), one zero byte, SAFI.
 *
 * An aid that aid2afi() rejects is a fatal error.  Returns the sum of the
 * ibuf_add() results, i.e. 0 when all three succeeded.
 */
int
session_capa_add_mp(struct ibuf *buf, u_int8_t aid)
{
	u_int16_t	 afi;
	u_int8_t	 safi;
	u_int8_t	 reserved = 0;
	int		 failed;

	if (aid2afi(aid, &afi, &safi) == -1)
		fatalx("session_capa_add_mp: bad afi/safi pair");

	afi = htons(afi);
	failed = ibuf_add(buf, &afi, sizeof(afi));
	failed += ibuf_add(buf, &reserved, sizeof(reserved));
	failed += ibuf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1309
1310
int
1311
session_capa_add_gr(struct peer *p, struct ibuf *b, u_int8_t aid)
1312
{
1313
	u_int		errs = 0;
1314
	u_int16_t	afi;
1315
	u_int8_t	flags, safi;
1316
1317
	if (aid2afi(aid, &afi, &safi)) {
1318
		log_warn("session_capa_add_gr: bad AID");
1319
		return (1);
1320
	}
1321
	if (p->capa.neg.grestart.flags[aid] & CAPA_GR_RESTARTING)
1322
		flags = CAPA_GR_F_FLAG;
1323
	else
1324
		flags = 0;
1325
1326
	afi = htons(afi);
1327
	errs += ibuf_add(b, &afi, sizeof(afi));
1328
	errs += ibuf_add(b, &safi, sizeof(safi));
1329
	errs += ibuf_add(b, &flags, sizeof(flags));
1330
1331
	return (errs);
1332
}
1333
1334
struct bgp_msg *
1335
session_newmsg(enum msg_type msgtype, u_int16_t len)
1336
{
1337
	struct bgp_msg		*msg;
1338
	struct msg_header	 hdr;
1339
	struct ibuf		*buf;
1340
	int			 errs = 0;
1341
1342
	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1343
	hdr.len = htons(len);
1344
	hdr.type = msgtype;
1345
1346
	if ((buf = ibuf_open(len)) == NULL)
1347
		return (NULL);
1348
1349
	errs += ibuf_add(buf, &hdr.marker, sizeof(hdr.marker));
1350
	errs += ibuf_add(buf, &hdr.len, sizeof(hdr.len));
1351
	errs += ibuf_add(buf, &hdr.type, sizeof(hdr.type));
1352
1353
	if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
1354
		ibuf_free(buf);
1355
		return (NULL);
1356
	}
1357
1358
	msg->buf = buf;
1359
	msg->type = msgtype;
1360
	msg->len = len;
1361
1362
	return (msg);
1363
}
1364
1365
int
1366
session_sendmsg(struct bgp_msg *msg, struct peer *p)
1367
{
1368
	struct mrt		*mrt;
1369
1370
	LIST_FOREACH(mrt, &mrthead, entry) {
1371
		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1372
		    mrt->type == MRT_UPDATE_OUT)))
1373
			continue;
1374
		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1375
		    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1376
		    mrt->group_id == p->conf.groupid))
1377
			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p);
1378
	}
1379
1380
	ibuf_close(&p->wbuf, msg->buf);
1381
	if (!p->throttled && p->wbuf.queued > SESS_MSG_HIGH_MARK) {
1382
		if (imsg_compose(ibuf_rde, IMSG_XOFF, p->conf.id, 0, -1,
1383
		    NULL, 0) == -1)
1384
			log_peer_warn(&p->conf, "imsg_compose XOFF");
1385
		p->throttled = 1;
1386
	}
1387
1388
	free(msg);
1389
	return (0);
1390
}
1391
1392
void
1393
session_open(struct peer *p)
1394
{
1395
	struct bgp_msg		*buf;
1396
	struct ibuf		*opb;
1397
	struct msg_open		 msg;
1398
	u_int16_t		 len;
1399
	u_int8_t		 i, op_type, optparamlen = 0;
1400
	int			 errs = 0;
1401
	int			 mpcapa = 0;
1402
1403
1404
	if ((opb = ibuf_dynamic(0, UCHAR_MAX - sizeof(op_type) -
1405
	    sizeof(optparamlen))) == NULL) {
1406
		bgp_fsm(p, EVNT_CON_FATAL);
1407
		return;
1408
	}
1409
1410
	/* multiprotocol extensions, RFC 4760 */
1411
	for (i = 0; i < AID_MAX; i++)
1412
		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
1413
			errs += session_capa_add(opb, CAPA_MP, 4);
1414
			errs += session_capa_add_mp(opb, i);
1415
			mpcapa++;
1416
		}
1417
1418
	/* route refresh, RFC 2918 */
1419
	if (p->capa.ann.refresh)	/* no data */
1420
		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1421
1422
	/* graceful restart and End-of-RIB marker, RFC 4724 */
1423
	if (p->capa.ann.grestart.restart) {
1424
		int		rst = 0;
1425
		u_int16_t	hdr;
1426
		u_int8_t	grlen;
1427
1428
		if (mpcapa) {
1429
			grlen = 2 + 4 * mpcapa;
1430
			for (i = 0; i < AID_MAX; i++) {
1431
				if (p->capa.neg.grestart.flags[i] &
1432
				    CAPA_GR_RESTARTING)
1433
					rst++;
1434
			}
1435
		} else {	/* AID_INET */
1436
			grlen = 2 + 4;
1437
			if (p->capa.neg.grestart.flags[AID_INET] &
1438
			    CAPA_GR_RESTARTING)
1439
				rst++;
1440
		}
1441
1442
		hdr = conf->holdtime;		/* default timeout */
1443
		/* if client does graceful restart don't set R flag */
1444
		if (!rst)
1445
			hdr |= CAPA_GR_R_FLAG;
1446
		hdr = htons(hdr);
1447
1448
		errs += session_capa_add(opb, CAPA_RESTART, grlen);
1449
		errs += ibuf_add(opb, &hdr, sizeof(hdr));
1450
1451
		if (mpcapa) {
1452
			for (i = 0; i < AID_MAX; i++) {
1453
				if (p->capa.ann.mp[i]) {
1454
					errs += session_capa_add_gr(p, opb, i);
1455
				}
1456
			}
1457
		} else {	/* AID_INET */
1458
			errs += session_capa_add_gr(p, opb, AID_INET);
1459
		}
1460
	}
1461
1462
	/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
1463
	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1464
		u_int32_t	nas;
1465
1466
		nas = htonl(p->conf.local_as);
1467
		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
1468
		errs += ibuf_add(opb, &nas, sizeof(nas));
1469
	}
1470
1471
	if (ibuf_size(opb))
1472
		optparamlen = ibuf_size(opb) + sizeof(op_type) +
1473
		    sizeof(optparamlen);
1474
1475
	len = MSGSIZE_OPEN_MIN + optparamlen;
1476
	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
1477
		ibuf_free(opb);
1478
		bgp_fsm(p, EVNT_CON_FATAL);
1479
		return;
1480
	}
1481
1482
	msg.version = 4;
1483
	msg.myas = htons(p->conf.local_short_as);
1484
	if (p->conf.holdtime)
1485
		msg.holdtime = htons(p->conf.holdtime);
1486
	else
1487
		msg.holdtime = htons(conf->holdtime);
1488
	msg.bgpid = conf->bgpid;	/* is already in network byte order */
1489
	msg.optparamlen = optparamlen;
1490
1491
	errs += ibuf_add(buf->buf, &msg.version, sizeof(msg.version));
1492
	errs += ibuf_add(buf->buf, &msg.myas, sizeof(msg.myas));
1493
	errs += ibuf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
1494
	errs += ibuf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
1495
	errs += ibuf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));
1496
1497
	if (optparamlen) {
1498
		op_type = OPT_PARAM_CAPABILITIES;
1499
		optparamlen = ibuf_size(opb);
1500
		errs += ibuf_add(buf->buf, &op_type, sizeof(op_type));
1501
		errs += ibuf_add(buf->buf, &optparamlen, sizeof(optparamlen));
1502
		errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb));
1503
	}
1504
1505
	ibuf_free(opb);
1506
1507
	if (errs) {
1508
		ibuf_free(buf->buf);
1509
		free(buf);
1510
		bgp_fsm(p, EVNT_CON_FATAL);
1511
		return;
1512
	}
1513
1514
	if (session_sendmsg(buf, p) == -1) {
1515
		bgp_fsm(p, EVNT_CON_FATAL);
1516
		return;
1517
	}
1518
1519
	p->stats.msg_sent_open++;
1520
}
1521
1522
void
1523
session_keepalive(struct peer *p)
1524
{
1525
	struct bgp_msg		*buf;
1526
1527
	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1528
	    session_sendmsg(buf, p) == -1) {
1529
		bgp_fsm(p, EVNT_CON_FATAL);
1530
		return;
1531
	}
1532
1533
	start_timer_keepalive(p);
1534
	p->stats.msg_sent_keepalive++;
1535
}
1536
1537
void
1538
session_update(u_int32_t peerid, void *data, size_t datalen)
1539
{
1540
	struct peer		*p;
1541
	struct bgp_msg		*buf;
1542
1543
	if ((p = getpeerbyid(peerid)) == NULL) {
1544
		log_warnx("no such peer: id=%u", peerid);
1545
		return;
1546
	}
1547
1548
	if (p->state != STATE_ESTABLISHED)
1549
		return;
1550
1551
	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1552
		bgp_fsm(p, EVNT_CON_FATAL);
1553
		return;
1554
	}
1555
1556
	if (ibuf_add(buf->buf, data, datalen)) {
1557
		ibuf_free(buf->buf);
1558
		free(buf);
1559
		bgp_fsm(p, EVNT_CON_FATAL);
1560
		return;
1561
	}
1562
1563
	if (session_sendmsg(buf, p) == -1) {
1564
		bgp_fsm(p, EVNT_CON_FATAL);
1565
		return;
1566
	}
1567
1568
	start_timer_keepalive(p);
1569
	p->stats.msg_sent_update++;
1570
}
1571
1572
void
1573
session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode,
1574
    void *data, ssize_t datalen)
1575
{
1576
	struct bgp_msg		*buf;
1577
	int			 errs = 0;
1578
1579
	if (p->stats.last_sent_errcode)	/* some notification already sent */
1580
		return;
1581
1582
	log_notification(p, errcode, subcode, data, datalen, "sending");
1583
1584
	if ((buf = session_newmsg(NOTIFICATION,
1585
	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1586
		bgp_fsm(p, EVNT_CON_FATAL);
1587
		return;
1588
	}
1589
1590
	errs += ibuf_add(buf->buf, &errcode, sizeof(errcode));
1591
	errs += ibuf_add(buf->buf, &subcode, sizeof(subcode));
1592
1593
	if (datalen > 0)
1594
		errs += ibuf_add(buf->buf, data, datalen);
1595
1596
	if (errs) {
1597
		ibuf_free(buf->buf);
1598
		free(buf);
1599
		bgp_fsm(p, EVNT_CON_FATAL);
1600
		return;
1601
	}
1602
1603
	if (session_sendmsg(buf, p) == -1) {
1604
		bgp_fsm(p, EVNT_CON_FATAL);
1605
		return;
1606
	}
1607
1608
	p->stats.msg_sent_notification++;
1609
	p->stats.last_sent_errcode = errcode;
1610
	p->stats.last_sent_suberr = subcode;
1611
}
1612
1613
int
1614
session_neighbor_rrefresh(struct peer *p)
1615
{
1616
	u_int8_t	i;
1617
1618
	if (!p->capa.peer.refresh)
1619
		return (-1);
1620
1621
	for (i = 0; i < AID_MAX; i++) {
1622
		if (p->capa.peer.mp[i] != 0)
1623
			session_rrefresh(p, i);
1624
	}
1625
1626
	return (0);
1627
}
1628
1629
void
1630
session_rrefresh(struct peer *p, u_int8_t aid)
1631
{
1632
	struct bgp_msg		*buf;
1633
	int			 errs = 0;
1634
	u_int16_t		 afi;
1635
	u_int8_t		 safi, null8 = 0;
1636
1637
	if (aid2afi(aid, &afi, &safi) == -1)
1638
		fatalx("session_rrefresh: bad afi/safi pair");
1639
1640
	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1641
		bgp_fsm(p, EVNT_CON_FATAL);
1642
		return;
1643
	}
1644
1645
	afi = htons(afi);
1646
	errs += ibuf_add(buf->buf, &afi, sizeof(afi));
1647
	errs += ibuf_add(buf->buf, &null8, sizeof(null8));
1648
	errs += ibuf_add(buf->buf, &safi, sizeof(safi));
1649
1650
	if (errs) {
1651
		ibuf_free(buf->buf);
1652
		free(buf);
1653
		bgp_fsm(p, EVNT_CON_FATAL);
1654
		return;
1655
	}
1656
1657
	if (session_sendmsg(buf, p) == -1) {
1658
		bgp_fsm(p, EVNT_CON_FATAL);
1659
		return;
1660
	}
1661
1662
	p->stats.msg_sent_rrefresh++;
1663
}
1664
1665
int
1666
session_graceful_restart(struct peer *p)
1667
{
1668
	u_int8_t	i;
1669
1670
	timer_set(p, Timer_RestartTimeout, p->capa.neg.grestart.timeout);
1671
1672
	for (i = 0; i < AID_MAX; i++) {
1673
		if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
1674
			if (imsg_compose(ibuf_rde, IMSG_SESSION_STALE,
1675
			    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
1676
				return (-1);
1677
			log_peer_warnx(&p->conf,
1678
			    "graceful restart of %s, keeping routes",
1679
			    aid2str(i));
1680
			p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
1681
		} else if (p->capa.neg.mp[i]) {
1682
			if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
1683
			    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
1684
				return (-1);
1685
			log_peer_warnx(&p->conf,
1686
			    "graceful restart of %s, flushing routes",
1687
			    aid2str(i));
1688
		}
1689
	}
1690
	return (0);
1691
}
1692
1693
int
1694
session_graceful_stop(struct peer *p)
1695
{
1696
	u_int8_t	i;
1697
1698
	for (i = 0; i < AID_MAX; i++) {
1699
		/*
1700
		 * Only flush if the peer is restarting and the timeout fired.
1701
		 * In all other cases the session was already flushed when the
1702
		 * session went down or when the new open message was parsed.
1703
		 */
1704
		if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
1705
			log_peer_warnx(&p->conf, "graceful restart of %s, "
1706
			    "time-out, flushing", aid2str(i));
1707
			if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
1708
			    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
1709
				return (-1);
1710
		}
1711
		p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
1712
	}
1713
	return (0);
1714
}
1715
1716
int
1717
session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1718
{
1719
	ssize_t		n;
1720
	socklen_t	len;
1721
	int		error;
1722
1723
	if (p->state == STATE_CONNECT) {
1724
		if (pfd->revents & POLLOUT) {
1725
			if (pfd->revents & POLLIN) {
1726
				/* error occurred */
1727
				len = sizeof(error);
1728
				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1729
				    &error, &len) == -1 || error) {
1730
					if (error)
1731
						errno = error;
1732
					if (errno != p->lasterr) {
1733
						log_peer_warn(&p->conf,
1734
						    "socket error");
1735
						p->lasterr = errno;
1736
					}
1737
					bgp_fsm(p, EVNT_CON_OPENFAIL);
1738
					return (1);
1739
				}
1740
			}
1741
			bgp_fsm(p, EVNT_CON_OPEN);
1742
			return (1);
1743
		}
1744
		if (pfd->revents & POLLHUP) {
1745
			bgp_fsm(p, EVNT_CON_OPENFAIL);
1746
			return (1);
1747
		}
1748
		if (pfd->revents & (POLLERR|POLLNVAL)) {
1749
			bgp_fsm(p, EVNT_CON_FATAL);
1750
			return (1);
1751
		}
1752
		return (0);
1753
	}
1754
1755
	if (pfd->revents & POLLHUP) {
1756
		bgp_fsm(p, EVNT_CON_CLOSED);
1757
		return (1);
1758
	}
1759
	if (pfd->revents & (POLLERR|POLLNVAL)) {
1760
		bgp_fsm(p, EVNT_CON_FATAL);
1761
		return (1);
1762
	}
1763
1764
	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1765
		if ((error = msgbuf_write(&p->wbuf)) <= 0 && errno != EAGAIN) {
1766
			if (error == 0)
1767
				log_peer_warnx(&p->conf, "Connection closed");
1768
			else if (error == -1)
1769
				log_peer_warn(&p->conf, "write error");
1770
			bgp_fsm(p, EVNT_CON_FATAL);
1771
			return (1);
1772
		}
1773
		if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) {
1774
			if (imsg_compose(ibuf_rde, IMSG_XON, p->conf.id, 0, -1,
1775
			    NULL, 0) == -1)
1776
				log_peer_warn(&p->conf, "imsg_compose XON");
1777
			p->throttled = 0;
1778
		}
1779
		if (!(pfd->revents & POLLIN))
1780
			return (1);
1781
	}
1782
1783
	if (p->rbuf && pfd->revents & POLLIN) {
1784
		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1785
		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1786
			if (errno != EINTR && errno != EAGAIN) {
1787
				log_peer_warn(&p->conf, "read error");
1788
				bgp_fsm(p, EVNT_CON_FATAL);
1789
			}
1790
			return (1);
1791
		}
1792
		if (n == 0) {	/* connection closed */
1793
			bgp_fsm(p, EVNT_CON_CLOSED);
1794
			return (1);
1795
		}
1796
1797
		p->rbuf->wpos += n;
1798
		p->stats.last_read = time(NULL);
1799
		return (1);
1800
	}
1801
	return (0);
1802
}
1803
1804
int
1805
session_process_msg(struct peer *p)
1806
{
1807
	struct mrt	*mrt;
1808
	ssize_t		rpos, av, left;
1809
	int		processed = 0;
1810
	u_int16_t	msglen;
1811
	u_int8_t	msgtype;
1812
1813
	rpos = 0;
1814
	av = p->rbuf->wpos;
1815
1816
	/*
1817
	 * session might drop to IDLE -> buffers deallocated
1818
	 * we MUST check rbuf != NULL before use
1819
	 */
1820
	for (;;) {
1821
		if (rpos + MSGSIZE_HEADER > av)
1822
			break;
1823
		if (p->rbuf == NULL)
1824
			break;
1825
		if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1826
		    &msgtype) == -1)
1827
			return (0);
1828
		if (rpos + msglen > av)
1829
			break;
1830
		p->rbuf->rptr = p->rbuf->buf + rpos;
1831
1832
		/* dump to MRT as soon as we have a full packet */
1833
		LIST_FOREACH(mrt, &mrthead, entry) {
1834
			if (!(mrt->type == MRT_ALL_IN || (msgtype == UPDATE &&
1835
			    mrt->type == MRT_UPDATE_IN)))
1836
				continue;
1837
			if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1838
			    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1839
			    mrt->group_id == p->conf.groupid))
1840
				mrt_dump_bgp_msg(mrt, p->rbuf->rptr, msglen, p);
1841
		}
1842
1843
		switch (msgtype) {
1844
		case OPEN:
1845
			bgp_fsm(p, EVNT_RCVD_OPEN);
1846
			p->stats.msg_rcvd_open++;
1847
			break;
1848
		case UPDATE:
1849
			bgp_fsm(p, EVNT_RCVD_UPDATE);
1850
			p->stats.msg_rcvd_update++;
1851
			break;
1852
		case NOTIFICATION:
1853
			bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1854
			p->stats.msg_rcvd_notification++;
1855
			break;
1856
		case KEEPALIVE:
1857
			bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1858
			p->stats.msg_rcvd_keepalive++;
1859
			break;
1860
		case RREFRESH:
1861
			parse_refresh(p);
1862
			p->stats.msg_rcvd_rrefresh++;
1863
			break;
1864
		default:	/* cannot happen */
1865
			session_notification(p, ERR_HEADER, ERR_HDR_TYPE,
1866
			    &msgtype, 1);
1867
			log_warnx("received message with unknown type %u",
1868
			    msgtype);
1869
			bgp_fsm(p, EVNT_CON_FATAL);
1870
		}
1871
		rpos += msglen;
1872
		if (++processed > MSG_PROCESS_LIMIT)
1873
			break;
1874
	}
1875
	if (p->rbuf == NULL)
1876
		return (1);
1877
1878
	if (rpos < av) {
1879
		left = av - rpos;
1880
		memmove(&p->rbuf->buf, p->rbuf->buf + rpos, left);
1881
		p->rbuf->wpos = left;
1882
	} else
1883
		p->rbuf->wpos = 0;
1884
1885
	return (1);
1886
}
1887
1888
int
1889
parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type)
1890
{
1891
	u_char			*p;
1892
	u_int16_t		 olen;
1893
	static const u_int8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
1894
				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1895
				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1896
1897
	/* caller MUST make sure we are getting 19 bytes! */
1898
	p = data;
1899
	if (memcmp(p, marker, sizeof(marker))) {
1900
		log_peer_warnx(&peer->conf, "sync error");
1901
		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
1902
		bgp_fsm(peer, EVNT_CON_FATAL);
1903
		return (-1);
1904
	}
1905
	p += MSGSIZE_HEADER_MARKER;
1906
1907
	memcpy(&olen, p, 2);
1908
	*len = ntohs(olen);
1909
	p += 2;
1910
	memcpy(type, p, 1);
1911
1912
	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
1913
		log_peer_warnx(&peer->conf,
1914
		    "received message: illegal length: %u byte", *len);
1915
		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1916
		    &olen, sizeof(olen));
1917
		bgp_fsm(peer, EVNT_CON_FATAL);
1918
		return (-1);
1919
	}
1920
1921
	switch (*type) {
1922
	case OPEN:
1923
		if (*len < MSGSIZE_OPEN_MIN) {
1924
			log_peer_warnx(&peer->conf,
1925
			    "received OPEN: illegal len: %u byte", *len);
1926
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1927
			    &olen, sizeof(olen));
1928
			bgp_fsm(peer, EVNT_CON_FATAL);
1929
			return (-1);
1930
		}
1931
		break;
1932
	case NOTIFICATION:
1933
		if (*len < MSGSIZE_NOTIFICATION_MIN) {
1934
			log_peer_warnx(&peer->conf,
1935
			    "received NOTIFICATION: illegal len: %u byte",
1936
			    *len);
1937
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1938
			    &olen, sizeof(olen));
1939
			bgp_fsm(peer, EVNT_CON_FATAL);
1940
			return (-1);
1941
		}
1942
		break;
1943
	case UPDATE:
1944
		if (*len < MSGSIZE_UPDATE_MIN) {
1945
			log_peer_warnx(&peer->conf,
1946
			    "received UPDATE: illegal len: %u byte", *len);
1947
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1948
			    &olen, sizeof(olen));
1949
			bgp_fsm(peer, EVNT_CON_FATAL);
1950
			return (-1);
1951
		}
1952
		break;
1953
	case KEEPALIVE:
1954
		if (*len != MSGSIZE_KEEPALIVE) {
1955
			log_peer_warnx(&peer->conf,
1956
			    "received KEEPALIVE: illegal len: %u byte", *len);
1957
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1958
			    &olen, sizeof(olen));
1959
			bgp_fsm(peer, EVNT_CON_FATAL);
1960
			return (-1);
1961
		}
1962
		break;
1963
	case RREFRESH:
1964
		if (*len != MSGSIZE_RREFRESH) {
1965
			log_peer_warnx(&peer->conf,
1966
			    "received RREFRESH: illegal len: %u byte", *len);
1967
			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1968
			    &olen, sizeof(olen));
1969
			bgp_fsm(peer, EVNT_CON_FATAL);
1970
			return (-1);
1971
		}
1972
		break;
1973
	default:
1974
		log_peer_warnx(&peer->conf,
1975
		    "received msg with unknown type %u", *type);
1976
		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
1977
		    type, 1);
1978
		bgp_fsm(peer, EVNT_CON_FATAL);
1979
		return (-1);
1980
	}
1981
	return (0);
1982
}
1983
1984
int
1985
parse_open(struct peer *peer)
1986
{
1987
	u_char		*p, *op_val;
1988
	u_int8_t	 version, rversion;
1989
	u_int16_t	 short_as, msglen;
1990
	u_int16_t	 holdtime, oholdtime, myholdtime;
1991
	u_int32_t	 as, bgpid;
1992
	u_int8_t	 optparamlen, plen;
1993
	u_int8_t	 op_type, op_len;
1994
1995
	p = peer->rbuf->rptr;
1996
	p += MSGSIZE_HEADER_MARKER;
1997
	memcpy(&msglen, p, sizeof(msglen));
1998
	msglen = ntohs(msglen);
1999
2000
	p = peer->rbuf->rptr;
2001
	p += MSGSIZE_HEADER;	/* header is already checked */
2002
2003
	memcpy(&version, p, sizeof(version));
2004
	p += sizeof(version);
2005
2006
	if (version != BGP_VERSION) {
2007
		log_peer_warnx(&peer->conf,
2008
		    "peer wants unrecognized version %u", version);
2009
		if (version > BGP_VERSION)
2010
			rversion = version - BGP_VERSION;
2011
		else
2012
			rversion = BGP_VERSION;
2013
		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
2014
		    &rversion, sizeof(rversion));
2015
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2016
		return (-1);
2017
	}
2018
2019
	memcpy(&short_as, p, sizeof(short_as));
2020
	p += sizeof(short_as);
2021
	as = peer->short_as = ntohs(short_as);
2022
	if (as == 0) {
2023
		log_peer_warnx(&peer->conf,
2024
		    "peer requests unacceptable AS %u", as);
2025
		session_notification(peer, ERR_OPEN, ERR_OPEN_AS,
2026
		    NULL, 0);
2027
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2028
		return (-1);
2029
	}
2030
2031
	memcpy(&oholdtime, p, sizeof(oholdtime));
2032
	p += sizeof(oholdtime);
2033
2034
	holdtime = ntohs(oholdtime);
2035
	if (holdtime && holdtime < peer->conf.min_holdtime) {
2036
		log_peer_warnx(&peer->conf,
2037
		    "peer requests unacceptable holdtime %u", holdtime);
2038
		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
2039
		    NULL, 0);
2040
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2041
		return (-1);
2042
	}
2043
2044
	myholdtime = peer->conf.holdtime;
2045
	if (!myholdtime)
2046
		myholdtime = conf->holdtime;
2047
	if (holdtime < myholdtime)
2048
		peer->holdtime = holdtime;
2049
	else
2050
		peer->holdtime = myholdtime;
2051
2052
	memcpy(&bgpid, p, sizeof(bgpid));
2053
	p += sizeof(bgpid);
2054
2055
	/* check bgpid for validity - just disallow 0 */
2056
	if (ntohl(bgpid) == 0) {
2057
		log_peer_warnx(&peer->conf, "peer BGPID %u unacceptable",
2058
		    ntohl(bgpid));
2059
		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
2060
		    NULL, 0);
2061
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2062
		return (-1);
2063
	}
2064
	peer->remote_bgpid = bgpid;
2065
2066
	memcpy(&optparamlen, p, sizeof(optparamlen));
2067
	p += sizeof(optparamlen);
2068
2069
	if (optparamlen != msglen - MSGSIZE_OPEN_MIN) {
2070
			log_peer_warnx(&peer->conf,
2071
			    "corrupt OPEN message received: length mismatch");
2072
			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2073
			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2074
			return (-1);
2075
	}
2076
2077
	plen = optparamlen;
2078
	while (plen > 0) {
2079
		if (plen < 2) {
2080
			log_peer_warnx(&peer->conf,
2081
			    "corrupt OPEN message received, len wrong");
2082
			session_notification(peer, ERR_OPEN, 0, NULL, 0);
2083
			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2084
			return (-1);
2085
		}
2086
		memcpy(&op_type, p, sizeof(op_type));
2087
		p += sizeof(op_type);
2088
		plen -= sizeof(op_type);
2089
		memcpy(&op_len, p, sizeof(op_len));
2090
		p += sizeof(op_len);
2091
		plen -= sizeof(op_len);
2092
		if (op_len > 0) {
2093
			if (plen < op_len) {
2094
				log_peer_warnx(&peer->conf,
2095
				    "corrupt OPEN message received, len wrong");
2096
				session_notification(peer, ERR_OPEN, 0,
2097
				    NULL, 0);
2098
				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2099
				return (-1);
2100
			}
2101
			op_val = p;
2102
			p += op_len;
2103
			plen -= op_len;
2104
		} else
2105
			op_val = NULL;
2106
2107
		switch (op_type) {
2108
		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
2109
			if (parse_capabilities(peer, op_val, op_len,
2110
			    &as) == -1) {
2111
				session_notification(peer, ERR_OPEN, 0,
2112
				    NULL, 0);
2113
				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2114
				return (-1);
2115
			}
2116
			break;
2117
		case OPT_PARAM_AUTH:			/* deprecated */
2118
		default:
2119
			/*
2120
			 * unsupported type
2121
			 * the RFCs tell us to leave the data section empty
2122
			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
2123
			 * How the peer should know _which_ optional parameter
2124
			 * we don't support is beyond me.
2125
			 */
2126
			log_peer_warnx(&peer->conf,
2127
			    "received OPEN message with unsupported optional "
2128
			    "parameter: type %u", op_type);
2129
			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
2130
				NULL, 0);
2131
			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2132
			timer_set(peer, Timer_IdleHold, 0);	/* no punish */
2133
			peer->IdleHoldTime /= 2;
2134
			return (-1);
2135
		}
2136
	}
2137
2138
	/* if remote-as is zero and it's a cloned neighbor, accept any */
2139
	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
2140
		peer->conf.remote_as = as;
2141
		peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as);
2142
		if (!peer->conf.ebgp)
2143
			/* force enforce_as off for iBGP sessions */
2144
			peer->conf.enforce_as = ENFORCE_AS_OFF;
2145
	}
2146
2147
	if (peer->conf.remote_as != as) {
2148
		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
2149
		    log_as(as));
2150
		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
2151
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2152
		return (-1);
2153
	}
2154
2155
	if (capa_neg_calc(peer) == -1) {
2156
		log_peer_warnx(&peer->conf,
2157
		    "capability negotiation calculation failed");
2158
		session_notification(peer, ERR_OPEN, 0, NULL, 0);
2159
		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2160
		return (-1);
2161
	}
2162
2163
	return (0);
2164
}
2165
2166
int
2167
parse_update(struct peer *peer)
2168
{
2169
	u_char		*p;
2170
	u_int16_t	 datalen;
2171
2172
	/*
2173
	 * we pass the message verbatim to the rde.
2174
	 * in case of errors the whole session is reset with a
2175
	 * notification anyway, we only need to know the peer
2176
	 */
2177
	p = peer->rbuf->rptr;
2178
	p += MSGSIZE_HEADER_MARKER;
2179
	memcpy(&datalen, p, sizeof(datalen));
2180
	datalen = ntohs(datalen);
2181
2182
	p = peer->rbuf->rptr;
2183
	p += MSGSIZE_HEADER;	/* header is already checked */
2184
	datalen -= MSGSIZE_HEADER;
2185
2186
	if (imsg_compose(ibuf_rde, IMSG_UPDATE, peer->conf.id, 0, -1, p,
2187
	    datalen) == -1)
2188
		return (-1);
2189
2190
	return (0);
2191
}
2192
2193
int
2194
parse_refresh(struct peer *peer)
2195
{
2196
	u_char		*p;
2197
	u_int16_t	 afi;
2198
	u_int8_t	 aid, safi;
2199
2200
	p = peer->rbuf->rptr;
2201
	p += MSGSIZE_HEADER;	/* header is already checked */
2202
2203
	/*
2204
	 * We could check if we actually announced the capability but
2205
	 * as long as the message is correctly encoded we don't care.
2206
	 */
2207
2208
	/* afi, 2 byte */
2209
	memcpy(&afi, p, sizeof(afi));
2210
	afi = ntohs(afi);
2211
	p += 2;
2212
	/* reserved, 1 byte */
2213
	p += 1;
2214
	/* safi, 1 byte */
2215
	memcpy(&safi, p, sizeof(safi));
2216
2217
	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2218
	if (afi2aid(afi, safi, &aid) == -1) {
2219
		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2220
		    "invalid afi/safi pair");
2221
		return (0);
2222
	}
2223
2224
	if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &aid,
2225
	    sizeof(aid)) == -1)
2226
		return (-1);
2227
2228
	return (0);
2229
}
2230
2231
int
2232
parse_notification(struct peer *peer)
2233
{
2234
	u_char		*p;
2235
	u_int16_t	 datalen;
2236
	u_int8_t	 errcode;
2237
	u_int8_t	 subcode;
2238
	u_int8_t	 capa_code;
2239
	u_int8_t	 capa_len;
2240
	u_int8_t	 shutcomm_len;
2241
	u_int8_t	 i;
2242
2243
	/* just log */
2244
	p = peer->rbuf->rptr;
2245
	p += MSGSIZE_HEADER_MARKER;
2246
	memcpy(&datalen, p, sizeof(datalen));
2247
	datalen = ntohs(datalen);
2248
2249
	p = peer->rbuf->rptr;
2250
	p += MSGSIZE_HEADER;	/* header is already checked */
2251
	datalen -= MSGSIZE_HEADER;
2252
2253
	memcpy(&errcode, p, sizeof(errcode));
2254
	p += sizeof(errcode);
2255
	datalen -= sizeof(errcode);
2256
2257
	memcpy(&subcode, p, sizeof(subcode));
2258
	p += sizeof(subcode);
2259
	datalen -= sizeof(subcode);
2260
2261
	log_notification(peer, errcode, subcode, p, datalen, "received");
2262
	peer->errcnt++;
2263
2264
	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2265
		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2266
			log_peer_warnx(&peer->conf, "received \"unsupported "
2267
			    "capability\" notification without data part, "
2268
			    "disabling capability announcements altogether");
2269
			session_capa_ann_none(peer);
2270
		}
2271
2272
		while (datalen > 0) {
2273
			if (datalen < 2) {
2274
				log_peer_warnx(&peer->conf,
2275
				    "parse_notification: "
2276
				    "expect len >= 2, len is %u", datalen);
2277
				return (-1);
2278
			}
2279
			memcpy(&capa_code, p, sizeof(capa_code));
2280
			p += sizeof(capa_code);
2281
			datalen -= sizeof(capa_code);
2282
			memcpy(&capa_len, p, sizeof(capa_len));
2283
			p += sizeof(capa_len);
2284
			datalen -= sizeof(capa_len);
2285
			if (datalen < capa_len) {
2286
				log_peer_warnx(&peer->conf,
2287
				    "parse_notification: capa_len %u exceeds "
2288
				    "remaining msg length %u", capa_len,
2289
				    datalen);
2290
				return (-1);
2291
			}
2292
			p += capa_len;
2293
			datalen -= capa_len;
2294
			switch (capa_code) {
2295
			case CAPA_MP:
2296
				for (i = 0; i < AID_MAX; i++)
2297
					peer->capa.ann.mp[i] = 0;
2298
				log_peer_warnx(&peer->conf,
2299
				    "disabling multiprotocol capability");
2300
				break;
2301
			case CAPA_REFRESH:
2302
				peer->capa.ann.refresh = 0;
2303
				log_peer_warnx(&peer->conf,
2304
				    "disabling route refresh capability");
2305
				break;
2306
			case CAPA_RESTART:
2307
				peer->capa.ann.grestart.restart = 0;
2308
				log_peer_warnx(&peer->conf,
2309
				    "disabling restart capability");
2310
				break;
2311
			case CAPA_AS4BYTE:
2312
				peer->capa.ann.as4byte = 0;
2313
				log_peer_warnx(&peer->conf,
2314
				    "disabling 4-byte AS num capability");
2315
				break;
2316
			default:	/* should not happen... */
2317
				log_peer_warnx(&peer->conf, "received "
2318
				    "\"unsupported capability\" notification "
2319
				    "for unknown capability %u, disabling "
2320
				    "capability announcements altogether",
2321
				    capa_code);
2322
				session_capa_ann_none(peer);
2323
				break;
2324
			}
2325
		}
2326
2327
		return (1);
2328
	}
2329
2330
	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2331
		session_capa_ann_none(peer);
2332
		return (1);
2333
	}
2334
2335
	if (errcode == ERR_CEASE &&
2336
	    (subcode == ERR_CEASE_ADMIN_DOWN ||
2337
	     subcode == ERR_CEASE_ADMIN_RESET)) {
2338
		if (datalen >= sizeof(shutcomm_len)) {
2339
			memcpy(&shutcomm_len, p, sizeof(shutcomm_len));
2340
			p += sizeof(shutcomm_len);
2341
			datalen -= sizeof(shutcomm_len);
2342
			if(datalen < shutcomm_len) {
2343
			    log_peer_warnx(&peer->conf,
2344
				"received truncated shutdown reason");
2345
			    return (0);
2346
			}
2347
			if (shutcomm_len > (SHUT_COMM_LEN-1)) {
2348
			    log_peer_warnx(&peer->conf,
2349
				"received overly long shutdown reason");
2350
			    return (0);
2351
			}
2352
			memcpy(peer->stats.last_shutcomm, p, shutcomm_len);
2353
			peer->stats.last_shutcomm[shutcomm_len] = '\0';
2354
			log_peer_warnx(&peer->conf,
2355
			    "received shutdown reason: \"%s\"",
2356
			    log_shutcomm(peer->stats.last_shutcomm));
2357
			p += shutcomm_len;
2358
			datalen -= shutcomm_len;
2359
		}
2360
	}
2361
2362
	return (0);
2363
}
2364
2365
int
2366
parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
2367
{
2368
	u_char		*capa_val;
2369
	u_int32_t	 remote_as;
2370
	u_int16_t	 len;
2371
	u_int16_t	 afi;
2372
	u_int16_t	 gr_header;
2373
	u_int8_t	 safi;
2374
	u_int8_t	 aid;
2375
	u_int8_t	 gr_flags;
2376
	u_int8_t	 capa_code;
2377
	u_int8_t	 capa_len;
2378
	u_int8_t	 i;
2379
2380
	len = dlen;
2381
	while (len > 0) {
2382
		if (len < 2) {
2383
			log_peer_warnx(&peer->conf, "Bad capabilities attr "
2384
			    "length: %u, too short", len);
2385
			return (-1);
2386
		}
2387
		memcpy(&capa_code, d, sizeof(capa_code));
2388
		d += sizeof(capa_code);
2389
		len -= sizeof(capa_code);
2390
		memcpy(&capa_len, d, sizeof(capa_len));
2391
		d += sizeof(capa_len);
2392
		len -= sizeof(capa_len);
2393
		if (capa_len > 0) {
2394
			if (len < capa_len) {
2395
				log_peer_warnx(&peer->conf,
2396
				    "Bad capabilities attr length: "
2397
				    "len %u smaller than capa_len %u",
2398
				    len, capa_len);
2399
				return (-1);
2400
			}
2401
			capa_val = d;
2402
			d += capa_len;
2403
			len -= capa_len;
2404
		} else
2405
			capa_val = NULL;
2406
2407
		switch (capa_code) {
2408
		case CAPA_MP:			/* RFC 4760 */
2409
			if (capa_len != 4) {
2410
				log_peer_warnx(&peer->conf,
2411
				    "Bad multi protocol capability length: "
2412
				    "%u", capa_len);
2413
				break;
2414
			}
2415
			memcpy(&afi, capa_val, sizeof(afi));
2416
			afi = ntohs(afi);
2417
			memcpy(&safi, capa_val + 3, sizeof(safi));
2418
			if (afi2aid(afi, safi, &aid) == -1) {
2419
				log_peer_warnx(&peer->conf,
2420
				    "Received multi protocol capability: "
2421
				    " unknown AFI %u, safi %u pair",
2422
				    afi, safi);
2423
				break;
2424
			}
2425
			peer->capa.peer.mp[aid] = 1;
2426
			break;
2427
		case CAPA_REFRESH:
2428
			peer->capa.peer.refresh = 1;
2429
			break;
2430
		case CAPA_RESTART:
2431
			if (capa_len == 2) {
2432
				/* peer only supports EoR marker */
2433
				peer->capa.peer.grestart.restart = 1;
2434
				peer->capa.peer.grestart.timeout = 0;
2435
				break;
2436
			} else if (capa_len % 4 != 2) {
2437
				log_peer_warnx(&peer->conf,
2438
				    "Bad graceful restart capability length: "
2439
				    "%u", capa_len);
2440
				peer->capa.peer.grestart.restart = 0;
2441
				peer->capa.peer.grestart.timeout = 0;
2442
				break;
2443
			}
2444
2445
			memcpy(&gr_header, capa_val, sizeof(gr_header));
2446
			gr_header = ntohs(gr_header);
2447
			peer->capa.peer.grestart.timeout =
2448
			    gr_header & CAPA_GR_TIMEMASK;
2449
			if (peer->capa.peer.grestart.timeout == 0) {
2450
				log_peer_warnx(&peer->conf, "Received "
2451
				    "graceful restart timeout is zero");
2452
				peer->capa.peer.grestart.restart = 0;
2453
				break;
2454
			}
2455
2456
			for (i = 2; i <= capa_len - 4; i += 4) {
2457
				memcpy(&afi, capa_val + i, sizeof(afi));
2458
				afi = ntohs(afi);
2459
				memcpy(&safi, capa_val + i + 2, sizeof(safi));
2460
				if (afi2aid(afi, safi, &aid) == -1) {
2461
					log_peer_warnx(&peer->conf,
2462
					    "Received graceful restart capa: "
2463
					    " unknown AFI %u, safi %u pair",
2464
					    afi, safi);
2465
					continue;
2466
				}
2467
				memcpy(&gr_flags, capa_val + i + 3,
2468
				    sizeof(gr_flags));
2469
				peer->capa.peer.grestart.flags[aid] |=
2470
				    CAPA_GR_PRESENT;
2471
				if (gr_flags & CAPA_GR_F_FLAG)
2472
					peer->capa.peer.grestart.flags[aid] |=
2473
					    CAPA_GR_FORWARD;
2474
				if (gr_header & CAPA_GR_R_FLAG)
2475
					peer->capa.peer.grestart.flags[aid] |=
2476
					    CAPA_GR_RESTART;
2477
				peer->capa.peer.grestart.restart = 2;
2478
			}
2479
			break;
2480
		case CAPA_AS4BYTE:
2481
			if (capa_len != 4) {
2482
				log_peer_warnx(&peer->conf,
2483
				    "Bad AS4BYTE capability length: "
2484
				    "%u", capa_len);
2485
				peer->capa.peer.as4byte = 0;
2486
				break;
2487
			}
2488
			memcpy(&remote_as, capa_val, sizeof(remote_as));
2489
			*as = ntohl(remote_as);
2490
			if (*as == 0) {
2491
				log_peer_warnx(&peer->conf,
2492
				    "peer requests unacceptable AS %u", *as);
2493
				session_notification(peer, ERR_OPEN, ERR_OPEN_AS,
2494
				    NULL, 0);
2495
				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2496
				return (-1);
2497
			}
2498
			peer->capa.peer.as4byte = 1;
2499
			break;
2500
		default:
2501
			break;
2502
		}
2503
	}
2504
2505
	return (0);
2506
}
2507
2508
int
2509
capa_neg_calc(struct peer *p)
2510
{
2511
	u_int8_t	i, hasmp = 0;
2512
2513
	/* refresh: does not realy matter here, use peer setting */
2514
	p->capa.neg.refresh = p->capa.peer.refresh;
2515
2516
	/* as4byte: both side must announce capability */
2517
	if (p->capa.ann.as4byte && p->capa.peer.as4byte)
2518
		p->capa.neg.as4byte = 1;
2519
	else
2520
		p->capa.neg.as4byte = 0;
2521
2522
	/* MP: both side must announce capability */
2523
	for (i = 0; i < AID_MAX; i++) {
2524
		if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) {
2525
			p->capa.neg.mp[i] = 1;
2526
			hasmp = 1;
2527
		} else
2528
			p->capa.neg.mp[i] = 0;
2529
	}
2530
	/* if no MP capability present default to IPv4 unicast mode */
2531
	if (!hasmp)
2532
		p->capa.neg.mp[AID_INET] = 1;
2533
2534
	/*
2535
	 * graceful restart: only the peer capabilities are of interest here.
2536
	 * It is necessary to compare the new values with the previous ones
2537
	 * and act acordingly. AFI/SAFI that are not part in the MP capability
2538
	 * are treated as not being present.
2539
	 */
2540
2541
	for (i = 0; i < AID_MAX; i++) {
2542
		int8_t	negflags;
2543
2544
		/* disable GR if the AFI/SAFI is not present */
2545
		if (p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
2546
		    p->capa.neg.mp[i] == 0)
2547
			p->capa.peer.grestart.flags[i] = 0;	/* disable */
2548
		/* look at current GR state and decide what to do */
2549
		negflags = p->capa.neg.grestart.flags[i];
2550
		p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i];
2551
		if (negflags & CAPA_GR_RESTARTING) {
2552
			if (!(p->capa.peer.grestart.flags[i] &
2553
			    CAPA_GR_FORWARD)) {
2554
				if (imsg_compose(ibuf_rde, IMSG_SESSION_FLUSH,
2555
				    p->conf.id, 0, -1, &i, sizeof(i)) == -1)
2556
					return (-1);
2557
				log_peer_warnx(&p->conf, "graceful restart of "
2558
				    "%s, not restarted, flushing", aid2str(i));
2559
			} else
2560
				p->capa.neg.grestart.flags[i] |=
2561
				    CAPA_GR_RESTARTING;
2562
		}
2563
	}
2564
	p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
2565
	p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
2566
2567
	return (0);
2568
}
2569
2570
void
2571
session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2572
{
2573
	struct imsg		 imsg;
2574
	struct mrt		 xmrt;
2575
	struct mrt		*mrt;
2576
	struct imsgbuf		*i;
2577
	struct peer_config	*pconf;
2578
	struct peer		*p, *next;
2579
	struct listen_addr	*la, *nla;
2580
	struct kif		*kif;
2581
	u_char			*data;
2582
	enum reconf_action	 reconf;
2583
	int			 n, fd, depend_ok, restricted;
2584
	u_int8_t		 aid, errcode, subcode;
2585
2586
	while (ibuf) {
2587
		if ((n = imsg_get(ibuf, &imsg)) == -1)
2588
			fatal("session_dispatch_imsg: imsg_get error");
2589
2590
		if (n == 0)
2591
			break;
2592
2593
		switch (imsg.hdr.type) {
2594
		case IMSG_SOCKET_CONN:
2595
		case IMSG_SOCKET_CONN_CTL:
2596
			if (idx != PFD_PIPE_MAIN)
2597
				fatalx("reconf request not from parent");
2598
			if ((fd = imsg.fd) == -1) {
2599
				log_warnx("expected to receive imsg fd to "
2600
				    "RDE but didn't receive any");
2601
				break;
2602
			}
2603
			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
2604
				fatal(NULL);
2605
			imsg_init(i, fd);
2606
			if (imsg.hdr.type == IMSG_SOCKET_CONN) {
2607
				if (ibuf_rde) {
2608
					log_warnx("Unexpected imsg connection "
2609
					    "to RDE received");
2610
					msgbuf_clear(&ibuf_rde->w);
2611
					free(ibuf_rde);
2612
				}
2613
				ibuf_rde = i;
2614
			} else {
2615
				if (ibuf_rde_ctl) {
2616
					log_warnx("Unexpected imsg ctl "
2617
					    "connection to RDE received");
2618
					msgbuf_clear(&ibuf_rde_ctl->w);
2619
					free(ibuf_rde_ctl);
2620
				}
2621
				ibuf_rde_ctl = i;
2622
			}
2623
			break;
2624
		case IMSG_RECONF_CONF:
2625
			if (idx != PFD_PIPE_MAIN)
2626
				fatalx("reconf request not from parent");
2627
			if ((nconf = malloc(sizeof(struct bgpd_config))) ==
2628
			    NULL)
2629
				fatal(NULL);
2630
			memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
2631
			if ((nconf->listen_addrs = calloc(1,
2632
			    sizeof(struct listen_addrs))) == NULL)
2633
				fatal(NULL);
2634
			TAILQ_INIT(nconf->listen_addrs);
2635
			npeers = NULL;
2636
			init_conf(nconf);
2637
			pending_reconf = 1;
2638
			break;
2639
		case IMSG_RECONF_PEER:
2640
			if (idx != PFD_PIPE_MAIN)
2641
				fatalx("reconf request not from parent");
2642
			pconf = imsg.data;
2643
			p = getpeerbyaddr(&pconf->remote_addr);
2644
			if (p == NULL) {
2645
				if ((p = calloc(1, sizeof(struct peer))) ==
2646
				    NULL)
2647
					fatal("new_peer");
2648
				p->state = p->prev_state = STATE_NONE;
2649
				p->next = npeers;
2650
				npeers = p;
2651
				reconf = RECONF_REINIT;
2652
			} else
2653
				reconf = RECONF_KEEP;
2654
2655
			memcpy(&p->conf, pconf, sizeof(struct peer_config));
2656
			p->conf.reconf_action = reconf;
2657
2658
			/* sync the RDE in case we keep the peer */
2659
			if (reconf == RECONF_KEEP) {
2660
				if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD,
2661
				    p->conf.id, 0, -1, &p->conf,
2662
				    sizeof(struct peer_config)) == -1)
2663
					fatalx("imsg_compose error");
2664
				if (p->conf.template) {
2665
					/* apply the conf to all clones */
2666
					struct peer *np;
2667
					for (np = peers; np; np = np->next) {
2668
						if (np->template != p)
2669
							continue;
2670
						session_template_clone(np,
2671
						    NULL, np->conf.id,
2672
						    np->conf.remote_as);
2673
						if (imsg_compose(ibuf_rde,
2674
						    IMSG_SESSION_ADD,
2675
						    np->conf.id, 0, -1,
2676
						    &np->conf,
2677
						    sizeof(struct peer_config))
2678
						    == -1)
2679
							fatalx("imsg_compose error");
2680
					}
2681
				}
2682
			}
2683
			break;
2684
		case IMSG_RECONF_LISTENER:
2685
			if (idx != PFD_PIPE_MAIN)
2686
				fatalx("reconf request not from parent");
2687
			if (nconf == NULL)
2688
				fatalx("IMSG_RECONF_LISTENER but no config");
2689
			nla = imsg.data;
2690
			TAILQ_FOREACH(la, conf->listen_addrs, entry)
2691
				if (!la_cmp(la, nla))
2692
					break;
2693
2694
			if (la == NULL) {
2695
				if (nla->reconf != RECONF_REINIT)
2696
					fatalx("king bula sez: "
2697
					    "expected REINIT");
2698
2699
				if ((nla->fd = imsg.fd) == -1)
2700
					log_warnx("expected to receive fd for "
2701
					    "%s but didn't receive any",
2702
					    log_sockaddr((struct sockaddr *)
2703
					    &nla->sa));
2704
2705
				la = calloc(1, sizeof(struct listen_addr));
2706
				if (la == NULL)
2707
					fatal(NULL);
2708
				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
2709
				la->flags = nla->flags;
2710
				la->fd = nla->fd;
2711
				la->reconf = RECONF_REINIT;
2712
				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
2713
				    entry);
2714
			} else {
2715
				if (nla->reconf != RECONF_KEEP)
2716
					fatalx("king bula sez: expected KEEP");
2717
				la->reconf = RECONF_KEEP;
2718
			}
2719
2720
			break;
2721
		case IMSG_RECONF_CTRL:
2722
			if (idx != PFD_PIPE_MAIN)
2723
				fatalx("reconf request not from parent");
2724
			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2725
			    sizeof(restricted))
2726
				fatalx("IFINFO imsg with wrong len");
2727
			memcpy(&restricted, imsg.data, sizeof(restricted));
2728
			if (imsg.fd == -1) {
2729
				log_warnx("expected to receive fd for control "
2730
				    "socket but didn't receive any");
2731
				break;
2732
			}
2733
			if (restricted) {
2734
				control_shutdown(rcsock);
2735
				rcsock = imsg.fd;
2736
			} else {
2737
				control_shutdown(csock);
2738
				csock = imsg.fd;
2739
			}
2740
			break;
2741
		case IMSG_RECONF_DONE:
2742
			if (idx != PFD_PIPE_MAIN)
2743
				fatalx("reconf request not from parent");
2744
			if (nconf == NULL)
2745
				fatalx("got IMSG_RECONF_DONE but no config");
2746
			conf->flags = nconf->flags;
2747
			conf->log = nconf->log;
2748
			conf->bgpid = nconf->bgpid;
2749
			conf->clusterid = nconf->clusterid;
2750
			conf->as = nconf->as;
2751
			conf->short_as = nconf->short_as;
2752
			conf->holdtime = nconf->holdtime;
2753
			conf->min_holdtime = nconf->min_holdtime;
2754
			conf->connectretry = nconf->connectretry;
2755
2756
			/* add new peers */
2757
			for (p = npeers; p != NULL; p = next) {
2758
				next = p->next;
2759
				p->next = peers;
2760
				peers = p;
2761
			}
2762
			/* find ones that need attention */
2763
			for (p = peers; p != NULL; p = p->next) {
2764
				/* needs to be deleted? */
2765
				if (p->conf.reconf_action == RECONF_NONE &&
2766
				    !p->template)
2767
					p->conf.reconf_action = RECONF_DELETE;
2768
				/* had demotion, is demoted, demote removed? */
2769
				if (p->demoted && !p->conf.demote_group[0])
2770
						session_demote(p, -1);
2771
			}
2772
2773
			/* delete old listeners */
2774
			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
2775
			    la = nla) {
2776
				nla = TAILQ_NEXT(la, entry);
2777
				if (la->reconf == RECONF_NONE) {
2778
					log_info("not listening on %s any more",
2779
					    log_sockaddr(
2780
					    (struct sockaddr *)&la->sa));
2781
					TAILQ_REMOVE(conf->listen_addrs, la,
2782
					    entry);
2783
					close(la->fd);
2784
					free(la);
2785
				}
2786
			}
2787
2788
			/* add new listeners */
2789
			while ((la = TAILQ_FIRST(nconf->listen_addrs)) !=
2790
			    NULL) {
2791
				TAILQ_REMOVE(nconf->listen_addrs, la, entry);
2792
				TAILQ_INSERT_TAIL(conf->listen_addrs, la,
2793
				    entry);
2794
			}
2795
2796
			setup_listeners(listener_cnt);
2797
			free(nconf->listen_addrs);
2798
			free(nconf);
2799
			nconf = NULL;
2800
			pending_reconf = 0;
2801
			log_info("SE reconfigured");
2802
			imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
2803
			    -1, NULL, 0);
2804
			break;
2805
		case IMSG_IFINFO:
2806
			if (idx != PFD_PIPE_MAIN)
2807
				fatalx("IFINFO message not from parent");
2808
			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2809
			    sizeof(struct kif))
2810
				fatalx("IFINFO imsg with wrong len");
2811
			kif = imsg.data;
2812
			depend_ok = session_link_state_is_up(kif->flags,
2813
			    kif->if_type, kif->link_state);
2814
2815
			for (p = peers; p != NULL; p = p->next)
2816
				if (!strcmp(p->conf.if_depend, kif->ifname)) {
2817
					if (depend_ok && !p->depend_ok) {
2818
						p->depend_ok = depend_ok;
2819
						bgp_fsm(p, EVNT_START);
2820
					} else if (!depend_ok && p->depend_ok) {
2821
						p->depend_ok = depend_ok;
2822
						session_stop(p,
2823
						    ERR_CEASE_OTHER_CHANGE);
2824
					}
2825
				}
2826
			break;
2827
		case IMSG_MRT_OPEN:
2828
		case IMSG_MRT_REOPEN:
2829
			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2830
			    sizeof(struct mrt)) {
2831
				log_warnx("wrong imsg len");
2832
				break;
2833
			}
2834
2835
			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2836
			if ((xmrt.wbuf.fd = imsg.fd) == -1)
2837
				log_warnx("expected to receive fd for mrt dump "
2838
				    "but didn't receive any");
2839
2840
			mrt = mrt_get(&mrthead, &xmrt);
2841
			if (mrt == NULL) {
2842
				/* new dump */
2843
				mrt = calloc(1, sizeof(struct mrt));
2844
				if (mrt == NULL)
2845
					fatal("session_dispatch_imsg");
2846
				memcpy(mrt, &xmrt, sizeof(struct mrt));
2847
				TAILQ_INIT(&mrt->wbuf.bufs);
2848
				LIST_INSERT_HEAD(&mrthead, mrt, entry);
2849
			} else {
2850
				/* old dump reopened */
2851
				close(mrt->wbuf.fd);
2852
				mrt->wbuf.fd = xmrt.wbuf.fd;
2853
			}
2854
			break;
2855
		case IMSG_MRT_CLOSE:
2856
			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2857
			    sizeof(struct mrt)) {
2858
				log_warnx("wrong imsg len");
2859
				break;
2860
			}
2861
2862
			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2863
			mrt = mrt_get(&mrthead, &xmrt);
2864
			if (mrt != NULL)
2865
				mrt_done(mrt);
2866
			break;
2867
		case IMSG_CTL_KROUTE:
2868
		case IMSG_CTL_KROUTE_ADDR:
2869
		case IMSG_CTL_SHOW_NEXTHOP:
2870
		case IMSG_CTL_SHOW_INTERFACE:
2871
		case IMSG_CTL_SHOW_FIB_TABLES:
2872
			if (idx != PFD_PIPE_MAIN)
2873
				fatalx("ctl kroute request not from parent");
2874
			control_imsg_relay(&imsg);
2875
			break;
2876
		case IMSG_CTL_SHOW_RIB:
2877
		case IMSG_CTL_SHOW_RIB_PREFIX:
2878
		case IMSG_CTL_SHOW_RIB_ATTR:
2879
		case IMSG_CTL_SHOW_RIB_MEM:
2880
		case IMSG_CTL_SHOW_NETWORK:
2881
		case IMSG_CTL_SHOW_NEIGHBOR:
2882
			if (idx != PFD_PIPE_ROUTE_CTL)
2883
				fatalx("ctl rib request not from RDE");
2884
			control_imsg_relay(&imsg);
2885
			break;
2886
		case IMSG_CTL_END:
2887
		case IMSG_CTL_RESULT:
2888
			control_imsg_relay(&imsg);
2889
			break;
2890
		case IMSG_UPDATE:
2891
			if (idx != PFD_PIPE_ROUTE)
2892
				fatalx("update request not from RDE");
2893
			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2894
			    MAX_PKTSIZE - MSGSIZE_HEADER ||
2895
			    imsg.hdr.len < IMSG_HEADER_SIZE +
2896
			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
2897
				log_warnx("RDE sent invalid update");
2898
			else
2899
				session_update(imsg.hdr.peerid, imsg.data,
2900
				    imsg.hdr.len - IMSG_HEADER_SIZE);
2901
			break;
2902
		case IMSG_UPDATE_ERR:
2903
			if (idx != PFD_PIPE_ROUTE)
2904
				fatalx("update request not from RDE");
2905
			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
2906
				log_warnx("RDE sent invalid notification");
2907
				break;
2908
			}
2909
			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2910
				log_warnx("no such peer: id=%u",
2911
				    imsg.hdr.peerid);
2912
				break;
2913
			}
2914
			data = imsg.data;
2915
			errcode = *data++;
2916
			subcode = *data++;
2917
2918
			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
2919
				data = NULL;
2920
2921
			session_notification(p, errcode, subcode,
2922
			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
2923
			switch (errcode) {
2924
			case ERR_CEASE:
2925
				switch (subcode) {
2926
				case ERR_CEASE_MAX_PREFIX:
2927
					bgp_fsm(p, EVNT_STOP);
2928
					if (p->conf.max_prefix_restart)
2929
						timer_set(p, Timer_IdleHold, 60 *
2930
						    p->conf.max_prefix_restart);
2931
					break;
2932
				default:
2933
					bgp_fsm(p, EVNT_CON_FATAL);
2934
					break;
2935
				}
2936
				break;
2937
			default:
2938
				bgp_fsm(p, EVNT_CON_FATAL);
2939
				break;
2940
			}
2941
			break;
2942
		case IMSG_SESSION_RESTARTED:
2943
			if (idx != PFD_PIPE_ROUTE)
2944
				fatalx("update request not from RDE");
2945
			if (imsg.hdr.len < IMSG_HEADER_SIZE + sizeof(aid)) {
2946
				log_warnx("RDE sent invalid restart msg");
2947
				break;
2948
			}
2949
			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2950
				log_warnx("no such peer: id=%u",
2951
				    imsg.hdr.peerid);
2952
				break;
2953
			}
2954
			memcpy(&aid, imsg.data, sizeof(aid));
2955
			if (aid >= AID_MAX)
2956
				fatalx("IMSG_SESSION_RESTARTED: bad AID");
2957
			if (p->capa.neg.grestart.flags[aid] &
2958
			    CAPA_GR_RESTARTING) {
2959
				log_peer_warnx(&p->conf,
2960
				    "graceful restart of %s finished",
2961
				    aid2str(aid));
2962
				p->capa.neg.grestart.flags[aid] &=
2963
				    ~CAPA_GR_RESTARTING;
2964
				timer_stop(p, Timer_RestartTimeout);
2965
2966
				/* signal back to RDE to cleanup stale routes */
2967
				if (imsg_compose(ibuf_rde,
2968
				    IMSG_SESSION_RESTARTED, imsg.hdr.peerid, 0,
2969
				    -1, &aid, sizeof(aid)) == -1)
2970
					fatal("imsg_compose: "
2971
					    "IMSG_SESSION_RESTARTED");
2972
			}
2973
			break;
2974
		case IMSG_SESSION_DOWN:
2975
			if (idx != PFD_PIPE_ROUTE)
2976
				fatalx("update request not from RDE");
2977
			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2978
				log_warnx("no such peer: id=%u",
2979
				    imsg.hdr.peerid);
2980
				break;
2981
			}
2982
			session_stop(p, ERR_CEASE_ADMIN_DOWN);
2983
			break;
2984
		default:
2985
			break;
2986
		}
2987
		imsg_free(&imsg);
2988
	}
2989
}
2990
2991
int
2992
la_cmp(struct listen_addr *a, struct listen_addr *b)
2993
{
2994
	struct sockaddr_in	*in_a, *in_b;
2995
	struct sockaddr_in6	*in6_a, *in6_b;
2996
2997
	if (a->sa.ss_family != b->sa.ss_family)
2998
		return (1);
2999
3000
	switch (a->sa.ss_family) {
3001
	case AF_INET:
3002
		in_a = (struct sockaddr_in *)&a->sa;
3003
		in_b = (struct sockaddr_in *)&b->sa;
3004
		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
3005
			return (1);
3006
		if (in_a->sin_port != in_b->sin_port)
3007
			return (1);
3008
		break;
3009
	case AF_INET6:
3010
		in6_a = (struct sockaddr_in6 *)&a->sa;
3011
		in6_b = (struct sockaddr_in6 *)&b->sa;
3012
		if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
3013
		    sizeof(struct in6_addr)))
3014
			return (1);
3015
		if (in6_a->sin6_port != in6_b->sin6_port)
3016
			return (1);
3017
		break;
3018
	default:
3019
		fatal("king bula sez: unknown address family");
3020
		/* NOTREACHED */
3021
	}
3022
3023
	return (0);
3024
}
3025
3026
struct peer *
3027
getpeerbyaddr(struct bgpd_addr *addr)
3028
{
3029
	struct peer *p;
3030
3031
	/* we might want a more effective way to find peers by IP */
3032
	for (p = peers; p != NULL &&
3033
	    memcmp(&p->conf.remote_addr, addr, sizeof(p->conf.remote_addr));
3034
	    p = p->next)
3035
		;	/* nothing */
3036
3037
	return (p);
3038
}
3039
3040
struct peer *
3041
getpeerbydesc(const char *descr)
3042
{
3043
	struct peer	*p, *res = NULL;
3044
	int		 match = 0;
3045
3046
	for (p = peers; p != NULL; p = p->next)
3047
		if (!strcmp(p->conf.descr, descr)) {
3048
			res = p;
3049
			match++;
3050
		}
3051
3052
	if (match > 1)
3053
		log_info("neighbor description \"%s\" not unique, request "
3054
		    "aborted", descr);
3055
3056
	if (match == 1)
3057
		return (res);
3058
	else
3059
		return (NULL);
3060
}
3061
3062
struct peer *
3063
getpeerbyip(struct sockaddr *ip)
3064
{
3065
	struct bgpd_addr addr;
3066
	struct peer	*p, *newpeer, *loose = NULL;
3067
	u_int32_t	 id;
3068
3069
	sa2addr(ip, &addr);
3070
3071
	/* we might want a more effective way to find peers by IP */
3072
	for (p = peers; p != NULL; p = p->next)
3073
		if (!p->conf.template &&
3074
		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
3075
			return (p);
3076
3077
	/* try template matching */
3078
	for (p = peers; p != NULL; p = p->next)
3079
		if (p->conf.template &&
3080
		    p->conf.remote_addr.aid == addr.aid &&
3081
		    session_match_mask(p, &addr))
3082
			if (loose == NULL || loose->conf.remote_masklen <
3083
			    p->conf.remote_masklen)
3084
				loose = p;
3085
3086
	if (loose != NULL) {
3087
		/* clone */
3088
		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
3089
			fatal(NULL);
3090
		memcpy(newpeer, loose, sizeof(struct peer));
3091
		for (id = UINT_MAX; id > UINT_MAX / 2; id--) {
3092
			for (p = peers; p != NULL && p->conf.id != id;
3093
			    p = p->next)
3094
				;	/* nothing */
3095
			if (p == NULL) {	/* we found a free id */
3096
				break;
3097
			}
3098
		}
3099
		newpeer->template = loose;
3100
		session_template_clone(newpeer, ip, id, 0);
3101
		newpeer->state = newpeer->prev_state = STATE_NONE;
3102
		newpeer->conf.reconf_action = RECONF_KEEP;
3103
		newpeer->rbuf = NULL;
3104
		init_peer(newpeer);
3105
		bgp_fsm(newpeer, EVNT_START);
3106
		newpeer->next = peers;
3107
		peers = newpeer;
3108
		return (newpeer);
3109
	}
3110
3111
	return (NULL);
3112
}
3113
3114
void
3115
session_template_clone(struct peer *p, struct sockaddr *ip, u_int32_t id,
3116
    u_int32_t as)
3117
{
3118
	struct bgpd_addr	remote_addr;
3119
3120
	if (ip)
3121
		sa2addr(ip, &remote_addr);
3122
	else
3123
		memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr));
3124
3125
	memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config));
3126
3127
	p->conf.id = id;
3128
3129
	if (as) {
3130
		p->conf.remote_as = as;
3131
		p->conf.ebgp = (p->conf.remote_as != p->conf.local_as);
3132
		if (!p->conf.ebgp)
3133
			/* force enforce_as off for iBGP sessions */
3134
			p->conf.enforce_as = ENFORCE_AS_OFF;
3135
	}
3136
3137
	memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr));
3138
	switch (p->conf.remote_addr.aid) {
3139
	case AID_INET:
3140
		p->conf.remote_masklen = 32;
3141
		break;
3142
	case AID_INET6:
3143
		p->conf.remote_masklen = 128;
3144
		break;
3145
	}
3146
	p->conf.template = 0;
3147
}
3148
3149
int
3150
session_match_mask(struct peer *p, struct bgpd_addr *a)
3151
{
3152
	in_addr_t	 v4mask;
3153
	struct in6_addr	 masked;
3154
3155
	switch (p->conf.remote_addr.aid) {
3156
	case AID_INET:
3157
		v4mask = htonl(prefixlen2mask(p->conf.remote_masklen));
3158
		if (p->conf.remote_addr.v4.s_addr == (a->v4.s_addr & v4mask))
3159
			return (1);
3160
		return (0);
3161
	case AID_INET6:
3162
		inet6applymask(&masked, &a->v6, p->conf.remote_masklen);
3163
3164
		if (!memcmp(&masked, &p->conf.remote_addr.v6, sizeof(masked)))
3165
			return (1);
3166
		return (0);
3167
	}
3168
	return (0);
3169
}
3170
3171
struct peer *
3172
getpeerbyid(u_int32_t peerid)
3173
{
3174
	struct peer *p;
3175
3176
	/* we might want a more effective way to find peers by IP */
3177
	for (p = peers; p != NULL &&
3178
	    p->conf.id != peerid; p = p->next)
3179
		;	/* nothing */
3180
3181
	return (p);
3182
}
3183
3184
void
3185
session_down(struct peer *peer)
3186
{
3187
	bzero(&peer->capa.neg, sizeof(peer->capa.neg));
3188
	peer->stats.last_updown = time(NULL);
3189
	if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1,
3190
	    NULL, 0) == -1)
3191
		fatalx("imsg_compose error");
3192
}
3193
3194
void
3195
session_up(struct peer *p)
3196
{
3197
	struct session_up	 sup;
3198
3199
	if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1,
3200
	    &p->conf, sizeof(p->conf)) == -1)
3201
		fatalx("imsg_compose error");
3202
3203
	sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr);
3204
	sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr);
3205
3206
	sup.remote_bgpid = p->remote_bgpid;
3207
	sup.short_as = p->short_as;
3208
	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
3209
	p->stats.last_updown = time(NULL);
3210
	if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1,
3211
	    &sup, sizeof(sup)) == -1)
3212
		fatalx("imsg_compose error");
3213
}
3214
3215
int
3216
imsg_ctl_parent(int type, u_int32_t peerid, pid_t pid, void *data,
3217
    u_int16_t datalen)
3218
{
3219
	return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen));
3220
}
3221
3222
int
3223
imsg_ctl_rde(int type, pid_t pid, void *data, u_int16_t datalen)
3224
{
3225
	/*
3226
	 * Use control socket to talk to RDE to bypass the queue of the
3227
	 * regular imsg socket.
3228
	 */
3229
	return (imsg_compose(ibuf_rde_ctl, type, 0, pid, -1, data, datalen));
3230
}
3231
3232
void
3233
session_demote(struct peer *p, int level)
3234
{
3235
	struct demote_msg	msg;
3236
3237
	strlcpy(msg.demote_group, p->conf.demote_group,
3238
	    sizeof(msg.demote_group));
3239
	msg.level = level;
3240
	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
3241
	    &msg, sizeof(msg)) == -1)
3242
		fatalx("imsg_compose error");
3243
3244
	p->demoted += level;
3245
}
3246
3247
void
3248
session_stop(struct peer *peer, u_int8_t subcode)
3249
{
3250
	char data[SHUT_COMM_LEN];
3251
	uint8_t datalen;
3252
	uint8_t shutcomm_len;
3253
	char *communication;
3254
3255
	datalen = 0;
3256
3257
	communication = peer->conf.shutcomm;
3258
3259
	if ((subcode == ERR_CEASE_ADMIN_DOWN || subcode == ERR_CEASE_ADMIN_RESET)
3260
	    && communication && *communication) {
3261
		shutcomm_len = strlen(communication);
3262
		if(shutcomm_len < SHUT_COMM_LEN) {
3263
			data[0] = shutcomm_len;
3264
			datalen = shutcomm_len + sizeof(data[0]);
3265
			memcpy(data + 1, communication, shutcomm_len);
3266
		}
3267
	}
3268
	switch (peer->state) {
3269
	case STATE_OPENSENT:
3270
	case STATE_OPENCONFIRM:
3271
	case STATE_ESTABLISHED:
3272
		session_notification(peer, ERR_CEASE, subcode, data, datalen);
3273
		break;
3274
	default:
3275
		/* session not open, no need to send notification */
3276
		break;
3277
	}
3278
	bgp_fsm(peer, EVNT_STOP);
3279
}
3280
3281
/*
3282
 * return 1 when the interface is up
3283
 * and the link state is up or unknwown
3284
 * except when this is a carp interface, then
3285
 * return 1 only when link state is up
3286
 */
3287
int
3288
session_link_state_is_up(int flags, int type, int link_state)
3289
{
3290
	if (!(flags & IFF_UP))
3291
		return (0);
3292
3293
	if (type == IFT_CARP &&
3294
	    link_state == LINK_STATE_UNKNOWN)
3295
		return (0);
3296
3297
	return LINK_STATE_IS_UP(link_state);
3298
}