Line data Source code
1 : /* $OpenBSD: ip_carp.c,v 1.333 2018/07/10 11:22:54 friehm Exp $ */
2 :
3 : /*
4 : * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
5 : * Copyright (c) 2003 Ryan McBride. All rights reserved.
6 : * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved.
7 : *
8 : * Redistribution and use in source and binary forms, with or without
9 : * modification, are permitted provided that the following conditions
10 : * are met:
11 : * 1. Redistributions of source code must retain the above copyright
12 : * notice, this list of conditions and the following disclaimer.
13 : * 2. Redistributions in binary form must reproduce the above copyright
14 : * notice, this list of conditions and the following disclaimer in the
15 : * documentation and/or other materials provided with the distribution.
16 : *
17 : * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 : * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 : * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 : * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21 : * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 : * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 : * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 : * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 : * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26 : * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 : * THE POSSIBILITY OF SUCH DAMAGE.
28 : */
29 :
30 : /*
31 : * TODO:
32 : * - iface reconfigure
33 : * - support for hardware checksum calculations;
34 : *
35 : */
36 :
37 : #include "ether.h"
38 :
39 : #include <sys/param.h>
40 : #include <sys/systm.h>
41 : #include <sys/mbuf.h>
42 : #include <sys/socket.h>
43 : #include <sys/socketvar.h>
44 : #include <sys/timeout.h>
45 : #include <sys/ioctl.h>
46 : #include <sys/errno.h>
47 : #include <sys/device.h>
48 : #include <sys/kernel.h>
49 : #include <sys/sysctl.h>
50 : #include <sys/syslog.h>
51 : #include <sys/refcnt.h>
52 :
53 : #include <net/if.h>
54 : #include <net/if_var.h>
55 : #include <net/if_types.h>
56 : #include <net/netisr.h>
57 :
58 : #include <crypto/sha1.h>
59 :
60 : #include <netinet/in.h>
61 : #include <netinet/in_var.h>
62 : #include <netinet/ip.h>
63 : #include <netinet/ip_var.h>
64 : #include <netinet/if_ether.h>
65 : #include <netinet/ip_ipsp.h>
66 :
67 : #include <net/if_dl.h>
68 :
69 : #ifdef INET6
70 : #include <netinet6/in6_var.h>
71 : #include <netinet/icmp6.h>
72 : #include <netinet/ip6.h>
73 : #include <netinet6/ip6_var.h>
74 : #include <netinet6/nd6.h>
75 : #include <netinet6/in6_ifattach.h>
76 : #endif
77 :
78 : #include "bpfilter.h"
79 : #if NBPFILTER > 0
80 : #include <net/bpf.h>
81 : #endif
82 :
83 : #include "vlan.h"
84 : #if NVLAN > 0
85 : #include <net/if_vlan_var.h>
86 : #endif
87 :
88 : #include <netinet/ip_carp.h>
89 :
/*
 * Element of a softc's carp_mc_listhead list.  Each entry carries a pointer
 * to a struct ether_multi together with a socket address.
 */
struct carp_mc_entry {
	LIST_ENTRY(carp_mc_entry) mc_entries;	/* carp_mc_listhead linkage */
	union {
		struct ether_multi *mcu_enm;
	} mc_u;
	struct sockaddr_storage mc_addr;
};
#define mc_enm mc_u.mcu_enm	/* shorthand for the only union member */
98 :
/*
 * Indexes into carp_vhost_entry.vhe_sha1[].  HMAC_NOV6LL is the context
 * that leaves scope-embedded IPv6 addresses out of the hash; HMAC_MAX is
 * the number of contexts.
 */
enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 };

/*
 * State kept for one vhid of a carp interface.  Entries live on the
 * parent softc's carp_vhosts list and are reference counted.
 */
struct carp_vhost_entry {
	SRPL_ENTRY(carp_vhost_entry) vhost_entries;	/* carp_vhosts linkage */
	struct refcnt vhost_refcnt;

	struct carp_softc *parent_sc;	/* owning softc (holds a sc ref) */
	int vhe_leader;			/* set on the first entry of a softc */
	int vhid;
	int advskew;
	enum { INIT = 0, BACKUP, MASTER } state;
	struct timeout ad_tmo;	/* advertisement timeout */
	struct timeout md_tmo;	/* master down timeout */
	struct timeout md6_tmo;	/* master down timeout */

	u_int64_t vhe_replay_cookie;	/* counter sent in advertisements */

	/* authentication */
#define CARP_HMAC_PAD 64
	unsigned char vhe_pad[CARP_HMAC_PAD];	/* HMAC pad derived from sc_key */
	SHA1_CTX vhe_sha1[HMAC_MAX];		/* precomputed inner hash prefix */

	u_int8_t vhe_enaddr[ETHER_ADDR_LEN];
};
123 :
/* Reference callbacks handed to the SRPL code for carp_vhost_entry lists. */
void carp_vh_ref(void *, void *);
void carp_vh_unref(void *, void *);

struct srpl_rc carp_vh_rc =
    SRPL_RC_INITIALIZER(carp_vh_ref, carp_vh_unref, NULL);
129 :
/*
 * Per-interface state of a carp pseudo-device.  One softc owns one or more
 * carp_vhost_entry objects (carp_vhosts) and, once attached, is linked on
 * the if_carp list of its parent interface (sc_carpdev).
 */
struct carp_softc {
	struct arpcom sc_ac;
#define sc_if sc_ac.ac_if
#define sc_carpdev sc_ac.ac_if.if_carpdev
	void *ah_cookie;	/* hook on our own if_addrhooks */
	void *lh_cookie;	/* hook on the parent's if_linkstatehooks */
	void *dh_cookie;	/* hook on the parent's if_detachhooks */
	struct ip_moptions sc_imo;
#ifdef INET6
	struct ip6_moptions sc_im6o;
#endif /* INET6 */

	SRPL_ENTRY(carp_softc) sc_list;	/* parent's if_carp linkage */
	struct refcnt sc_refcnt;

	int sc_suppress;
	int sc_bow_out;		/* advertise advbase/advskew 255 when set */
	int sc_demote_cnt;

	int sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count)
	int sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count)

	char sc_curlladdr[ETHER_ADDR_LEN];

	SRPL_HEAD(, carp_vhost_entry) carp_vhosts;
	int sc_vhe_count;	/* number of entries on carp_vhosts */
	u_int8_t sc_vhids[CARP_MAXNODES];
	u_int8_t sc_advskews[CARP_MAXNODES];
	u_int8_t sc_balancing;

	int sc_naddrs;
	int sc_naddrs6;
	int sc_advbase; /* seconds */

	/* authentication */
	unsigned char sc_key[CARP_KEY_LEN];

	u_int32_t sc_hashkey[2];	/* set from the leader's HMAC prefix */
	u_int32_t sc_lsmask; /* load sharing mask */
	int sc_lscount; /* # load sharing interfaces (max 32) */
	int sc_delayed_arp; /* delayed ARP request countdown */
	int sc_realmac; /* using real mac */

	struct in_addr sc_peer;

	LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead;
	struct carp_vhost_entry *cur_vhe; /* current active vhe */
};
180 :
/* Reference callbacks handed to the SRPL code for carp_softc lists. */
void carp_sc_ref(void *, void *);
void carp_sc_unref(void *, void *);

struct srpl_rc carp_sc_rc =
    SRPL_RC_INITIALIZER(carp_sc_ref, carp_sc_unref, NULL);
186 :
/* Tunables indexed by CARPCTL_* id; read and written through carp_sysctl(). */
int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT }; /* XXX for now */
/* Per-CPU statistics counters, allocated in carpattach(). */
struct cpumem *carpcounters;

/* Recursion guard used by carp_send_ad_all(). */
int carp_send_all_recur = 0;
191 :
/*
 * Log a message at syslog level `l' if carp_opts[CARPCTL_LOG] is at least
 * `l'.  The line is prefixed with the interface name of `sc', or with
 * "carp: " when `sc' is NULL.  `s' is a parenthesised addlog() argument
 * list, e.g. ("bad vhid %d", vhid).
 *
 * `l' and `sc' are expanded more than once, so pass expressions without
 * side effects.  Both are parenthesised so that compound expressions
 * (e.g. a conditional) keep their intended meaning.
 */
#define CARP_LOG(l, sc, s)						\
	do {								\
		if (carp_opts[CARPCTL_LOG] >= (l)) {			\
			if (sc)						\
				log((l), "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log((l), "carp: ");			\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (0)
204 :
/*
 * Forward declarations for the functions of this file, so that definitions
 * may appear in any order below.
 */
void	carp_hmac_prepare(struct carp_softc *);
void	carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t);
void	carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *, u_int8_t);
int	carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *);
int	carp_input(struct ifnet *, struct mbuf *, void *);
void	carp_proto_input_c(struct ifnet *, struct mbuf *,
	    struct carp_header *, int, sa_family_t);
int	carp_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
#ifdef INET6
int	carp6_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
#endif
void	carpattach(int);
void	carpdetach(void *);
void	carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *,
	    struct carp_header *);
void	carp_send_ad_all(void);
void	carp_vhe_send_ad_all(struct carp_softc *);
void	carp_timer_ad(void *);
void	carp_send_ad(struct carp_vhost_entry *);
void	carp_send_arp(struct carp_softc *);
void	carp_timer_down(void *);
void	carp_master_down(struct carp_vhost_entry *);
int	carp_ioctl(struct ifnet *, u_long, caddr_t);
int	carp_vhids_ioctl(struct carp_softc *, struct carpreq *);
int	carp_check_dup_vhids(struct carp_softc *, struct srpl *,
	    struct carpreq *);
void	carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_start(struct ifnet *);
void	carp_setrun_all(struct carp_softc *, sa_family_t);
void	carp_setrun(struct carp_vhost_entry *, sa_family_t);
void	carp_set_state_all(struct carp_softc *, int);
void	carp_set_state(struct carp_vhost_entry *, int);
void	carp_multicast_cleanup(struct carp_softc *);
int	carp_set_ifp(struct carp_softc *, struct ifnet *);
void	carp_set_enaddr(struct carp_softc *);
void	carp_set_vhe_enaddr(struct carp_vhost_entry *);
void	carp_addr_updated(void *);
int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
int	carp_join_multicast(struct carp_softc *);
#ifdef INET6
void	carp_send_na(struct carp_softc *);
int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
int	carp_join_multicast6(struct carp_softc *);
#endif
int	carp_clone_create(struct if_clone *, int);
int	carp_clone_destroy(struct ifnet *);
int	carp_ether_addmulti(struct carp_softc *, struct ifreq *);
int	carp_ether_delmulti(struct carp_softc *, struct ifreq *);
void	carp_ether_purgemulti(struct carp_softc *);
int	carp_group_demote_count(struct carp_softc *);
void	carp_update_lsmask(struct carp_softc *);
int	carp_new_vhost(struct carp_softc *, int, int);
void	carp_destroy_vhosts(struct carp_softc *);
void	carp_del_all_timeouts(struct carp_softc *);
262 :
/* Cloner for "carp" interfaces; registered by carpattach(). */
struct if_clone carp_cloner =
    IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);

/* in_cksum() over the first _l bytes of mbuf _m, narrowed to 16 bits. */
#define carp_cksum(_m, _l) ((u_int16_t)in_cksum((_m), (_l)))
#define CARP_IFQ_PRIO 6
268 :
269 : void
270 0 : carp_hmac_prepare(struct carp_softc *sc)
271 : {
272 : struct carp_vhost_entry *vhe;
273 : u_int8_t i;
274 :
275 0 : KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
276 :
277 0 : SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
278 0 : for (i = 0; i < HMAC_MAX; i++) {
279 0 : carp_hmac_prepare_ctx(vhe, i);
280 : }
281 : }
282 0 : }
283 :
284 : void
285 0 : carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx)
286 : {
287 0 : struct carp_softc *sc = vhe->parent_sc;
288 :
289 0 : u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
290 0 : u_int8_t vhid = vhe->vhid & 0xff;
291 0 : SHA1_CTX sha1ctx;
292 0 : u_int32_t kmd[5];
293 : struct ifaddr *ifa;
294 : int i, found;
295 0 : struct in_addr last, cur, in;
296 : #ifdef INET6
297 0 : struct in6_addr last6, cur6, in6;
298 : #endif /* INET6 */
299 :
300 : /* compute ipad from key */
301 0 : memset(vhe->vhe_pad, 0, sizeof(vhe->vhe_pad));
302 0 : bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key));
303 0 : for (i = 0; i < sizeof(vhe->vhe_pad); i++)
304 0 : vhe->vhe_pad[i] ^= 0x36;
305 :
306 : /* precompute first part of inner hash */
307 0 : SHA1Init(&vhe->vhe_sha1[ctx]);
308 0 : SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad));
309 0 : SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version));
310 0 : SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type));
311 :
312 : /* generate a key for the arpbalance hash, before the vhid is hashed */
313 0 : if (vhe->vhe_leader) {
314 0 : bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
315 0 : SHA1Final((unsigned char *)kmd, &sha1ctx);
316 0 : sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
317 0 : sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
318 0 : }
319 :
320 : /* the rest of the precomputation */
321 0 : if (!sc->sc_realmac && vhe->vhe_leader &&
322 0 : memcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0)
323 0 : SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr,
324 : ETHER_ADDR_LEN);
325 :
326 0 : SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid));
327 :
328 : /* Hash the addresses from smallest to largest, not interface order */
329 0 : cur.s_addr = 0;
330 0 : do {
331 : found = 0;
332 0 : last = cur;
333 0 : cur.s_addr = 0xffffffff;
334 0 : TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
335 0 : if (ifa->ifa_addr->sa_family != AF_INET)
336 : continue;
337 0 : in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
338 0 : if (ntohl(in.s_addr) > ntohl(last.s_addr) &&
339 0 : ntohl(in.s_addr) < ntohl(cur.s_addr)) {
340 0 : cur.s_addr = in.s_addr;
341 0 : found++;
342 0 : }
343 : }
344 0 : if (found)
345 0 : SHA1Update(&vhe->vhe_sha1[ctx],
346 : (void *)&cur, sizeof(cur));
347 0 : } while (found);
348 : #ifdef INET6
349 0 : memset(&cur6, 0x00, sizeof(cur6));
350 0 : do {
351 : found = 0;
352 0 : last6 = cur6;
353 0 : memset(&cur6, 0xff, sizeof(cur6));
354 0 : TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
355 0 : if (ifa->ifa_addr->sa_family != AF_INET6)
356 : continue;
357 0 : in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
358 0 : if (IN6_IS_SCOPE_EMBED(&in6)) {
359 0 : if (ctx == HMAC_NOV6LL)
360 : continue;
361 0 : in6.s6_addr16[1] = 0;
362 0 : }
363 0 : if (memcmp(&in6, &last6, sizeof(in6)) > 0 &&
364 0 : memcmp(&in6, &cur6, sizeof(in6)) < 0) {
365 0 : cur6 = in6;
366 0 : found++;
367 0 : }
368 : }
369 0 : if (found)
370 0 : SHA1Update(&vhe->vhe_sha1[ctx],
371 : (void *)&cur6, sizeof(cur6));
372 0 : } while (found);
373 : #endif /* INET6 */
374 :
375 : /* convert ipad to opad */
376 0 : for (i = 0; i < sizeof(vhe->vhe_pad); i++)
377 0 : vhe->vhe_pad[i] ^= 0x36 ^ 0x5c;
378 0 : }
379 :
380 : void
381 0 : carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2],
382 : unsigned char md[20], u_int8_t ctx)
383 : {
384 0 : SHA1_CTX sha1ctx;
385 :
386 : /* fetch first half of inner hash */
387 0 : bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
388 :
389 0 : SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie));
390 0 : SHA1Final(md, &sha1ctx);
391 :
392 : /* outer hash */
393 0 : SHA1Init(&sha1ctx);
394 0 : SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad));
395 0 : SHA1Update(&sha1ctx, md, 20);
396 0 : SHA1Final(md, &sha1ctx);
397 0 : }
398 :
399 : int
400 0 : carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2],
401 : unsigned char md[20])
402 : {
403 0 : unsigned char md2[20];
404 : u_int8_t i;
405 :
406 0 : for (i = 0; i < HMAC_MAX; i++) {
407 0 : carp_hmac_generate(vhe, counter, md2, i);
408 0 : if (!timingsafe_bcmp(md, md2, sizeof(md2)))
409 0 : return (0);
410 : }
411 0 : return (1);
412 0 : }
413 :
414 : int
415 0 : carp_proto_input(struct mbuf **mp, int *offp, int proto, int af)
416 : {
417 : struct ifnet *ifp;
418 :
419 0 : ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
420 0 : if (ifp == NULL) {
421 0 : m_freemp(mp);
422 0 : return IPPROTO_DONE;
423 : }
424 :
425 0 : proto = carp_proto_input_if(ifp, mp, offp, proto);
426 0 : if_put(ifp);
427 0 : return proto;
428 0 : }
429 :
430 : /*
431 : * process input packet.
432 : * we have rearranged checks order compared to the rfc,
433 : * but it seems more efficient this way or not possible otherwise.
434 : */
435 : int
436 0 : carp_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto)
437 : {
438 0 : struct mbuf *m = *mp;
439 0 : struct ip *ip = mtod(m, struct ip *);
440 : struct carp_softc *sc = NULL;
441 : struct carp_header *ch;
442 : int iplen, len, ismulti;
443 :
444 0 : carpstat_inc(carps_ipackets);
445 :
446 0 : if (!carp_opts[CARPCTL_ALLOW]) {
447 0 : m_freem(m);
448 0 : return IPPROTO_DONE;
449 : }
450 :
451 0 : ismulti = IN_MULTICAST(ip->ip_dst.s_addr);
452 :
453 : /* check if received on a valid carp interface */
454 0 : switch (ifp->if_type) {
455 : case IFT_CARP:
456 : break;
457 : case IFT_ETHER:
458 0 : if (ismulti || !SRPL_EMPTY_LOCKED(&ifp->if_carp))
459 : break;
460 : /* FALLTHROUGH */
461 : default:
462 0 : carpstat_inc(carps_badif);
463 0 : CARP_LOG(LOG_INFO, sc,
464 : ("packet received on non-carp interface: %s",
465 : ifp->if_xname));
466 0 : m_freem(m);
467 0 : return IPPROTO_DONE;
468 : }
469 :
470 : /* verify that the IP TTL is 255. */
471 0 : if (ip->ip_ttl != CARP_DFLTTL) {
472 0 : carpstat_inc(carps_badttl);
473 0 : CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
474 : ip->ip_ttl, CARP_DFLTTL, ifp->if_xname));
475 0 : m_freem(m);
476 0 : return IPPROTO_DONE;
477 : }
478 :
479 : /*
480 : * verify that the received packet length is
481 : * equal to the CARP header
482 : */
483 0 : iplen = ip->ip_hl << 2;
484 0 : len = iplen + sizeof(*ch);
485 0 : if (len > m->m_pkthdr.len) {
486 0 : carpstat_inc(carps_badlen);
487 0 : CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s",
488 : m->m_pkthdr.len, ifp->if_xname));
489 0 : m_freem(m);
490 0 : return IPPROTO_DONE;
491 : }
492 :
493 0 : if ((m = *mp = m_pullup(m, len)) == NULL) {
494 0 : carpstat_inc(carps_hdrops);
495 0 : return IPPROTO_DONE;
496 : }
497 0 : ip = mtod(m, struct ip *);
498 0 : ch = (struct carp_header *)(mtod(m, caddr_t) + iplen);
499 :
500 : /* verify the CARP checksum */
501 0 : m->m_data += iplen;
502 0 : if (carp_cksum(m, len - iplen)) {
503 0 : carpstat_inc(carps_badsum);
504 0 : CARP_LOG(LOG_INFO, sc, ("checksum failed on %s",
505 : ifp->if_xname));
506 0 : m_freem(m);
507 0 : return IPPROTO_DONE;
508 : }
509 0 : m->m_data -= iplen;
510 :
511 0 : KERNEL_LOCK();
512 0 : carp_proto_input_c(ifp, m, ch, ismulti, AF_INET);
513 0 : KERNEL_UNLOCK();
514 0 : return IPPROTO_DONE;
515 0 : }
516 :
517 : #ifdef INET6
518 : int
519 0 : carp6_proto_input(struct mbuf **mp, int *offp, int proto, int af)
520 : {
521 : struct ifnet *ifp;
522 :
523 0 : ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
524 0 : if (ifp == NULL) {
525 0 : m_freemp(mp);
526 0 : return IPPROTO_DONE;
527 : }
528 :
529 0 : proto = carp6_proto_input_if(ifp, mp, offp, proto);
530 0 : if_put(ifp);
531 0 : return proto;
532 0 : }
533 :
534 : int
535 0 : carp6_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto)
536 : {
537 0 : struct mbuf *m = *mp;
538 : struct carp_softc *sc = NULL;
539 0 : struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
540 : struct carp_header *ch;
541 : u_int len;
542 :
543 0 : carpstat_inc(carps_ipackets6);
544 :
545 0 : if (!carp_opts[CARPCTL_ALLOW]) {
546 0 : m_freem(m);
547 0 : return IPPROTO_DONE;
548 : }
549 :
550 : /* check if received on a valid carp interface */
551 0 : if (ifp->if_type != IFT_CARP) {
552 0 : carpstat_inc(carps_badif);
553 0 : CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
554 : ifp->if_xname));
555 0 : m_freem(m);
556 0 : return IPPROTO_DONE;
557 : }
558 :
559 : /* verify that the IP TTL is 255 */
560 0 : if (ip6->ip6_hlim != CARP_DFLTTL) {
561 0 : carpstat_inc(carps_badttl);
562 0 : CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
563 : ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname));
564 0 : m_freem(m);
565 0 : return IPPROTO_DONE;
566 : }
567 :
568 : /* verify that we have a complete carp packet */
569 0 : len = m->m_len;
570 0 : if ((m = *mp = m_pullup(m, *offp + sizeof(*ch))) == NULL) {
571 0 : carpstat_inc(carps_badlen);
572 0 : CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len));
573 0 : return IPPROTO_DONE;
574 : }
575 0 : ch = (struct carp_header *)(mtod(m, caddr_t) + *offp);
576 :
577 : /* verify the CARP checksum */
578 0 : m->m_data += *offp;
579 0 : if (carp_cksum(m, sizeof(*ch))) {
580 0 : carpstat_inc(carps_badsum);
581 0 : CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s",
582 : ifp->if_xname));
583 0 : m_freem(m);
584 0 : return IPPROTO_DONE;
585 : }
586 0 : m->m_data -= *offp;
587 :
588 0 : KERNEL_LOCK();
589 0 : carp_proto_input_c(ifp, m, ch, 1, AF_INET6);
590 0 : KERNEL_UNLOCK();
591 0 : return IPPROTO_DONE;
592 0 : }
593 : #endif /* INET6 */
594 :
595 : void
596 0 : carp_proto_input_c(struct ifnet *ifp, struct mbuf *m, struct carp_header *ch,
597 : int ismulti, sa_family_t af)
598 : {
599 : struct carp_softc *sc;
600 : struct carp_vhost_entry *vhe;
601 : struct timeval sc_tv, ch_tv;
602 : struct srpl *cif;
603 :
604 0 : KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */
605 :
606 0 : if (ifp->if_type == IFT_CARP) {
607 : /*
608 : * If the parent of this carp(4) got destroyed while
609 : * `m' was being processed, silently drop it.
610 : */
611 0 : if (ifp->if_carpdev == NULL) {
612 0 : m_freem(m);
613 0 : return;
614 : }
615 0 : cif = &ifp->if_carpdev->if_carp;
616 0 : } else
617 0 : cif = &ifp->if_carp;
618 :
619 0 : SRPL_FOREACH_LOCKED(sc, cif, sc_list) {
620 0 : if (af == AF_INET &&
621 0 : ismulti != IN_MULTICAST(sc->sc_peer.s_addr))
622 : continue;
623 0 : SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
624 0 : if (vhe->vhid == ch->carp_vhid)
625 : goto found;
626 : }
627 : }
628 : found:
629 :
630 0 : if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
631 : (IFF_UP|IFF_RUNNING)) {
632 0 : carpstat_inc(carps_badvhid);
633 0 : m_freem(m);
634 0 : return;
635 : }
636 :
637 0 : getmicrotime(&sc->sc_if.if_lastchange);
638 0 : sc->sc_if.if_ipackets++;
639 0 : sc->sc_if.if_ibytes += m->m_pkthdr.len;
640 :
641 : /* verify the CARP version. */
642 0 : if (ch->carp_version != CARP_VERSION) {
643 0 : carpstat_inc(carps_badver);
644 0 : sc->sc_if.if_ierrors++;
645 0 : CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d",
646 : ch->carp_version, CARP_VERSION));
647 0 : m_freem(m);
648 0 : return;
649 : }
650 :
651 : /* verify the hash */
652 0 : if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) {
653 0 : carpstat_inc(carps_badauth);
654 0 : sc->sc_if.if_ierrors++;
655 0 : CARP_LOG(LOG_INFO, sc, ("incorrect hash"));
656 0 : m_freem(m);
657 0 : return;
658 : }
659 :
660 0 : if (!memcmp(&vhe->vhe_replay_cookie, ch->carp_counter,
661 : sizeof(ch->carp_counter))) {
662 : /* Do not log duplicates from non simplex interfaces */
663 0 : if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) {
664 0 : carpstat_inc(carps_badauth);
665 0 : sc->sc_if.if_ierrors++;
666 0 : CARP_LOG(LOG_WARNING, sc,
667 : ("replay or network loop detected"));
668 : }
669 0 : m_freem(m);
670 0 : return;
671 : }
672 :
673 0 : sc_tv.tv_sec = sc->sc_advbase;
674 0 : sc_tv.tv_usec = vhe->advskew * 1000000 / 256;
675 0 : ch_tv.tv_sec = ch->carp_advbase;
676 0 : ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
677 :
678 0 : switch (vhe->state) {
679 : case INIT:
680 : break;
681 : case MASTER:
682 : /*
683 : * If we receive an advertisement from a master who's going to
684 : * be more frequent than us, and whose demote count is not higher
685 : * than ours, go into BACKUP state. If his demote count is lower,
686 : * also go into BACKUP.
687 : */
688 0 : if (((timercmp(&sc_tv, &ch_tv, >) ||
689 0 : timercmp(&sc_tv, &ch_tv, ==)) &&
690 0 : (ch->carp_demote <= carp_group_demote_count(sc))) ||
691 0 : ch->carp_demote < carp_group_demote_count(sc)) {
692 0 : timeout_del(&vhe->ad_tmo);
693 0 : carp_set_state(vhe, BACKUP);
694 0 : carp_setrun(vhe, 0);
695 0 : }
696 : break;
697 : case BACKUP:
698 : /*
699 : * If we're pre-empting masters who advertise slower than us,
700 : * and do not have a better demote count, treat them as down.
701 : *
702 : */
703 0 : if (carp_opts[CARPCTL_PREEMPT] &&
704 0 : timercmp(&sc_tv, &ch_tv, <) &&
705 0 : ch->carp_demote >= carp_group_demote_count(sc)) {
706 0 : carp_master_down(vhe);
707 0 : break;
708 : }
709 :
710 : /*
711 : * Take over masters advertising with a higher demote count,
712 : * regardless of CARPCTL_PREEMPT.
713 : */
714 0 : if (ch->carp_demote > carp_group_demote_count(sc)) {
715 0 : carp_master_down(vhe);
716 0 : break;
717 : }
718 :
719 : /*
720 : * If the master is going to advertise at such a low frequency
721 : * that he's guaranteed to time out, we'd might as well just
722 : * treat him as timed out now.
723 : */
724 0 : sc_tv.tv_sec = sc->sc_advbase * 3;
725 0 : if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) {
726 0 : carp_master_down(vhe);
727 0 : break;
728 : }
729 :
730 : /*
731 : * Otherwise, we reset the counter and wait for the next
732 : * advertisement.
733 : */
734 0 : carp_setrun(vhe, af);
735 0 : break;
736 : }
737 :
738 0 : m_freem(m);
739 0 : return;
740 0 : }
741 :
742 : int
743 0 : carp_sysctl_carpstat(void *oldp, size_t *oldlenp, void *newp)
744 : {
745 0 : struct carpstats carpstat;
746 :
747 : CTASSERT(sizeof(carpstat) == (carps_ncounters * sizeof(uint64_t)));
748 0 : memset(&carpstat, 0, sizeof carpstat);
749 0 : counters_read(carpcounters, (uint64_t *)&carpstat, carps_ncounters);
750 0 : return (sysctl_rdstruct(oldp, oldlenp, newp,
751 : &carpstat, sizeof(carpstat)));
752 0 : }
753 :
754 : int
755 0 : carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
756 : size_t newlen)
757 : {
758 : int error;
759 :
760 : /* All sysctl names at this level are terminal. */
761 0 : if (namelen != 1)
762 0 : return (ENOTDIR);
763 :
764 0 : switch (name[0]) {
765 : case CARPCTL_STATS:
766 0 : return (carp_sysctl_carpstat(oldp, oldlenp, newp));
767 : default:
768 0 : if (name[0] <= 0 || name[0] >= CARPCTL_MAXID)
769 0 : return (ENOPROTOOPT);
770 0 : NET_LOCK();
771 0 : error = sysctl_int(oldp, oldlenp, newp, newlen,
772 0 : &carp_opts[name[0]]);
773 0 : NET_UNLOCK();
774 0 : return (error);
775 : }
776 0 : }
777 :
778 : /*
779 : * Interface side of the CARP implementation.
780 : */
781 :
782 : /* ARGSUSED */
783 : void
784 0 : carpattach(int n)
785 : {
786 : struct ifg_group *ifg;
787 :
788 0 : if ((ifg = if_creategroup("carp")) != NULL)
789 0 : ifg->ifg_refcnt++; /* keep around even if empty */
790 0 : if_clone_attach(&carp_cloner);
791 0 : carpcounters = counters_alloc(carps_ncounters);
792 0 : }
793 :
794 : int
795 0 : carp_clone_create(struct if_clone *ifc, int unit)
796 : {
797 : struct carp_softc *sc;
798 : struct ifnet *ifp;
799 :
800 0 : sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
801 0 : refcnt_init(&sc->sc_refcnt);
802 :
803 0 : SRPL_INIT(&sc->carp_vhosts);
804 0 : sc->sc_vhe_count = 0;
805 0 : if (carp_new_vhost(sc, 0, 0)) {
806 0 : free(sc, M_DEVBUF, sizeof(*sc));
807 0 : return (ENOMEM);
808 : }
809 :
810 0 : sc->sc_suppress = 0;
811 0 : sc->sc_advbase = CARP_DFLTINTV;
812 0 : sc->sc_naddrs = sc->sc_naddrs6 = 0;
813 : #ifdef INET6
814 0 : sc->sc_im6o.im6o_hlim = CARP_DFLTTL;
815 : #endif /* INET6 */
816 0 : sc->sc_imo.imo_membership = (struct in_multi **)malloc(
817 : (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
818 : M_WAITOK|M_ZERO);
819 0 : sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
820 :
821 0 : LIST_INIT(&sc->carp_mc_listhead);
822 0 : ifp = &sc->sc_if;
823 0 : ifp->if_softc = sc;
824 0 : snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
825 : unit);
826 0 : ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
827 0 : ifp->if_ioctl = carp_ioctl;
828 0 : ifp->if_start = carp_start;
829 0 : ifp->if_xflags = IFXF_CLONED;
830 0 : IFQ_SET_MAXLEN(&ifp->if_snd, 1);
831 0 : if_attach(ifp);
832 0 : ether_ifattach(ifp);
833 0 : ifp->if_type = IFT_CARP;
834 0 : ifp->if_sadl->sdl_type = IFT_CARP;
835 0 : ifp->if_output = carp_output;
836 0 : ifp->if_priority = IF_CARP_DEFAULT_PRIORITY;
837 0 : ifp->if_link_state = LINK_STATE_INVALID;
838 :
839 : /* Hook carp_addr_updated to cope with address and route changes. */
840 0 : sc->ah_cookie = hook_establish(sc->sc_if.if_addrhooks, 0,
841 : carp_addr_updated, sc);
842 :
843 0 : return (0);
844 0 : }
845 :
846 : int
847 0 : carp_new_vhost(struct carp_softc *sc, int vhid, int advskew)
848 : {
849 : struct carp_vhost_entry *vhe, *vhe0;
850 :
851 0 : vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO);
852 0 : if (vhe == NULL)
853 0 : return (ENOMEM);
854 :
855 0 : refcnt_init(&vhe->vhost_refcnt);
856 0 : carp_sc_ref(NULL, sc); /* give a sc ref to the vhe */
857 0 : vhe->parent_sc = sc;
858 0 : vhe->vhid = vhid;
859 0 : vhe->advskew = advskew;
860 0 : vhe->state = INIT;
861 0 : timeout_set_proc(&vhe->ad_tmo, carp_timer_ad, vhe);
862 0 : timeout_set_proc(&vhe->md_tmo, carp_timer_down, vhe);
863 0 : timeout_set_proc(&vhe->md6_tmo, carp_timer_down, vhe);
864 :
865 0 : KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
866 :
867 : /* mark the first vhe as leader */
868 0 : if (SRPL_EMPTY_LOCKED(&sc->carp_vhosts)) {
869 0 : vhe->vhe_leader = 1;
870 0 : SRPL_INSERT_HEAD_LOCKED(&carp_vh_rc, &sc->carp_vhosts,
871 : vhe, vhost_entries);
872 0 : sc->sc_vhe_count = 1;
873 0 : return (0);
874 : }
875 :
876 0 : SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) {
877 0 : if (SRPL_NEXT_LOCKED(vhe0, vhost_entries) == NULL)
878 : break;
879 : }
880 :
881 0 : SRPL_INSERT_AFTER_LOCKED(&carp_vh_rc, vhe0, vhe, vhost_entries);
882 0 : sc->sc_vhe_count++;
883 :
884 0 : return (0);
885 0 : }
886 :
887 : int
888 0 : carp_clone_destroy(struct ifnet *ifp)
889 : {
890 0 : struct carp_softc *sc = ifp->if_softc;
891 :
892 0 : NET_LOCK();
893 0 : carpdetach(sc);
894 0 : if (sc->ah_cookie != NULL)
895 0 : hook_disestablish(sc->sc_if.if_addrhooks, sc->ah_cookie);
896 0 : NET_UNLOCK();
897 :
898 0 : ether_ifdetach(ifp);
899 0 : if_detach(ifp);
900 0 : carp_destroy_vhosts(ifp->if_softc);
901 0 : refcnt_finalize(&sc->sc_refcnt, "carpdtor");
902 0 : free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
903 0 : free(sc, M_DEVBUF, sizeof(*sc));
904 0 : return (0);
905 : }
906 :
907 : void
908 0 : carp_del_all_timeouts(struct carp_softc *sc)
909 : {
910 : struct carp_vhost_entry *vhe;
911 :
912 0 : KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
913 0 : SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
914 0 : timeout_del(&vhe->ad_tmo);
915 0 : timeout_del(&vhe->md_tmo);
916 0 : timeout_del(&vhe->md6_tmo);
917 : }
918 0 : }
919 :
920 : void
921 0 : carpdetach(void *arg)
922 : {
923 0 : struct carp_softc *sc = arg;
924 : struct ifnet *ifp0;
925 : struct srpl *cif;
926 :
927 0 : carp_del_all_timeouts(sc);
928 :
929 0 : if (sc->sc_demote_cnt)
930 0 : carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach");
931 0 : sc->sc_suppress = 0;
932 0 : sc->sc_sendad_errors = 0;
933 :
934 0 : carp_set_state_all(sc, INIT);
935 0 : sc->sc_if.if_flags &= ~IFF_UP;
936 0 : carp_setrun_all(sc, 0);
937 0 : carp_multicast_cleanup(sc);
938 :
939 0 : ifp0 = sc->sc_carpdev;
940 0 : if (ifp0 == NULL)
941 0 : return;
942 :
943 0 : KERNEL_ASSERT_LOCKED(); /* touching if_carp */
944 :
945 0 : cif = &ifp0->if_carp;
946 :
947 : /* Restore previous input handler. */
948 0 : if_ih_remove(ifp0, carp_input, NULL);
949 :
950 0 : SRPL_REMOVE_LOCKED(&carp_sc_rc, cif, sc, carp_softc, sc_list);
951 0 : if (SRPL_EMPTY_LOCKED(cif))
952 0 : ifpromisc(ifp0, 0);
953 0 : sc->sc_carpdev = NULL;
954 :
955 0 : hook_disestablish(ifp0->if_linkstatehooks, sc->lh_cookie);
956 0 : hook_disestablish(ifp0->if_detachhooks, sc->dh_cookie);
957 0 : }
958 :
959 : void
960 0 : carp_destroy_vhosts(struct carp_softc *sc)
961 : {
962 : /* XXX bow out? */
963 : struct carp_vhost_entry *vhe;
964 :
965 0 : KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
966 :
967 0 : while ((vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts)) != NULL) {
968 0 : SRPL_REMOVE_LOCKED(&carp_vh_rc, &sc->carp_vhosts, vhe,
969 : carp_vhost_entry, vhost_entries);
970 0 : carp_vh_unref(NULL, vhe); /* drop last ref */
971 : }
972 0 : sc->sc_vhe_count = 0;
973 0 : }
974 :
975 : void
976 0 : carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe,
977 : struct carp_header *ch)
978 : {
979 0 : if (!vhe->vhe_replay_cookie) {
980 0 : arc4random_buf(&vhe->vhe_replay_cookie,
981 : sizeof(vhe->vhe_replay_cookie));
982 0 : }
983 :
984 0 : bcopy(&vhe->vhe_replay_cookie, ch->carp_counter,
985 : sizeof(ch->carp_counter));
986 :
987 : /*
988 : * For the time being, do not include the IPv6 linklayer addresses
989 : * in the HMAC.
990 : */
991 0 : carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL);
992 0 : }
993 :
994 : void
995 0 : carp_send_ad_all(void)
996 : {
997 : struct ifnet *ifp0;
998 : struct srpl *cif;
999 : struct carp_softc *vh;
1000 :
1001 0 : KERNEL_ASSERT_LOCKED(); /* touching if_carp */
1002 :
1003 0 : if (carp_send_all_recur > 0)
1004 0 : return;
1005 0 : ++carp_send_all_recur;
1006 0 : TAILQ_FOREACH(ifp0, &ifnet, if_list) {
1007 0 : if (ifp0->if_type != IFT_ETHER)
1008 : continue;
1009 :
1010 0 : cif = &ifp0->if_carp;
1011 0 : SRPL_FOREACH_LOCKED(vh, cif, sc_list) {
1012 0 : if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1013 : (IFF_UP|IFF_RUNNING)) {
1014 0 : carp_vhe_send_ad_all(vh);
1015 0 : }
1016 : }
1017 : }
1018 0 : --carp_send_all_recur;
1019 0 : }
1020 :
1021 : void
1022 0 : carp_vhe_send_ad_all(struct carp_softc *sc)
1023 : {
1024 : struct carp_vhost_entry *vhe;
1025 :
1026 0 : KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1027 :
1028 0 : SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1029 0 : if (vhe->state == MASTER)
1030 0 : carp_send_ad(vhe);
1031 : }
1032 0 : }
1033 :
/*
 * Callback-style wrapper: takes the vhost entry as an opaque pointer and
 * runs carp_send_ad() on it with the net lock held.
 */
void
carp_timer_ad(void *v)
{
	struct carp_vhost_entry *vhe = v;

	NET_LOCK();
	carp_send_ad(vhe);
	NET_UNLOCK();
}
1041 :
/*
 * Build and transmit one CARP advertisement for the given vhost entry.
 *
 * An IPv4 advertisement is sent when the interface has IPv4 addresses
 * (sc_naddrs) and, on INET6 kernels, an IPv6 one when it has IPv6
 * addresses (sc_naddrs6).  Send failures and successes are tracked in
 * sc_sendad_errors / sc_sendad_success and drive group demotion.
 *
 * If the carp interface has no parent device, only the output error
 * counter is bumped.  When sc_bow_out is set, advbase and advskew are
 * advertised as 255 and the advertisement timer is not re-armed;
 * otherwise ad_tmo is rescheduled before returning.
 *
 * Must be called with the net lock held.
 */
void
carp_send_ad(struct carp_vhost_entry *vhe)
{
	struct carp_header ch;
	struct timeval tv;
	struct carp_softc *sc = vhe->parent_sc;
	struct carp_header *ch_ptr;
	struct mbuf *m;
	int error, len, advbase, advskew;
	struct ifaddr *ifa;
	struct sockaddr sa;

	NET_ASSERT_LOCKED();

	/* Not attached to a parent interface: nothing to send on. */
	if (sc->sc_carpdev == NULL) {
		sc->sc_if.if_oerrors++;
		return;
	}

	/* bow out if we've gone to backup (the carp interface is going down) */
	if (sc->sc_bow_out) {
		/*
		 * tv is left unset here; it is only read at retry_later when
		 * advbase/advskew are not both 255.
		 */
		advbase = 255;
		advskew = 255;
	} else {
		/*
		 * Interval until the next advertisement: advbase seconds plus
		 * advskew/256 seconds, or 1/256 second when both are zero.
		 */
		advbase = sc->sc_advbase;
		advskew = vhe->advskew;
		tv.tv_sec = advbase;
		if (advbase == 0 && advskew == 0)
			tv.tv_usec = 1 * 1000000 / 256;
		else
			tv.tv_usec = advskew * 1000000 / 256;
	}

	/* Header template shared by the IPv4 and IPv6 advertisements. */
	ch.carp_version = CARP_VERSION;
	ch.carp_type = CARP_ADVERTISEMENT;
	ch.carp_vhid = vhe->vhid;
	ch.carp_demote = carp_group_demote_count(sc) & 0xff;
	ch.carp_advbase = advbase;
	ch.carp_advskew = advskew;
	ch.carp_authlen = 7; /* XXX DEFINE */
	ch.carp_cksum = 0;

	sc->cur_vhe = vhe; /* we need the vhe later on the output path */

	if (sc->sc_naddrs) {
		struct ip *ip;

		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m == NULL) {
			sc->sc_if.if_oerrors++;
			carpstat_inc(carps_onomem);
			/* XXX maybe less ? */
			/* Note: this also skips the IPv6 advertisement below. */
			goto retry_later;
		}
		/* Single mbuf holding the IP header followed by the CARP header. */
		len = sizeof(*ip) + sizeof(ch);
		m->m_pkthdr.len = len;
		m->m_pkthdr.ph_ifidx = 0;
		m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
		m->m_len = len;
		MH_ALIGN(m, m->m_len);
		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		ip->ip_len = htons(len);
		ip->ip_id = htons(ip_randomid());
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = CARP_DFLTTL;
		ip->ip_p = IPPROTO_CARP;
		ip->ip_sum = 0;

		/* Only the address family is set; sa is used as a family selector. */
		memset(&sa, 0, sizeof(sa));
		sa.sa_family = AF_INET;
		/* Prefer addresses on the parent interface as source for AD. */
		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
		if (ifa == NULL)
			ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
		KASSERT(ifa != NULL);
		ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
		ip->ip_dst.s_addr = sc->sc_peer.s_addr;
		if (IN_MULTICAST(ip->ip_dst.s_addr))
			m->m_flags |= M_MCAST;

		/* Copy the template behind the IP header, then add cookie + HMAC. */
		ch_ptr = (struct carp_header *)(ip + 1);
		bcopy(&ch, ch_ptr, sizeof(ch));
		carp_prepare_ad(m, vhe, ch_ptr);

		/*
		 * Temporarily advance m_data past the IP header so that the
		 * checksum is computed over the CARP header only, then restore.
		 */
		m->m_data += sizeof(*ip);
		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
		m->m_data -= sizeof(*ip);

		/* Output statistics are updated before the send is attempted. */
		getmicrotime(&sc->sc_if.if_lastchange);
		sc->sc_if.if_opackets++;
		sc->sc_if.if_obytes += len;
		carpstat_inc(carps_opackets);

		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
		    NULL, 0);
		if (error) {
			if (error == ENOBUFS)
				carpstat_inc(carps_onomem);
			else
				CARP_LOG(LOG_WARNING, sc,
				    ("ip_output failed: %d", error));
			sc->sc_if.if_oerrors++;
			/* Saturating error counter. */
			if (sc->sc_sendad_errors < INT_MAX)
				sc->sc_sendad_errors++;
			/* Demote exactly once, when the threshold is first reached. */
			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
				carp_group_demote_adj(&sc->sc_if, 1,
				    "> snderrors");
			sc->sc_sendad_success = 0;
		} else {
			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
				/*
				 * We were demoted: undo it only after enough
				 * consecutive successful sends.
				 */
				if (++sc->sc_sendad_success >=
				    CARP_SENDAD_MIN_SUCCESS(sc)) {
					carp_group_demote_adj(&sc->sc_if, -1,
					    "< snderrors");
					sc->sc_sendad_errors = 0;
				}
			} else
				sc->sc_sendad_errors = 0;
		}
		if (vhe->vhe_leader) {
			/*
			 * Delayed ARP countdown: decrement while positive, send
			 * the ARP when it reaches zero, then park it at -1.
			 */
			if (sc->sc_delayed_arp > 0)
				sc->sc_delayed_arp--;
			if (sc->sc_delayed_arp == 0) {
				carp_send_arp(sc);
				sc->sc_delayed_arp = -1;
			}
		}
	}
#ifdef INET6
	if (sc->sc_naddrs6) {
		struct ip6_hdr *ip6;

		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m == NULL) {
			sc->sc_if.if_oerrors++;
			carpstat_inc(carps_onomem);
			/* XXX maybe less ? */
			goto retry_later;
		}
		/* Single mbuf holding the IPv6 header followed by the CARP header. */
		len = sizeof(*ip6) + sizeof(ch);
		m->m_pkthdr.len = len;
		m->m_pkthdr.ph_ifidx = 0;
		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
		m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
		m->m_len = len;
		MH_ALIGN(m, m->m_len);
		m->m_flags |= M_MCAST;
		ip6 = mtod(m, struct ip6_hdr *);
		memset(ip6, 0, sizeof(*ip6));
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_hlim = CARP_DFLTTL;
		ip6->ip6_nxt = IPPROTO_CARP;

		/* set the source address */
		memset(&sa, 0, sizeof(sa));
		sa.sa_family = AF_INET6;
		/* Prefer addresses on the parent interface as source for AD. */
		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
		if (ifa == NULL)
			ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
		KASSERT(ifa != NULL);
		bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
		    &ip6->ip6_src, sizeof(struct in6_addr));
		/* set the multicast destination */

		/*
		 * ff02::12 with the parent's interface index stored in the
		 * second 16-bit word of the address.
		 */
		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
		ip6->ip6_dst.s6_addr16[1] = htons(sc->sc_carpdev->if_index);
		ip6->ip6_dst.s6_addr8[15] = 0x12;

		/* Copy the template behind the IPv6 header, then add cookie + HMAC. */
		ch_ptr = (struct carp_header *)(ip6 + 1);
		bcopy(&ch, ch_ptr, sizeof(ch));
		carp_prepare_ad(m, vhe, ch_ptr);

		/* Checksum over the CARP header only, as in the IPv4 case. */
		m->m_data += sizeof(*ip6);
		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
		m->m_data -= sizeof(*ip6);

		getmicrotime(&sc->sc_if.if_lastchange);
		sc->sc_if.if_opackets++;
		sc->sc_if.if_obytes += len;
		carpstat_inc(carps_opackets6);

		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL);
		if (error) {
			if (error == ENOBUFS)
				carpstat_inc(carps_onomem);
			else
				CARP_LOG(LOG_WARNING, sc,
				    ("ip6_output failed: %d", error));
			sc->sc_if.if_oerrors++;
			/* Same error/success accounting as for IPv4 (shared counters). */
			if (sc->sc_sendad_errors < INT_MAX)
				sc->sc_sendad_errors++;
			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
				carp_group_demote_adj(&sc->sc_if, 1,
				    "> snd6errors");
			sc->sc_sendad_success = 0;
		} else {
			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
				if (++sc->sc_sendad_success >=
				    CARP_SENDAD_MIN_SUCCESS(sc)) {
					carp_group_demote_adj(&sc->sc_if, -1,
					    "< snd6errors");
					sc->sc_sendad_errors = 0;
				}
			} else
				sc->sc_sendad_errors = 0;
		}
	}
#endif /* INET6 */

retry_later:
	sc->cur_vhe = NULL;
	/*
	 * Re-arm the advertisement timer unless both values are 255, which
	 * is what the bow-out path above sets.
	 */
	if (advbase != 255 || advskew != 255)
		timeout_add(&vhe->ad_tmo, tvtohz(&tv));
}
1261 :
1262 : /*
1263 : * Broadcast a gratuitous ARP request containing
1264 : * the virtual router MAC address for each IP address
1265 : * associated with the virtual router.
1266 : */
1267 : void
1268 0 : carp_send_arp(struct carp_softc *sc)
1269 : {
1270 : struct ifaddr *ifa;
1271 0 : in_addr_t in;
1272 :
1273 0 : TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1274 :
1275 0 : if (ifa->ifa_addr->sa_family != AF_INET)
1276 : continue;
1277 :
1278 0 : in = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1279 0 : arprequest(&sc->sc_if, &in, &in, sc->sc_ac.ac_enaddr);
1280 0 : }
1281 0 : }
1282 :
1283 : #ifdef INET6
1284 : void
1285 0 : carp_send_na(struct carp_softc *sc)
1286 : {
1287 : struct ifaddr *ifa;
1288 : struct in6_addr *in6;
1289 : static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1290 :
1291 0 : TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1292 :
1293 0 : if (ifa->ifa_addr->sa_family != AF_INET6)
1294 : continue;
1295 :
1296 0 : in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1297 0 : nd6_na_output(&sc->sc_if, &mcast, in6,
1298 0 : ND_NA_FLAG_OVERRIDE |
1299 0 : (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), 1, NULL);
1300 0 : }
1301 0 : }
1302 : #endif /* INET6 */
1303 :
1304 : void
1305 0 : carp_update_lsmask(struct carp_softc *sc)
1306 : {
1307 : struct carp_vhost_entry *vhe;
1308 : int count;
1309 :
1310 0 : if (sc->sc_balancing == CARP_BAL_NONE)
1311 0 : return;
1312 :
1313 0 : sc->sc_lsmask = 0;
1314 : count = 0;
1315 :
1316 0 : KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1317 0 : SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1318 0 : if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8)
1319 0 : sc->sc_lsmask |= 1 << count;
1320 0 : count++;
1321 : }
1322 0 : sc->sc_lscount = count;
1323 0 : CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask));
1324 0 : }
1325 :
1326 : int
1327 0 : carp_iamatch(struct ifnet *ifp)
1328 : {
1329 0 : struct carp_softc *sc = ifp->if_softc;
1330 : struct carp_vhost_entry *vhe;
1331 0 : struct srp_ref sr;
1332 : int match = 0;
1333 :
1334 0 : vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
1335 0 : if (vhe->state == MASTER)
1336 0 : match = 1;
1337 0 : SRPL_LEAVE(&sr);
1338 :
1339 0 : return (match);
1340 0 : }
1341 :
1342 : int
1343 0 : carp_ourether(struct ifnet *ifp, u_int8_t *ena)
1344 : {
1345 0 : struct srpl *cif = &ifp->if_carp;
1346 : struct carp_softc *vh;
1347 :
1348 0 : KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */
1349 :
1350 0 : if (SRPL_EMPTY_LOCKED(cif))
1351 0 : return (0);
1352 :
1353 0 : KASSERT(ifp->if_type == IFT_ETHER);
1354 :
1355 0 : SRPL_FOREACH_LOCKED(vh, cif, sc_list) {
1356 : struct carp_vhost_entry *vhe;
1357 0 : if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
1358 : (IFF_UP|IFF_RUNNING))
1359 0 : continue;
1360 0 : vhe = SRPL_FIRST_LOCKED(&vh->carp_vhosts);
1361 0 : if ((vhe->state == MASTER || vh->sc_balancing >= CARP_BAL_IP) &&
1362 0 : !memcmp(ena, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN))
1363 0 : return (1);
1364 0 : }
1365 0 : return (0);
1366 0 : }
1367 :
1368 : int
1369 0 : carp_vhe_match(struct carp_softc *sc, uint8_t *ena)
1370 : {
1371 : struct carp_vhost_entry *vhe;
1372 0 : struct srp_ref sr;
1373 : int match = 0;
1374 :
1375 0 : vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
1376 0 : match = (vhe->state == MASTER || sc->sc_balancing >= CARP_BAL_IP) &&
1377 0 : !memcmp(ena, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
1378 0 : SRPL_LEAVE(&sr);
1379 :
1380 0 : return (match);
1381 0 : }
1382 :
/*
 * Input handler installed on the parent interface ifp0 (see the
 * if_ih_insert() call in carp_set_ifp()).
 *
 * If the destination Ethernet address matches a running carp interface,
 * the mbuf is handed to that interface and 1 is returned.  Otherwise, for
 * multicast frames, a copy is delivered to every carp interface that is
 * up and 0 is returned with the original mbuf untouched.  Non-matching
 * unicast frames return 0 without further action.
 *
 * Return value: 1 when m has been passed on or freed here, 0 when m is
 * left to the caller.
 */
int
carp_input(struct ifnet *ifp0, struct mbuf *m, void *cookie)
{
	struct ether_header *eh;
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct srpl *cif;
	struct carp_softc *sc;
	struct srp_ref sr;

#if NVLAN > 0
	/*
	 * If the underlying interface removed the VLAN header itself,
	 * it's not for us.
	 */
	if (ISSET(m->m_flags, M_VLANTAG))
		return (0);
#endif

	eh = mtod(m, struct ether_header *);
	cif = &ifp0->if_carp;

	/*
	 * Look for a running carp interface owning the destination address.
	 * On a match the loop is left with sc (and the SRP reference in sr)
	 * still held; if none matches, sc ends up NULL.
	 */
	SRPL_FOREACH(sc, &sr, cif, sc_list) {
		if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
		    (IFF_UP|IFF_RUNNING))
			continue;

		if (carp_vhe_match(sc, eh->ether_dhost)) {
			/*
			 * These packets look like layer 2 multicast but they
			 * are unicast at layer 3. With help of the tag the
			 * mbuf's M_MCAST flag can be removed by carp_lsdrop()
			 * after we have passed layer 2.
			 */
			if (sc->sc_balancing == CARP_BAL_IP) {
				struct m_tag *mtag;
				mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
				    M_NOWAIT);
				if (mtag == NULL) {
					/* No tag available: drop the packet. */
					m_freem(m);
					goto out;
				}
				m_tag_prepend(m, mtag);
			}
			break;
		}
	}

	if (sc == NULL) {
		SRPL_LEAVE(&sr);

		/* Unicast for nobody here: leave it to the caller. */
		if (!ETHER_IS_MULTICAST(eh->ether_dhost))
			return (0);

		/*
		 * XXX Should really check the list of multicast addresses
		 * for each CARP interface _before_ copying.
		 */
		SRPL_FOREACH(sc, &sr, cif, sc_list) {
			struct mbuf *m0;

			/* Note: only IFF_UP is required here, not IFF_RUNNING. */
			if (!(sc->sc_if.if_flags & IFF_UP))
				continue;

			/* Best effort: skip this interface if the copy fails. */
			m0 = m_dup_pkt(m, ETHER_ALIGN, M_DONTWAIT);
			if (m0 == NULL)
				continue;

			ml_init(&ml);
			ml_enqueue(&ml, m0);

			if_input(&sc->sc_if, &ml);
		}
		SRPL_LEAVE(&sr);

		/* The original mbuf was only duplicated, not consumed. */
		return (0);
	}

	/* Matched a carp interface: pass the original mbuf to it. */
	ml_enqueue(&ml, m);
	if_input(&sc->sc_if, &ml);
out:
	SRPL_LEAVE(&sr);

	return (1);
}
1467 :
1468 : int
1469 0 : carp_lsdrop(struct ifnet *ifp, struct mbuf *m, sa_family_t af, u_int32_t *src,
1470 : u_int32_t *dst, int drop)
1471 : {
1472 : struct carp_softc *sc;
1473 : u_int32_t fold;
1474 : struct m_tag *mtag;
1475 :
1476 0 : if (ifp->if_type != IFT_CARP)
1477 0 : return 0;
1478 0 : sc = ifp->if_softc;
1479 0 : if (sc->sc_balancing == CARP_BAL_NONE)
1480 0 : return 0;
1481 :
1482 : /*
1483 : * Remove M_MCAST flag from mbuf of balancing ip traffic, since the fact
1484 : * that it is layer 2 multicast does not implicate that it is also layer
1485 : * 3 multicast.
1486 : */
1487 0 : if (m->m_flags & M_MCAST &&
1488 0 : (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
1489 0 : m_tag_delete(m, mtag);
1490 0 : m->m_flags &= ~M_MCAST;
1491 0 : }
1492 :
1493 : /*
1494 : * Return without making a drop decision. This allows to clear the
1495 : * M_MCAST flag and do nothing else.
1496 : */
1497 0 : if (!drop)
1498 0 : return 0;
1499 :
1500 : /*
1501 : * Never drop carp advertisements.
1502 : * XXX Bad idea to pass all broadcast / multicast traffic?
1503 : */
1504 0 : if (m->m_flags & (M_BCAST|M_MCAST))
1505 0 : return 0;
1506 :
1507 0 : fold = src[0] ^ dst[0];
1508 : #ifdef INET6
1509 0 : if (af == AF_INET6) {
1510 : int i;
1511 0 : for (i = 1; i < 4; i++)
1512 0 : fold ^= src[i] ^ dst[i];
1513 0 : }
1514 : #endif
1515 0 : if (sc->sc_lscount == 0) /* just to be safe */
1516 0 : return 1;
1517 :
1518 0 : return ((1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask) == 0;
1519 0 : }
1520 :
/*
 * Callback-style wrapper: takes the vhost entry as an opaque pointer and
 * runs carp_master_down() on it with the net lock held.
 */
void
carp_timer_down(void *v)
{
	struct carp_vhost_entry *vhe = v;

	NET_LOCK();
	carp_master_down(vhe);
	NET_UNLOCK();
}
1528 :
1529 : void
1530 0 : carp_master_down(struct carp_vhost_entry *vhe)
1531 : {
1532 0 : struct carp_softc *sc = vhe->parent_sc;
1533 :
1534 0 : NET_ASSERT_LOCKED();
1535 :
1536 0 : switch (vhe->state) {
1537 : case INIT:
1538 0 : printf("%s: master_down event in INIT state\n",
1539 0 : sc->sc_if.if_xname);
1540 0 : break;
1541 : case MASTER:
1542 : break;
1543 : case BACKUP:
1544 0 : carp_set_state(vhe, MASTER);
1545 0 : carp_send_ad(vhe);
1546 0 : if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) {
1547 0 : carp_send_arp(sc);
1548 : /* Schedule a delayed ARP to deal w/ some L3 switches */
1549 0 : sc->sc_delayed_arp = 2;
1550 : #ifdef INET6
1551 0 : carp_send_na(sc);
1552 : #endif /* INET6 */
1553 0 : }
1554 0 : carp_setrun(vhe, 0);
1555 0 : carpstat_inc(carps_preempt);
1556 0 : break;
1557 : }
1558 0 : }
1559 :
1560 : void
1561 0 : carp_setrun_all(struct carp_softc *sc, sa_family_t af)
1562 : {
1563 : struct carp_vhost_entry *vhe;
1564 :
1565 0 : KERNEL_ASSERT_LOCKED(); /* touching carp_vhost */
1566 0 : SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1567 0 : carp_setrun(vhe, af);
1568 : }
1569 0 : }
1570 :
1571 : /*
1572 : * When in backup state, af indicates whether to reset the master down timer
1573 : * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1574 : */
1575 : void
1576 0 : carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af)
1577 : {
1578 0 : struct timeval tv;
1579 0 : struct carp_softc *sc = vhe->parent_sc;
1580 :
1581 0 : if (sc->sc_carpdev == NULL) {
1582 0 : sc->sc_if.if_flags &= ~IFF_RUNNING;
1583 0 : carp_set_state_all(sc, INIT);
1584 0 : return;
1585 : }
1586 :
1587 0 : if (memcmp(((struct arpcom *)sc->sc_carpdev)->ac_enaddr,
1588 0 : sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0)
1589 0 : sc->sc_realmac = 1;
1590 : else
1591 0 : sc->sc_realmac = 0;
1592 :
1593 0 : if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 &&
1594 0 : (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1595 0 : sc->sc_if.if_flags |= IFF_RUNNING;
1596 : } else {
1597 0 : sc->sc_if.if_flags &= ~IFF_RUNNING;
1598 0 : return;
1599 : }
1600 :
1601 0 : switch (vhe->state) {
1602 : case INIT:
1603 0 : carp_set_state(vhe, BACKUP);
1604 0 : carp_setrun(vhe, 0);
1605 0 : break;
1606 : case BACKUP:
1607 0 : timeout_del(&vhe->ad_tmo);
1608 0 : tv.tv_sec = 3 * sc->sc_advbase;
1609 0 : if (sc->sc_advbase == 0 && vhe->advskew == 0)
1610 0 : tv.tv_usec = 3 * 1000000 / 256;
1611 0 : else if (sc->sc_advbase == 0)
1612 0 : tv.tv_usec = 3 * vhe->advskew * 1000000 / 256;
1613 : else
1614 0 : tv.tv_usec = vhe->advskew * 1000000 / 256;
1615 0 : if (vhe->vhe_leader)
1616 0 : sc->sc_delayed_arp = -1;
1617 0 : switch (af) {
1618 : case AF_INET:
1619 0 : timeout_add(&vhe->md_tmo, tvtohz(&tv));
1620 0 : break;
1621 : #ifdef INET6
1622 : case AF_INET6:
1623 0 : timeout_add(&vhe->md6_tmo, tvtohz(&tv));
1624 0 : break;
1625 : #endif /* INET6 */
1626 : default:
1627 0 : if (sc->sc_naddrs)
1628 0 : timeout_add(&vhe->md_tmo, tvtohz(&tv));
1629 0 : if (sc->sc_naddrs6)
1630 0 : timeout_add(&vhe->md6_tmo, tvtohz(&tv));
1631 : break;
1632 : }
1633 : break;
1634 : case MASTER:
1635 0 : tv.tv_sec = sc->sc_advbase;
1636 0 : if (sc->sc_advbase == 0 && vhe->advskew == 0)
1637 0 : tv.tv_usec = 1 * 1000000 / 256;
1638 : else
1639 0 : tv.tv_usec = vhe->advskew * 1000000 / 256;
1640 0 : timeout_add(&vhe->ad_tmo, tvtohz(&tv));
1641 0 : break;
1642 : }
1643 0 : }
1644 :
1645 : void
1646 0 : carp_multicast_cleanup(struct carp_softc *sc)
1647 : {
1648 0 : struct ip_moptions *imo = &sc->sc_imo;
1649 : #ifdef INET6
1650 0 : struct ip6_moptions *im6o = &sc->sc_im6o;
1651 : #endif
1652 0 : u_int16_t n = imo->imo_num_memberships;
1653 :
1654 : /* Clean up our own multicast memberships */
1655 0 : while (n-- > 0) {
1656 0 : if (imo->imo_membership[n] != NULL) {
1657 0 : in_delmulti(imo->imo_membership[n]);
1658 0 : imo->imo_membership[n] = NULL;
1659 0 : }
1660 : }
1661 0 : imo->imo_num_memberships = 0;
1662 0 : imo->imo_ifidx = 0;
1663 :
1664 : #ifdef INET6
1665 0 : while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1666 : struct in6_multi_mship *imm =
1667 : LIST_FIRST(&im6o->im6o_memberships);
1668 :
1669 0 : LIST_REMOVE(imm, i6mm_chain);
1670 0 : in6_leavegroup(imm);
1671 : }
1672 0 : im6o->im6o_ifidx = 0;
1673 : #endif
1674 :
1675 : /* And any other multicast memberships */
1676 0 : carp_ether_purgemulti(sc);
1677 0 : }
1678 :
1679 : int
1680 0 : carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp0)
1681 : {
1682 : struct srpl *cif;
1683 : struct carp_softc *vr, *last = NULL, *after = NULL;
1684 : int myself = 0, error = 0;
1685 :
1686 0 : KASSERT(ifp0 != sc->sc_carpdev);
1687 0 : KERNEL_ASSERT_LOCKED(); /* touching if_carp */
1688 :
1689 0 : if ((ifp0->if_flags & IFF_MULTICAST) == 0)
1690 0 : return (EADDRNOTAVAIL);
1691 :
1692 0 : if (ifp0->if_type != IFT_ETHER)
1693 0 : return (EINVAL);
1694 :
1695 0 : sc->dh_cookie = hook_establish(ifp0->if_detachhooks, 0,
1696 0 : carpdetach, sc);
1697 0 : if (sc->dh_cookie == NULL)
1698 0 : return (ENOMEM);
1699 :
1700 0 : sc->lh_cookie = hook_establish(ifp0->if_linkstatehooks, 1,
1701 0 : carp_carpdev_state, ifp0);
1702 0 : if (sc->lh_cookie == NULL) {
1703 : error = ENOMEM;
1704 0 : goto rm_dh;
1705 : }
1706 :
1707 0 : cif = &ifp0->if_carp;
1708 0 : if (SRPL_EMPTY_LOCKED(cif)) {
1709 0 : if ((error = ifpromisc(ifp0, 1)))
1710 : goto rm_lh;
1711 :
1712 0 : } else if (carp_check_dup_vhids(sc, cif, NULL)) {
1713 : error = EINVAL;
1714 0 : goto rm_lh;
1715 : }
1716 :
1717 : /* detach from old interface */
1718 0 : if (sc->sc_carpdev != NULL)
1719 0 : carpdetach(sc);
1720 :
1721 : /* attach carp interface to physical interface */
1722 0 : sc->sc_carpdev = ifp0;
1723 0 : sc->sc_if.if_capabilities = ifp0->if_capabilities &
1724 : IFCAP_CSUM_MASK;
1725 :
1726 0 : SRPL_FOREACH_LOCKED(vr, cif, sc_list) {
1727 : struct carp_vhost_entry *vrhead, *schead;
1728 : last = vr;
1729 :
1730 0 : if (vr == sc)
1731 0 : myself = 1;
1732 :
1733 0 : vrhead = SRPL_FIRST_LOCKED(&vr->carp_vhosts);
1734 0 : schead = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
1735 0 : if (vrhead->vhid < schead->vhid)
1736 0 : after = vr;
1737 : }
1738 :
1739 0 : if (!myself) {
1740 : /* We're trying to keep things in order */
1741 0 : if (last == NULL) {
1742 0 : SRPL_INSERT_HEAD_LOCKED(&carp_sc_rc, cif,
1743 : sc, sc_list);
1744 0 : } else if (after == NULL) {
1745 0 : SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, last,
1746 : sc, sc_list);
1747 0 : } else {
1748 0 : SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, after,
1749 : sc, sc_list);
1750 : }
1751 : }
1752 0 : if (sc->sc_naddrs || sc->sc_naddrs6)
1753 0 : sc->sc_if.if_flags |= IFF_UP;
1754 0 : carp_set_enaddr(sc);
1755 :
1756 : /* Change input handler of the physical interface. */
1757 0 : if_ih_insert(ifp0, carp_input, NULL);
1758 :
1759 0 : carp_carpdev_state(ifp0);
1760 :
1761 0 : return (0);
1762 :
1763 : rm_lh:
1764 0 : hook_disestablish(ifp0->if_linkstatehooks, sc->lh_cookie);
1765 : rm_dh:
1766 0 : hook_disestablish(ifp0->if_detachhooks, sc->dh_cookie);
1767 :
1768 0 : return (error);
1769 0 : }
1770 :
1771 : void
1772 0 : carp_set_vhe_enaddr(struct carp_vhost_entry *vhe)
1773 : {
1774 0 : struct carp_softc *sc = vhe->parent_sc;
1775 :
1776 0 : if (vhe->vhid != 0 && sc->sc_carpdev) {
1777 0 : if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP)
1778 0 : vhe->vhe_enaddr[0] = 1;
1779 : else
1780 0 : vhe->vhe_enaddr[0] = 0;
1781 0 : vhe->vhe_enaddr[1] = 0;
1782 0 : vhe->vhe_enaddr[2] = 0x5e;
1783 0 : vhe->vhe_enaddr[3] = 0;
1784 0 : vhe->vhe_enaddr[4] = 1;
1785 0 : vhe->vhe_enaddr[5] = vhe->vhid;
1786 0 : } else
1787 0 : memset(vhe->vhe_enaddr, 0, ETHER_ADDR_LEN);
1788 0 : }
1789 :
1790 : void
1791 0 : carp_set_enaddr(struct carp_softc *sc)
1792 : {
1793 : struct carp_vhost_entry *vhe;
1794 :
1795 0 : KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1796 0 : SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries)
1797 0 : carp_set_vhe_enaddr(vhe);
1798 :
1799 0 : vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
1800 :
1801 : /*
1802 : * Use the carp lladdr if the running one isn't manually set.
1803 : * Only compare static parts of the lladdr.
1804 : */
1805 0 : if ((memcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1,
1806 0 : ETHER_ADDR_LEN - 2) == 0) ||
1807 0 : (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] &&
1808 0 : !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] &&
1809 0 : !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5]))
1810 0 : bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
1811 :
1812 : /* Make sure the enaddr has changed before further twiddling. */
1813 0 : if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) {
1814 0 : bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl),
1815 : ETHER_ADDR_LEN);
1816 0 : bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN);
1817 : #ifdef INET6
1818 : /*
1819 : * (re)attach a link-local address which matches
1820 : * our new MAC address.
1821 : */
1822 0 : if (sc->sc_naddrs6)
1823 0 : in6_ifattach_linklocal(&sc->sc_if, NULL);
1824 : #endif
1825 0 : carp_set_state_all(sc, INIT);
1826 0 : carp_setrun_all(sc, 0);
1827 0 : }
1828 0 : }
1829 :
1830 : void
1831 0 : carp_addr_updated(void *v)
1832 : {
1833 0 : struct carp_softc *sc = (struct carp_softc *) v;
1834 : struct ifaddr *ifa;
1835 : int new_naddrs = 0, new_naddrs6 = 0;
1836 :
1837 0 : TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1838 0 : if (ifa->ifa_addr->sa_family == AF_INET)
1839 0 : new_naddrs++;
1840 : #ifdef INET6
1841 0 : else if (ifa->ifa_addr->sa_family == AF_INET6)
1842 0 : new_naddrs6++;
1843 : #endif /* INET6 */
1844 : }
1845 :
1846 : /* We received address changes from if_addrhooks callback */
1847 0 : if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) {
1848 :
1849 0 : sc->sc_naddrs = new_naddrs;
1850 0 : sc->sc_naddrs6 = new_naddrs6;
1851 :
1852 : /* Re-establish multicast membership removed by in_control */
1853 0 : if (IN_MULTICAST(sc->sc_peer.s_addr)) {
1854 0 : if (!in_hasmulti(&sc->sc_peer, &sc->sc_if)) {
1855 : struct in_multi **imm =
1856 0 : sc->sc_imo.imo_membership;
1857 : u_int16_t maxmem =
1858 0 : sc->sc_imo.imo_max_memberships;
1859 :
1860 0 : memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));
1861 0 : sc->sc_imo.imo_membership = imm;
1862 0 : sc->sc_imo.imo_max_memberships = maxmem;
1863 :
1864 0 : if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
1865 0 : carp_join_multicast(sc);
1866 0 : }
1867 : }
1868 :
1869 0 : if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
1870 0 : sc->sc_if.if_flags &= ~IFF_UP;
1871 0 : carp_set_state_all(sc, INIT);
1872 0 : } else
1873 0 : carp_hmac_prepare(sc);
1874 : }
1875 :
1876 0 : carp_setrun_all(sc, 0);
1877 0 : }
1878 :
1879 : int
1880 0 : carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1881 : {
1882 0 : struct in_addr *in = &sin->sin_addr;
1883 : int error;
1884 :
1885 0 : KASSERT(sc->sc_carpdev != NULL);
1886 :
1887 : /* XXX is this necessary? */
1888 0 : if (in->s_addr == INADDR_ANY) {
1889 0 : carp_setrun_all(sc, 0);
1890 0 : return (0);
1891 : }
1892 :
1893 0 : if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
1894 0 : return (error);
1895 :
1896 0 : carp_set_state_all(sc, INIT);
1897 :
1898 0 : return (0);
1899 0 : }
1900 :
1901 : int
1902 0 : carp_join_multicast(struct carp_softc *sc)
1903 : {
1904 0 : struct ip_moptions *imo = &sc->sc_imo;
1905 : struct in_multi *imm;
1906 0 : struct in_addr addr;
1907 :
1908 0 : if (!IN_MULTICAST(sc->sc_peer.s_addr))
1909 0 : return (0);
1910 :
1911 0 : addr.s_addr = sc->sc_peer.s_addr;
1912 0 : if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL)
1913 0 : return (ENOBUFS);
1914 :
1915 0 : imo->imo_membership[0] = imm;
1916 0 : imo->imo_num_memberships = 1;
1917 0 : imo->imo_ifidx = sc->sc_if.if_index;
1918 0 : imo->imo_ttl = CARP_DFLTTL;
1919 0 : imo->imo_loop = 0;
1920 0 : return (0);
1921 0 : }
1922 :
1923 :
1924 : #ifdef INET6
1925 : int
1926 0 : carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1927 : {
1928 : int error;
1929 :
1930 0 : KASSERT(sc->sc_carpdev != NULL);
1931 :
1932 0 : if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1933 0 : carp_setrun_all(sc, 0);
1934 0 : return (0);
1935 : }
1936 :
1937 0 : if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
1938 0 : return (error);
1939 :
1940 0 : carp_set_state_all(sc, INIT);
1941 :
1942 0 : return (0);
1943 0 : }
1944 :
1945 : int
1946 0 : carp_join_multicast6(struct carp_softc *sc)
1947 : {
1948 : struct in6_multi_mship *imm, *imm2;
1949 0 : struct ip6_moptions *im6o = &sc->sc_im6o;
1950 0 : struct sockaddr_in6 addr6;
1951 0 : int error;
1952 :
1953 : /* Join IPv6 CARP multicast group */
1954 0 : memset(&addr6, 0, sizeof(addr6));
1955 0 : addr6.sin6_family = AF_INET6;
1956 0 : addr6.sin6_len = sizeof(addr6);
1957 0 : addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1958 0 : addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1959 0 : addr6.sin6_addr.s6_addr8[15] = 0x12;
1960 0 : if ((imm = in6_joingroup(&sc->sc_if,
1961 0 : &addr6.sin6_addr, &error)) == NULL) {
1962 0 : return (error);
1963 : }
1964 : /* join solicited multicast address */
1965 0 : memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
1966 0 : addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1967 0 : addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1968 0 : addr6.sin6_addr.s6_addr32[1] = 0;
1969 0 : addr6.sin6_addr.s6_addr32[2] = htonl(1);
1970 0 : addr6.sin6_addr.s6_addr32[3] = 0;
1971 0 : addr6.sin6_addr.s6_addr8[12] = 0xff;
1972 0 : if ((imm2 = in6_joingroup(&sc->sc_if,
1973 0 : &addr6.sin6_addr, &error)) == NULL) {
1974 0 : in6_leavegroup(imm);
1975 0 : return (error);
1976 : }
1977 :
1978 : /* apply v6 multicast membership */
1979 0 : im6o->im6o_ifidx = sc->sc_if.if_index;
1980 0 : if (imm)
1981 0 : LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
1982 : i6mm_chain);
1983 0 : if (imm2)
1984 0 : LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
1985 : i6mm_chain);
1986 :
1987 0 : return (0);
1988 0 : }
1989 :
1990 : #endif /* INET6 */
1991 :
1992 : int
1993 0 : carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
1994 : {
1995 0 : struct proc *p = curproc; /* XXX */
1996 0 : struct carp_softc *sc = ifp->if_softc;
1997 : struct carp_vhost_entry *vhe;
1998 0 : struct carpreq carpr;
1999 0 : struct ifaddr *ifa = (struct ifaddr *)addr;
2000 0 : struct ifreq *ifr = (struct ifreq *)addr;
2001 0 : struct ifnet *ifp0 = sc->sc_carpdev;
2002 : int i, error = 0;
2003 :
2004 0 : switch (cmd) {
2005 : case SIOCSIFADDR:
2006 0 : if (ifp0 == NULL)
2007 0 : return (EINVAL);
2008 :
2009 0 : switch (ifa->ifa_addr->sa_family) {
2010 : case AF_INET:
2011 0 : sc->sc_if.if_flags |= IFF_UP;
2012 0 : error = carp_set_addr(sc, satosin(ifa->ifa_addr));
2013 0 : break;
2014 : #ifdef INET6
2015 : case AF_INET6:
2016 0 : sc->sc_if.if_flags |= IFF_UP;
2017 0 : error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
2018 0 : break;
2019 : #endif /* INET6 */
2020 : default:
2021 : error = EAFNOSUPPORT;
2022 0 : break;
2023 : }
2024 : break;
2025 :
2026 : case SIOCSIFFLAGS:
2027 0 : KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2028 0 : vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
2029 0 : if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) {
2030 0 : carp_del_all_timeouts(sc);
2031 :
2032 : /* we need the interface up to bow out */
2033 0 : sc->sc_if.if_flags |= IFF_UP;
2034 0 : sc->sc_bow_out = 1;
2035 0 : carp_vhe_send_ad_all(sc);
2036 0 : sc->sc_bow_out = 0;
2037 :
2038 0 : sc->sc_if.if_flags &= ~IFF_UP;
2039 0 : carp_set_state_all(sc, INIT);
2040 0 : carp_setrun_all(sc, 0);
2041 0 : } else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) {
2042 0 : sc->sc_if.if_flags |= IFF_UP;
2043 0 : carp_setrun_all(sc, 0);
2044 0 : }
2045 : break;
2046 :
2047 : case SIOCSVH:
2048 0 : KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2049 0 : vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
2050 0 : if ((error = suser(p)) != 0)
2051 : break;
2052 0 : if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
2053 : break;
2054 : error = 1;
2055 0 : if (carpr.carpr_carpdev[0] != '\0' &&
2056 0 : (ifp0 = ifunit(carpr.carpr_carpdev)) == NULL)
2057 0 : return (EINVAL);
2058 0 : if (carpr.carpr_peer.s_addr == 0)
2059 0 : sc->sc_peer.s_addr = INADDR_CARP_GROUP;
2060 : else
2061 0 : sc->sc_peer.s_addr = carpr.carpr_peer.s_addr;
2062 0 : if (ifp0 != sc->sc_carpdev) {
2063 0 : if ((error = carp_set_ifp(sc, ifp0)))
2064 0 : return (error);
2065 : }
2066 0 : if (vhe->state != INIT && carpr.carpr_state != vhe->state) {
2067 0 : switch (carpr.carpr_state) {
2068 : case BACKUP:
2069 0 : timeout_del(&vhe->ad_tmo);
2070 0 : carp_set_state_all(sc, BACKUP);
2071 0 : carp_setrun_all(sc, 0);
2072 0 : break;
2073 : case MASTER:
2074 0 : KERNEL_ASSERT_LOCKED();
2075 : /* touching carp_vhosts */
2076 0 : SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
2077 : vhost_entries)
2078 0 : carp_master_down(vhe);
2079 : break;
2080 : default:
2081 : break;
2082 : }
2083 : }
2084 0 : if ((error = carp_vhids_ioctl(sc, &carpr)))
2085 0 : return (error);
2086 0 : if (carpr.carpr_advbase >= 0) {
2087 0 : if (carpr.carpr_advbase > 255) {
2088 : error = EINVAL;
2089 0 : break;
2090 : }
2091 0 : sc->sc_advbase = carpr.carpr_advbase;
2092 0 : error--;
2093 0 : }
2094 0 : if (memcmp(sc->sc_advskews, carpr.carpr_advskews,
2095 : sizeof(sc->sc_advskews))) {
2096 : i = 0;
2097 0 : KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2098 0 : SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
2099 : vhost_entries)
2100 0 : vhe->advskew = carpr.carpr_advskews[i++];
2101 0 : bcopy(carpr.carpr_advskews, sc->sc_advskews,
2102 : sizeof(sc->sc_advskews));
2103 0 : }
2104 0 : if (sc->sc_balancing != carpr.carpr_balancing) {
2105 0 : if (carpr.carpr_balancing > CARP_BAL_MAXID) {
2106 : error = EINVAL;
2107 0 : break;
2108 : }
2109 0 : sc->sc_balancing = carpr.carpr_balancing;
2110 0 : carp_set_enaddr(sc);
2111 0 : carp_update_lsmask(sc);
2112 0 : }
2113 0 : bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
2114 0 : if (error > 0)
2115 0 : error = EINVAL;
2116 : else {
2117 : error = 0;
2118 0 : carp_hmac_prepare(sc);
2119 0 : carp_setrun_all(sc, 0);
2120 : }
2121 : break;
2122 :
2123 : case SIOCGVH:
2124 0 : memset(&carpr, 0, sizeof(carpr));
2125 0 : if (ifp0 != NULL)
2126 0 : strlcpy(carpr.carpr_carpdev, ifp0->if_xname, IFNAMSIZ);
2127 : i = 0;
2128 0 : KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2129 0 : SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
2130 0 : carpr.carpr_vhids[i] = vhe->vhid;
2131 0 : carpr.carpr_advskews[i] = vhe->advskew;
2132 0 : carpr.carpr_states[i] = vhe->state;
2133 0 : i++;
2134 : }
2135 0 : carpr.carpr_advbase = sc->sc_advbase;
2136 0 : carpr.carpr_balancing = sc->sc_balancing;
2137 0 : if (suser(p) == 0)
2138 0 : bcopy(sc->sc_key, carpr.carpr_key,
2139 : sizeof(carpr.carpr_key));
2140 0 : carpr.carpr_peer.s_addr = sc->sc_peer.s_addr;
2141 0 : error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2142 0 : break;
2143 :
2144 : case SIOCADDMULTI:
2145 0 : error = carp_ether_addmulti(sc, ifr);
2146 0 : break;
2147 :
2148 : case SIOCDELMULTI:
2149 0 : error = carp_ether_delmulti(sc, ifr);
2150 0 : break;
2151 : case SIOCAIFGROUP:
2152 : case SIOCDIFGROUP:
2153 0 : if (sc->sc_demote_cnt)
2154 0 : carp_ifgroup_ioctl(ifp, cmd, addr);
2155 : break;
2156 : case SIOCSIFGATTR:
2157 0 : carp_ifgattr_ioctl(ifp, cmd, addr);
2158 0 : break;
2159 : default:
2160 : error = ENOTTY;
2161 0 : }
2162 :
2163 0 : if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0)
2164 0 : carp_set_enaddr(sc);
2165 0 : return (error);
2166 0 : }
2167 :
/*
 * Check whether a vhid used by another carp softc on the list `cif'
 * collides with a vhid belonging to `sc'.
 *
 * Every softc on `cif' other than `sc' itself is walked.  Each of its
 * vhost entries is compared against:
 *   - the entries of carpr->carpr_vhids up to the first zero entry,
 *     when `carpr' is non-NULL, and
 *   - the vhost entries currently attached to `sc'.
 *
 * Returns EINVAL on the first match and 0 when no vhid collides.
 * The kernel lock must be held; this is asserted below.
 */
2168 : int
2169 0 : carp_check_dup_vhids(struct carp_softc *sc, struct srpl *cif,
2170 : struct carpreq *carpr)
2171 : {
2172 : struct carp_softc *vr;
2173 : struct carp_vhost_entry *vhe, *vhe0;
2174 : int i;
2175 :
2176 0 : KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */
2177 :
2178 0 : SRPL_FOREACH_LOCKED(vr, cif, sc_list) {
/* A softc never conflicts with itself. */
2179 0 : if (vr == sc)
2180 : continue;
2181 0 : SRPL_FOREACH_LOCKED(vhe, &vr->carp_vhosts, vhost_entries) {
2182 0 : if (carpr) {
/*
 * NOTE(review): the scan ends only at a zero vhid; there is no
 * explicit bound against the size of carpr_vhids -- confirm a
 * terminating zero entry is guaranteed by the callers.
 */
2183 0 : for (i = 0; carpr->carpr_vhids[i]; i++) {
2184 0 : if (vhe->vhid == carpr->carpr_vhids[i])
2185 0 : return (EINVAL);
2186 : }
2187 : }
/* Also compare against the vhosts already configured on sc. */
2188 0 : SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts,
2189 : vhost_entries) {
2190 0 : if (vhe->vhid == vhe0->vhid)
2191 0 : return (EINVAL);
2192 : }
2193 : }
2194 : }
2195 0 : return (0);
2196 0 : }
2197 :
/*
 * Replace the vhosts of `sc' with the set requested in `carpr'.
 *
 * Nothing is changed, and 0 is returned, when the request carries no
 * vhid (carpr_vhids[0] == 0) or when the requested vhid array is
 * identical to sc->sc_vhids.
 *
 * Validation (each failure returns EINVAL):
 *   - a vhid appears more than once in the request,
 *   - carp_check_dup_vhids() reports a collision with another softc on
 *     the same sc_carpdev,
 *   - an advskew of 255 or more is requested.
 *
 * Side effects on success: carpr->carpr_balancing may be rewritten with
 * a default, all timeouts and existing vhosts of `sc' are destroyed,
 * new vhosts are created in ascending vhid order, the link-layer
 * address is recomputed and every vhost is put into INIT.
 *
 * Returns ENOMEM when carp_new_vhost() fails.
 */
2198 : int
2199 0 : carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr)
2200 : {
2201 : int i, j;
/* One flag per possible u_int8_t vhid value; detects duplicates. */
2202 0 : u_int8_t taken_vhids[256];
2203 :
2204 0 : if (carpr->carpr_vhids[0] == 0 ||
2205 0 : !memcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids)))
2206 0 : return (0);
2207 :
2208 0 : memset(taken_vhids, 0, sizeof(taken_vhids));
/*
 * NOTE(review): the loop stops only at a zero vhid; nothing bounds `i'
 * by the size of carpr_vhids -- confirm the request is guaranteed to be
 * zero-terminated.
 */
2209 0 : for (i = 0; carpr->carpr_vhids[i]; i++) {
2210 0 : if (taken_vhids[carpr->carpr_vhids[i]])
2211 0 : return (EINVAL);
2212 0 : taken_vhids[carpr->carpr_vhids[i]] = 1;
2213 :
/*
 * None of the arguments below depend on `i', so this check repeats the
 * same work on every iteration.
 */
2214 0 : if (sc->sc_carpdev) {
2215 : struct srpl *cif;
2216 0 : cif = &sc->sc_carpdev->if_carp;
2217 0 : if (carp_check_dup_vhids(sc, cif, carpr))
2218 0 : return (EINVAL);
2219 0 : }
2220 0 : if (carpr->carpr_advskews[i] >= 255)
2221 0 : return (EINVAL);
2222 : }
/* Here `i' is the number of vhids in the request. */
2223 : /* set sane balancing defaults */
2224 0 : if (i <= 1)
2225 0 : carpr->carpr_balancing = CARP_BAL_NONE;
2226 0 : else if (carpr->carpr_balancing == CARP_BAL_NONE &&
2227 0 : sc->sc_balancing == CARP_BAL_NONE)
2228 0 : carpr->carpr_balancing = CARP_BAL_IP;
2229 :
2230 : /* destroy all */
2231 0 : carp_del_all_timeouts(sc);
2232 0 : carp_destroy_vhosts(sc);
2233 0 : memset(sc->sc_vhids, 0, sizeof(sc->sc_vhids));
2234 :
/*
 * For each possible vhid value j, in ascending order, create the vhost
 * whose requested vhid equals j.  sc_vhids/sc_advskews are filled at the
 * request index `i', so they keep the request order.
 */
2235 : /* sort vhosts list by vhid */
2236 0 : for (j = 1; j <= 255; j++) {
2237 0 : for (i = 0; carpr->carpr_vhids[i]; i++) {
2238 0 : if (carpr->carpr_vhids[i] != j)
2239 : continue;
/*
 * NOTE(review): on failure the old vhosts are already destroyed and the
 * function returns without restoring them -- confirm callers cope.
 */
2240 0 : if (carp_new_vhost(sc, carpr->carpr_vhids[i],
2241 0 : carpr->carpr_advskews[i]))
2242 0 : return (ENOMEM);
2243 0 : sc->sc_vhids[i] = carpr->carpr_vhids[i];
2244 0 : sc->sc_advskews[i] = carpr->carpr_advskews[i];
2245 0 : }
2246 : }
2247 0 : carp_set_enaddr(sc);
2248 0 : carp_set_state_all(sc, INIT);
2249 0 : return (0);
2250 0 : }
2251 :
/*
 * Propagate this interface's demotion counter to an interface group
 * that the interface is being added to or removed from.
 *
 * `addr' is interpreted as a struct ifgroupreq naming the group.  The
 * adjustment is the softc's sc_demote_cnt, negated when `cmd' is
 * SIOCDIFGROUP.  It is applied to the ifg_carp_demoted counter of every
 * group on ifp->if_groups whose name matches the request; a result that
 * would be negative is clamped to 0.  Requests naming IFG_ALL are
 * ignored.
 */
2252 : void
2253 0 : carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2254 : {
2255 0 : struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2256 : struct ifg_list *ifgl;
2257 : int *dm, adj;
2258 :
2259 0 : if (!strcmp(ifgr->ifgr_group, IFG_ALL))
2260 0 : return;
2261 0 : adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
/* Leaving the group: take our contribution back out. */
2262 0 : if (cmd == SIOCDIFGROUP)
2263 0 : adj = adj * -1;
2264 :
2265 0 : TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2266 0 : if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) {
2267 0 : dm = &ifgl->ifgl_group->ifg_carp_demoted;
/* Never let the group counter drop below zero. */
2268 0 : if (*dm + adj >= 0)
2269 0 : *dm += adj;
2270 : else
2271 0 : *dm = 0;
2272 : }
2273 0 : }
2274 :
/*
 * React to a group attribute change (`addr' is a struct ifgroupreq).
 *
 * When the request's ifg_carp_demoted attribute is positive and the
 * carp interface is both IFF_UP and IFF_RUNNING, an advertisement is
 * sent for all vhosts via carp_vhe_send_ad_all().  `cmd' is not
 * inspected here.
 */
2275 : void
2276 0 : carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2277 : {
2278 0 : struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2279 0 : struct carp_softc *sc = ifp->if_softc;
2280 :
2281 0 : if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags &
2282 0 : (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
2283 0 : carp_vhe_send_ad_all(sc);
2284 0 : }
2285 :
/*
 * Drain the send queue of the carp interface `ifp' and hand each packet
 * to the parent interface (ifp->if_carpdev).
 *
 * For every dequeued mbuf:
 *   - it is passed to bpf when a listener is attached (NBPFILTER > 0),
 *   - it is dropped and counted in if_oerrors when the parent is not
 *     both IFF_UP and IFF_RUNNING,
 *   - in the CARP_BAL_IP and CARP_BAL_IPSTEALTH balancing modes the
 *     Ethernet source address is overwritten with the parent's
 *     ac_enaddr,
 *   - it is enqueued on the parent; failure increments if_oerrors,
 *     success increments if_opackets.
 *
 * NOTE(review): ifp->if_carpdev is dereferenced without a NULL check --
 * confirm the queue cannot be started while the parent is detached.
 */
2286 : void
2287 0 : carp_start(struct ifnet *ifp)
2288 : {
2289 0 : struct carp_softc *sc = ifp->if_softc;
2290 : struct mbuf *m;
2291 :
2292 0 : for (;;) {
2293 0 : IFQ_DEQUEUE(&ifp->if_snd, m);
/* Queue is empty: done. */
2294 0 : if (m == NULL)
2295 : break;
2296 :
2297 : #if NBPFILTER > 0
2298 0 : if (ifp->if_bpf)
2299 0 : bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
2300 : #endif /* NBPFILTER > 0 */
2301 :
2302 0 : if ((ifp->if_carpdev->if_flags & (IFF_UP|IFF_RUNNING)) !=
2303 : (IFF_UP|IFF_RUNNING)) {
2304 0 : ifp->if_oerrors++;
2305 0 : m_freem(m);
2306 0 : continue;
2307 : }
2308 :
2309 : /*
2310 : * Do not leak the multicast address when sending
2311 : * advertisements in 'ip' and 'ip-stealth' balancing
2312 : * modes.
2313 : */
2314 0 : if (sc->sc_balancing == CARP_BAL_IP ||
2315 0 : sc->sc_balancing == CARP_BAL_IPSTEALTH) {
2316 : struct ether_header *eh;
2317 : uint8_t *esrc;
2318 :
2319 0 : eh = mtod(m, struct ether_header *);
2320 0 : esrc = ((struct arpcom*)ifp->if_carpdev)->ac_enaddr;
2321 0 : memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost));
2322 0 : }
2323 :
/*
 * The mbuf is not freed on failure here; presumably if_enqueue() has
 * already consumed it -- TODO confirm.
 */
2324 0 : if (if_enqueue(ifp->if_carpdev, m)) {
2325 0 : ifp->if_oerrors++;
2326 0 : continue;
2327 : }
2328 0 : ifp->if_opackets++;
2329 : }
2330 0 : }
2331 :
/*
 * Output routine of the carp interface.
 *
 * The packet is freed and 0 is returned when the softc has no parent
 * (sc_carpdev == NULL).  Otherwise the MASTER state is read from
 * sc->cur_vhe when it is set, or from the first vhost entry when it is
 * not.  With CARP_BAL_NONE balancing a non-master drops the packet and
 * returns ENETUNREACH; in every other case the packet is passed on to
 * ether_output() and its result is returned.
 */
2332 : int
2333 0 : carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2334 : struct rtentry *rt)
2335 : {
2336 0 : struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2337 : struct carp_vhost_entry *vhe;
2338 0 : struct srp_ref sr;
2339 : int ismaster;
2340 :
2341 : /*
2342 : * If the parent of this carp(4) got destroyed while
2343 : * `m' was being processed, silently drop it.
2344 : */
2345 0 : if (sc->sc_carpdev == NULL) {
2346 0 : m_freem(m);
2347 0 : return (0);
2348 : }
2349 :
2350 0 : if (sc->cur_vhe == NULL) {
/*
 * NOTE(review): the result of SRPL_FIRST() is dereferenced without a
 * NULL check -- confirm the vhost list can never be empty here.
 */
2351 0 : vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
2352 0 : ismaster = (vhe->state == MASTER);
2353 0 : SRPL_LEAVE(&sr);
2354 0 : } else {
2355 0 : ismaster = (sc->cur_vhe->state == MASTER);
2356 : }
2357 :
/* Without balancing only the master may transmit. */
2358 0 : if ((sc->sc_balancing == CARP_BAL_NONE && !ismaster)) {
2359 0 : m_freem(m);
2360 0 : return (ENETUNREACH);
2361 : }
2362 :
2363 0 : return (ether_output(ifp, m, sa, rt));
2364 0 : }
2365 :
/*
 * Move every vhost entry of `sc' into `state'.
 *
 * Entries already in `state' are skipped, because carp_set_state()
 * asserts that the state actually changes.  The kernel lock must be
 * held; this is asserted below.
 */
2366 : void
2367 0 : carp_set_state_all(struct carp_softc *sc, int state)
2368 : {
2369 : struct carp_vhost_entry *vhe;
2370 :
2371 0 : KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2372 :
2373 0 : SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
2374 0 : if (vhe->state == state)
2375 : continue;
2376 :
2377 0 : carp_set_state(vhe, state);
2378 0 : }
2379 0 : }
2380 :
/*
 * Switch a single vhost entry to `state' and update the link state of
 * the owning carp interface.
 *
 * `state' must differ from the current state (KASSERT).  The transition
 * is logged at LOG_WARNING when INIT is the old or the new state and at
 * LOG_CRIT otherwise; the vhid is included in the message only when the
 * softc has more than one vhost.
 *
 * After the state is stored, carp_update_lsmask() is called and the
 * interface link state is recomputed over all vhosts of the softc:
 *   - LINK_STATE_UP      if any vhost is MASTER,
 *   - LINK_STATE_DOWN    if none is MASTER but at least one is BACKUP,
 *   - LINK_STATE_INVALID otherwise.
 * The result is announced with if_link_state_change().
 */
2381 : void
2382 0 : carp_set_state(struct carp_vhost_entry *vhe, int state)
2383 : {
2384 0 : struct carp_softc *sc = vhe->parent_sc;
2385 : static const char *carp_states[] = { CARP_STATES };
2386 : int loglevel;
2387 : struct carp_vhost_entry *vhe0;
2388 :
2389 0 : KASSERT(vhe->state != state);
2390 :
2391 0 : if (vhe->state == INIT || state == INIT)
2392 0 : loglevel = LOG_WARNING;
2393 : else
2394 : loglevel = LOG_CRIT;
2395 :
2396 0 : if (sc->sc_vhe_count > 1)
2397 0 : CARP_LOG(loglevel, sc,
2398 : ("state transition (vhid %d): %s -> %s", vhe->vhid,
2399 : carp_states[vhe->state], carp_states[state]));
2400 : else
2401 0 : CARP_LOG(loglevel, sc,
2402 : ("state transition: %s -> %s",
2403 : carp_states[vhe->state], carp_states[state]));
2404 :
2405 0 : vhe->state = state;
2406 0 : carp_update_lsmask(sc);
2407 :
2408 0 : KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2409 :
/* Start from INVALID; the loop below upgrades it to DOWN or UP. */
2410 0 : sc->sc_if.if_link_state = LINK_STATE_INVALID;
2411 0 : SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) {
2412 : /*
2413 : * Link must be up if at least one vhe is in state MASTER to
2414 : * bring or keep route up.
2415 : */
2416 0 : if (vhe0->state == MASTER) {
2417 0 : sc->sc_if.if_link_state = LINK_STATE_UP;
2418 0 : break;
2419 0 : } else if (vhe0->state == BACKUP) {
2420 0 : sc->sc_if.if_link_state = LINK_STATE_DOWN;
2421 0 : }
2422 : }
2423 0 : if_link_state_change(&sc->sc_if);
2424 0 : }
2425 :
/*
 * Adjust carp demotion counters by `adj' on behalf of interface `ifp'.
 *
 * When `ifp' is itself a carp interface (IFT_CARP) its softc's
 * sc_demote_cnt is adjusted.  Then the ifg_carp_demoted counter of
 * every group `ifp' belongs to, except IFG_ALL, is adjusted and the
 * change is logged together with `reason'.  Counters are clamped so
 * they never become negative.
 *
 * If a positive adjustment left some group counter at exactly 1,
 * carp_send_ad_all() is called once after all groups are processed.
 */
2426 : void
2427 0 : carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason)
2428 : {
2429 : struct ifg_list *ifgl;
2430 : int *dm, need_ad;
/* NULL softc handed to CARP_LOG; the message names ifp explicitly. */
2431 : struct carp_softc *nil = NULL;
2432 :
2433 0 : if (ifp->if_type == IFT_CARP) {
2434 0 : dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2435 0 : if (*dm + adj >= 0)
2436 0 : *dm += adj;
2437 : else
2438 0 : *dm = 0;
2439 : }
2440 :
2441 : need_ad = 0;
2442 0 : TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2443 0 : if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2444 : continue;
2445 0 : dm = &ifgl->ifgl_group->ifg_carp_demoted;
2446 :
2447 0 : if (*dm + adj >= 0)
2448 0 : *dm += adj;
2449 : else
2450 0 : *dm = 0;
2451 :
/* Counter reached 1 through a positive adjustment. */
2452 0 : if (adj > 0 && *dm == 1)
2453 0 : need_ad = 1;
2454 0 : CARP_LOG(LOG_ERR, nil,
2455 : ("%s demoted group %s by %d to %d (%s)",
2456 : ifp->if_xname, ifgl->ifgl_group->ifg_group,
2457 : adj, *dm, reason));
2458 : }
2459 0 : if (need_ad)
2460 0 : carp_send_ad_all();
2461 0 : }
2462 :
/*
 * Compute the demotion count for `sc'.
 *
 * The ifg_carp_demoted counters of all groups the carp interface
 * belongs to are summed.  When that sum is 0, the softc's own
 * sc_demote_cnt is used instead.  The result is capped at 255.
 */
2463 : int
2464 0 : carp_group_demote_count(struct carp_softc *sc)
2465 : {
2466 : struct ifg_list *ifgl;
2467 : int count = 0;
2468 :
2469 0 : TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next)
2470 0 : count += ifgl->ifgl_group->ifg_carp_demoted;
2471 :
2472 0 : if (count == 0 && sc->sc_demote_cnt)
2473 0 : count = sc->sc_demote_cnt;
2474 :
2475 0 : return (count > 255 ? 255 : count);
2476 : }
2477 :
/*
 * Re-evaluate all carp interfaces stacked on the parent interface `v'
 * (a struct ifnet *).  Parents that are not IFT_ETHER are ignored.
 *
 * For each carp softc on the parent's if_carp list:
 *   - If the parent's link state is LINK_STATE_DOWN or the parent is
 *     not IFF_UP: IFF_RUNNING is cleared on the carp interface, all
 *     timeouts are deleted, every vhost goes to INIT, sc_suppress is
 *     set and carp_setrun_all() is called.  If the softc was not
 *     already suppressed, its groups are demoted by 1.
 *   - Otherwise, if the softc was suppressed: every vhost goes to INIT,
 *     sc_suppress is cleared, carp_setrun_all() is called and the
 *     earlier demotion is undone (adjustment of -1).
 *
 * The kernel lock must be held; this is asserted below.
 */
2478 : void
2479 0 : carp_carpdev_state(void *v)
2480 : {
2481 : struct srpl *cif;
2482 : struct carp_softc *sc;
2483 0 : struct ifnet *ifp0 = v;
2484 :
2485 0 : if (ifp0->if_type != IFT_ETHER)
2486 0 : return;
2487 :
2488 0 : cif = &ifp0->if_carp;
2489 :
2490 0 : KERNEL_ASSERT_LOCKED(); /* touching if_carp */
2491 :
2492 0 : SRPL_FOREACH_LOCKED(sc, cif, sc_list) {
/* Remember the previous value so each demotion is applied only once. */
2493 0 : int suppressed = sc->sc_suppress;
2494 :
2495 0 : if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
2496 0 : !(sc->sc_carpdev->if_flags & IFF_UP)) {
2497 0 : sc->sc_if.if_flags &= ~IFF_RUNNING;
2498 0 : carp_del_all_timeouts(sc);
2499 0 : carp_set_state_all(sc, INIT);
2500 0 : sc->sc_suppress = 1;
2501 0 : carp_setrun_all(sc, 0);
2502 0 : if (!suppressed)
2503 0 : carp_group_demote_adj(&sc->sc_if, 1, "carpdev");
2504 0 : } else if (suppressed) {
2505 0 : carp_set_state_all(sc, INIT);
2506 0 : sc->sc_suppress = 0;
2507 0 : carp_setrun_all(sc, 0);
2508 0 : carp_group_demote_adj(&sc->sc_if, -1, "carpdev");
2509 0 : }
2510 : }
2511 0 : }
2512 :
/*
 * Add the multicast address in `ifr' to the carp interface and, when it
 * is new, to the parent interface as well.
 *
 * Returns EINVAL when the softc has no parent.  The address is first
 * added to the softc's own arpcom with ether_addmulti(); any result
 * other than ENETRESET is returned unchanged.  On ENETRESET a
 * carp_mc_entry is allocated (M_NOWAIT) to remember the address, linked
 * onto carp_mc_listhead, and SIOCADDMULTI is forwarded to the parent's
 * if_ioctl.
 *
 * Error unwinding: if the parent ioctl fails the entry is unlinked and
 * freed; in that case, and when the allocation fails (ENOMEM), the
 * address is removed from the softc's arpcom again.
 */
2513 : int
2514 0 : carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2515 : {
2516 : struct ifnet *ifp0;
2517 : struct carp_mc_entry *mc;
2518 0 : u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2519 : int error;
2520 :
2521 0 : ifp0 = sc->sc_carpdev;
2522 0 : if (ifp0 == NULL)
2523 0 : return (EINVAL);
2524 :
2525 0 : error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2526 0 : if (error != ENETRESET)
2527 0 : return (error);
2528 :
2529 : /*
2530 : * This is a new multicast address. We have to tell the parent
2531 : * about it. Also, remember this multicast address so that
2532 : * we can delete it on unconfigure.
2533 : */
2534 0 : mc = malloc(sizeof(*mc), M_DEVBUF, M_NOWAIT);
2535 0 : if (mc == NULL) {
2536 : error = ENOMEM;
2537 0 : goto alloc_failed;
2538 : }
2539 :
2540 : /*
2541 : * As ether_addmulti() returned ENETRESET, the following two
2542 : * statements shouldn't fail.
2543 : */
2544 0 : (void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2545 0 : ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
/*
 * NOTE(review): the copy length is the sa_len carried in the request;
 * confirm it cannot exceed sizeof(mc->mc_addr) or the size of
 * ifr->ifr_addr.
 */
2546 0 : memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
2547 0 : LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2548 :
2549 0 : error = (*ifp0->if_ioctl)(ifp0, SIOCADDMULTI, (caddr_t)ifr);
2550 0 : if (error != 0)
2551 : goto ioctl_failed;
2552 :
2553 0 : return (error);
2554 :
2555 : ioctl_failed:
2556 0 : LIST_REMOVE(mc, mc_entries);
2557 0 : free(mc, M_DEVBUF, sizeof(*mc));
2558 : alloc_failed:
2559 0 : (void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2560 :
2561 0 : return (error);
2562 0 : }
2563 :
/*
 * Remove the multicast address in `ifr' from the carp interface and,
 * when it is no longer used, from the parent interface.
 *
 * Returns EINVAL when the softc has no parent, when the address is not
 * present in the softc's arpcom, or when no carp_mc_entry on
 * carp_mc_listhead refers to it (i.e. it was not added through
 * carp_ether_addmulti()).  An error from ether_multiaddr() is returned
 * as is.
 *
 * The address is then deleted from the softc's arpcom; any result other
 * than ENETRESET is returned unchanged.  On ENETRESET, SIOCDELMULTI is
 * forwarded to the parent's if_ioctl: on success the carp_mc_entry is
 * unlinked and freed, on failure the address is added back to the
 * softc's arpcom and the parent's error is returned.
 */
2564 : int
2565 0 : carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2566 : {
2567 : struct ifnet *ifp0;
2568 : struct ether_multi *enm;
2569 : struct carp_mc_entry *mc;
2570 0 : u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2571 : int error;
2572 :
2573 0 : ifp0 = sc->sc_carpdev;
2574 0 : if (ifp0 == NULL)
2575 0 : return (EINVAL);
2576 :
2577 : /*
2578 : * Find a key to lookup carp_mc_entry. We have to do this
2579 : * before calling ether_delmulti for obvious reason.
2580 : */
2581 0 : if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0)
2582 0 : return (error);
2583 0 : ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2584 0 : if (enm == NULL)
2585 0 : return (EINVAL);
2586 :
/* LIST_FOREACH leaves mc == NULL when no entry matched. */
2587 0 : LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2588 0 : if (mc->mc_enm == enm)
2589 : break;
2590 :
2591 : /* We won't delete entries we didn't add */
2592 0 : if (mc == NULL)
2593 0 : return (EINVAL);
2594 :
2595 0 : error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2596 0 : if (error != ENETRESET)
2597 0 : return (error);
2598 :
2599 : /* We no longer use this multicast address. Tell parent so. */
2600 0 : error = (*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr);
2601 0 : if (error == 0) {
2602 : /* And forget about this address. */
2603 0 : LIST_REMOVE(mc, mc_entries);
2604 0 : free(mc, M_DEVBUF, sizeof(*mc));
2605 0 : } else
/* Parent refused: restore the address on our own list. */
2606 0 : (void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2607 0 : return (error);
2608 0 : }
2609 :
2610 : /*
2611 : * Delete every multicast address we have asked the parent interface
2612 : * to add. Called when the carp is being unconfigured.
2613 : */
/*
 * Does nothing when the softc has no parent.  Otherwise each entry on
 * carp_mc_listhead is turned into a SIOCDELMULTI request issued to the
 * parent's if_ioctl (its result is ignored), then unlinked and freed,
 * until the list is empty.
 */
2614 : void
2615 0 : carp_ether_purgemulti(struct carp_softc *sc)
2616 : {
2617 0 : struct ifnet *ifp0 = sc->sc_carpdev; /* Parent. */
2618 : struct carp_mc_entry *mc;
/*
 * The union overlays a struct ifreq with a name followed by a
 * sockaddr_storage; presumably this guarantees room behind
 * ifr->ifr_addr for the ss_len bytes copied below -- TODO confirm.
 */
2619 0 : union {
2620 : struct ifreq ifreq;
2621 : struct {
2622 : char ifr_name[IFNAMSIZ];
2623 : struct sockaddr_storage ifr_ss;
2624 : } ifreq_storage;
2625 : } u;
2626 0 : struct ifreq *ifr = &u.ifreq;
2627 :
2628 0 : if (ifp0 == NULL)
2629 0 : return;
2630 :
2631 0 : memcpy(ifr->ifr_name, ifp0->if_xname, IFNAMSIZ);
2632 0 : while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2633 0 : memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len);
2634 0 : (void)(*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr);
2635 0 : LIST_REMOVE(mc, mc_entries);
2636 0 : free(mc, M_DEVBUF, sizeof(*mc));
2637 : }
2638 0 : }
2639 :
/*
 * Take a reference on the vhost entry `v'.  The first argument is
 * unused.
 */
2640 : void
2641 0 : carp_vh_ref(void *null, void *v)
2642 : {
2643 0 : struct carp_vhost_entry *vhe = v;
2644 :
2645 0 : refcnt_take(&vhe->vhost_refcnt);
2646 0 : }
2647 :
/*
 * Release a reference on the vhost entry `v'.  The first argument is
 * unused.
 *
 * When refcnt_rele() returns non-zero (presumably: the last reference
 * was dropped), the reference on the parent softc is released and the
 * entry is freed.
 */
2648 : void
2649 0 : carp_vh_unref(void *null, void *v)
2650 : {
2651 0 : struct carp_vhost_entry *vhe = v;
2652 :
2653 0 : if (refcnt_rele(&vhe->vhost_refcnt)) {
2654 0 : carp_sc_unref(NULL, vhe->parent_sc);
2655 0 : free(vhe, M_DEVBUF, sizeof(*vhe));
2656 0 : }
2657 0 : }
2658 :
/*
 * Take a reference on the carp softc `s'.  The first argument is
 * unused.
 */
2659 : void
2660 0 : carp_sc_ref(void *null, void *s)
2661 : {
2662 0 : struct carp_softc *sc = s;
2663 :
2664 0 : refcnt_take(&sc->sc_refcnt);
2665 0 : }
2666 :
/*
 * Release a reference on the carp softc `s' via refcnt_rele_wake().
 * The softc is not freed here.  The first argument is unused.
 */
2667 : void
2668 0 : carp_sc_unref(void *null, void *s)
2669 : {
2670 0 : struct carp_softc *sc = s;
2671 :
2672 0 : refcnt_rele_wake(&sc->sc_refcnt);
2673 0 : }
|