/*	$OpenBSD: bpf.c,v 1.170 2018/07/13 08:51:15 bluhm Exp $	*/
/*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/srp.h>
#include <sys/specdev.h>
#include <sys/selinfo.h>
#include <sys/task.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#define BPF_BUFSIZE	32768

#define PRINET	26			/* interruptible */

/* from kern/kern_clock.c; incremented each clock tick. */
extern int ticks;

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
 * bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if *, struct ifreq *);
int	_bpf_mtap(caddr_t, const struct mbuf *, u_int,
	    void (*)(const void *, void *, size_t));
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, u_int, struct mbuf **,
	    struct sockaddr *, struct bpf_insn *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfpoll(dev_t, int, struct proc *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    void (*)(const void *, void *, size_t), struct timeval *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

/*
 * Reference count access to descriptor buffers
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);

/*
 * garbage collector srps
 */

void	bpf_d_ref(void *, void *);
void	bpf_d_unref(void *, void *);
struct srpl_rc bpf_d_rc = SRPL_RC_INITIALIZER(bpf_d_ref, bpf_d_unref, NULL);

void	bpf_insn_dtor(void *, void *);
struct srp_gc bpf_insn_gc = SRP_GC_INITIALIZER(bpf_insn_dtor, NULL);

struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");

int
bpf_movein(struct uio *uio, u_int linktype, struct mbuf **mp,
    struct sockaddr *sockp, struct bpf_insn *filter)
{
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen;
	u_int len;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EIO);
	len = uio->uio_resid;

	MGETHDR(m, M_WAIT, MT_DATA);
	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len - hlen;

	if (len > MHLEN) {
		MCLGETI(m, M_WAIT, NULL, len);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(filter, mtod(m, u_char *), len, len);
	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}
	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SRPL_INSERT_HEAD_LOCKED(&bpf_d_rc, &bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SRPL_REMOVE_LOCKED(&bpf_d_rc, &bp->bif_dlist, d, bpf_d, bd_next);

	if (SRPL_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		d->bd_promisc = 0;

		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		if (error && !(error == EINVAL || error == ENODEV ||
		    error == ENXIO))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}

void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}

/*
 * Open the bpf device.  Returns ENXIO for an illegal minor device number,
 * EBUSY if no memory is available for a new descriptor.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);

	if (flag & FNONBLOCK)
		bd->bd_rtout = -1;

	bpf_get(bd);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	KASSERT(d->bd_in_uiomove == 0); \
	MUTEX_ASSERT_LOCKED(&d->bd_mtx); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;
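
/*
 * For example, starting from the usual capture state (hold buffer
 * empty, store buffer filling, free buffer available):
 *
 *	before:	bd_hbuf == NULL, bd_sbuf full,  bd_fbuf empty
 *	after:	bd_hbuf full,    bd_sbuf empty, bd_fbuf == NULL
 *
 * The filled store buffer becomes readable, and no free buffer exists
 * until the reader hands the hold buffer back in bpfread().
 */
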
/*
 * bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	caddr_t hbuf;
	int hlen, error;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, bd_rdStart is set to the time we started
	 * the read.  We can then figure out when we're done reading.
	 */
	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	else
		d->bd_rdStart = 0;

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_immediate && d->bd_slen != 0) {
			/*
			 * One or more packets either arrived since the
			 * previous read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_rtout == -1) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else {
			if ((d->bd_rdStart + d->bd_rtout) < ticks) {
				error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
				    "bpf", d->bd_rtout);
			} else
				error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (error);
}


/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * As long as csignal() and selwakeup() need to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	bpf_get(d);
	if (!task_add(systq, &d->bd_wake_task))
		bpf_put(d);
}

void
bpf_wakeup_cb(void *xd)
{
	struct bpf_d *d = xd;

	KERNEL_ASSERT_LOCKED();

	wakeup(d);
	if (d->bd_async && d->bd_sig)
		csignal(d->bd_pgid, d->bd_sig, d->bd_siguid, d->bd_sigeuid);

	selwakeup(&d->bd_sel);
	bpf_put(d);
}

int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	struct bpf_program *bf;
	struct bpf_insn *fcode = NULL;
	int error;
	struct sockaddr_storage dst;
	u_int dlt;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	ifp = d->bd_bif->bif_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}

	KERNEL_ASSERT_LOCKED(); /* for accessing bd_wfilter */
	bf = srp_get_locked(&d->bd_wfilter);
	if (bf != NULL)
		fcode = bf->bf_insns;

	dlt = d->bd_bif->bif_dlt;

	error = bpf_movein(uio, dlt, &m, sstosa(&dst), fcode);
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	bpf_put(d);
	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
void
bpf_resetd(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	KASSERT(d->bd_in_uiomove == 0);

	if (d->bd_hbuf != NULL) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLTLIST	Get supported link layer types.
 *  BIOCGDLT		Get link layer type.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 */
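
/*
 * A minimal sketch of how userland typically drives these ioctls,
 * assuming an interface named "em0" (illustrative only, not part of
 * this file):
 *
 *	int fd = open("/dev/bpf0", O_RDONLY);
 *	struct ifreq ifr;
 *	u_int on = 1, blen;
 *
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCIMMEDIATE, &on);
 *	ioctl(fd, BIOCGBLEN, &blen);
 *
 * Subsequent read()s must pass exactly blen bytes; bpfread() rejects
 * any other size with EINVAL.
 */
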
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
	{
		int n;

		mtx_enter(&d->bd_mtx);
		n = d->bd_slen;
		if (d->bd_hbuf != NULL)
			n += d->bd_hlen;
		mtx_leave(&d->bd_mtx);

		*(int *)addr = n;
		break;
	}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get the data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set the data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
	{
		struct timeval *tv = (struct timeval *)addr;

		/* Compute number of ticks. */
		d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
		if (d->bd_rtout == 0 && tv->tv_usec != 0)
			d->bd_rtout = 1;
		break;
	}
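
	/*
	 * For example, with hz = 100 (so tick = 10000 microseconds), a
	 * timeout of { tv_sec = 2, tv_usec = 500000 } becomes
	 * 2 * 100 + 500000 / 10000 = 250 ticks.  A sub-tick timeout such
	 * as 1 microsecond is rounded up to one tick instead of being
	 * truncated to 0, which would mean "no timeout".
	 */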

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
	{
		struct timeval *tv = (struct timeval *)addr;

		tv->tv_sec = d->bd_rtout / hz;
		tv->tv_usec = (d->bd_rtout % hz) * tick;
		break;
	}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
	{
		struct bpf_stat *bs = (struct bpf_stat *)addr;

		bs->bs_recv = d->bd_rcount;
		bs->bs_drop = d->bd_dcount;
		break;
	}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
	{
		struct bpf_version *bv = (struct bpf_version *)addr;

		bv->bv_major = BPF_MAJOR_VERSION;
		bv->bv_minor = BPF_MINOR_VERSION;
		break;
	}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP:	/* set "filter-drop" flag */
		d->bd_fildrop = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		if (*(int *)addr)
			d->bd_rtout = -1;
		else
			d->bd_rtout = 0;
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	/*
	 * N.B.  ioctl (FIOSETOWN) and fcntl (F_SETOWN) both end up doing
	 * the equivalent of a TIOCSPGRP and hence end up here.  *However*
	 * TIOCSPGRP's arg is a process group if it's positive and a process
	 * id if it's negative.  This is exactly the opposite of what the
	 * other two functions want!  Therefore there is code in ioctl and
	 * fcntl to negate the arg before calling here.
	 */
	case TIOCSPGRP:		/* Process or group to send signals to */
		d->bd_pgid = *(int *)addr;
		d->bd_siguid = p->p_ucred->cr_ruid;
		d->bd_sigeuid = p->p_ucred->cr_uid;
		break;

	case TIOCGPGRP:
		*(int *)addr = d->bd_pgid;
		break;

	case BIOCSRSIG:		/* Set receive signal */
	{
		u_int sig;

		sig = *(u_int *)addr;

		if (sig >= NSIG)
			error = EINVAL;
		else
			d->bd_sig = sig;
		break;
	}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program *bf;
	struct srp *filter;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();
	filter = wf ? &d->bd_wfilter : &d->bd_rfilter;

	if (fp->bf_insns == 0) {
		if (fp->bf_len != 0)
			return (EINVAL);
		srp_update_locked(&bpf_insn_gc, filter, NULL);
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		return (0);
	}
	flen = fp->bf_len;
	if (flen > BPF_MAXINSNS)
		return (EINVAL);

	fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
	    M_WAITOK | M_CANFAIL);
	if (fcode == NULL)
		return (ENOMEM);

	size = flen * sizeof(*fp->bf_insns);
	if (copyin(fp->bf_insns, fcode, size) != 0 ||
	    bpf_validate(fcode, (int)flen) == 0) {
		free(fcode, M_DEVBUF, size);
		return (EINVAL);
	}

	bf = malloc(sizeof(*bf), M_DEVBUF, M_WAITOK);
	bf->bf_len = flen;
	bf->bf_insns = fcode;

	srp_update_locked(&bpf_insn_gc, filter, bf);

	mtx_enter(&d->bd_mtx);
	bpf_resetd(d);
	mtx_leave(&d->bd_mtx);
	return (0);
}
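
/*
 * A sketch of the userland counterpart: build an accept-all program and
 * install it with BIOCSETF (which, as above, also flushes the buffers):
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *
 *	ioctl(fd, BIOCSETF, &prog);
 */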

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	/* Not found. */
	if (candidate == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (candidate != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, candidate);
	}
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
{
	bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
}

/*
 * Support for poll() system call
 */
int
bpfpoll(dev_t dev, int events, struct proc *p)
{
	struct bpf_d *d;
	int revents;

	KERNEL_ASSERT_LOCKED();

	/*
	 * An imitation of the FIONREAD ioctl code.
	 */
	d = bpfilter_lookup(minor(dev));

	/*
	 * XXX The USB stack manages to trigger a race condition that
	 * causes bpfilter_lookup() to return NULL when a USB device
	 * gets detached while it is up and has an open bpf handler
	 * (e.g. dhclient).  We should still check whether the root
	 * cause of this issue can be fixed.
	 */
	if (d == NULL)
		return (POLLERR);

	/* Always ready to write data */
	revents = events & (POLLOUT | POLLWRNORM);

	if (events & (POLLIN | POLLRDNORM)) {
		mtx_enter(&d->bd_mtx);
		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			/*
			 * if there's a timeout, mark the time we
			 * started waiting.
			 */
			if (d->bd_rtout != -1 && d->bd_rdStart == 0)
				d->bd_rdStart = ticks;
			selrecord(p, &d->bd_sel);
		}
		mtx_leave(&d->bd_mtx);
	}
	return (revents);
}

struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfrdetach, filt_bpfread };

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.si_note;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	bpf_get(d);
	kn->kn_hook = d;
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);

	mtx_enter(&d->bd_mtx);
	if (d->bd_rtout != -1 && d->bd_rdStart == 0)
		d->bd_rdStart = ticks;
	mtx_leave(&d->bd_mtx);

	return (0);
}

void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	SLIST_REMOVE(&d->bd_sel.si_note, kn, knote, kn_selnext);
	bpf_put(d);
}

int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	mtx_enter(&d->bd_mtx);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	mtx_leave(&d->bd_mtx);

	return (kn->kn_data > 0);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

/*
 * like bpf_mtap, but copy fn can be given. used by various bpf_mtap*
 */
int
_bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction,
    void (*cpfn)(const void *, void *, size_t))
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct srp_ref sr;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct timeval tv;
	int gottime = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (cpfn == NULL)
		cpfn = bpf_mcopy;

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	SRPL_FOREACH(d, &sr, &bp->bif_dlist, bd_next) {
		atomic_inc_long(&d->bd_rcount);

		if ((direction & d->bd_dirfilt) != 0)
			slen = 0;
		else {
			struct srp_ref bsr;
			struct bpf_program *bf;
			struct bpf_insn *fcode = NULL;

			bf = srp_enter(&bsr, &d->bd_rfilter);
			if (bf != NULL)
				fcode = bf->bf_insns;
			slen = bpf_mfilter(fcode, m, pktlen);
			srp_leave(&bsr);
		}

		if (slen > 0) {
			if (!gottime++)
				microtime(&tv);

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, cpfn,
			    &tv);
			mtx_leave(&d->bd_mtx);

			if (d->bd_fildrop)
				drop = 1;
		}
	}
	SRPL_LEAVE(&sr);

	return (drop);
}

/*
 * Incoming linkage from device drivers, where a data buffer should be
 * prepended by an arbitrary header. In this situation we already have a
 * way of representing a chain of memory buffers, ie, mbufs, so reuse
 * the existing functionality by attaching the buffers to mbufs.
 *
 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
 * struct m_hdr each for the header and data on the stack.
 */
int
bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
    const void *buf, unsigned int buflen, u_int direction)
{
	struct m_hdr mh, md;
	struct mbuf *m0 = NULL;
	struct mbuf **mp = &m0;

	if (hdr != NULL) {
		mh.mh_flags = 0;
		mh.mh_next = NULL;
		mh.mh_len = hdrlen;
		mh.mh_data = (void *)hdr;

		*mp = (struct mbuf *)&mh;
		mp = &mh.mh_next;
	}

	if (buf != NULL) {
		md.mh_flags = 0;
		md.mh_next = NULL;
		md.mh_len = buflen;
		md.mh_data = (void *)buf;

		*mp = (struct mbuf *)&md;
	}

	return _bpf_mtap(arg, m0, direction, bpf_mcopy);
}
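
/*
 * A hypothetical driver that receives frames into a flat buffer could
 * hand them to bpf without building a real mbuf chain (sketch only;
 * "sc_bpf" stands for the cookie returned by bpfsattach()):
 *
 *	if (sc->sc_bpf != NULL)
 *		bpf_tap_hdr(sc->sc_bpf, NULL, 0, buf, buflen,
 *		    BPF_DIRECTION_IN);
 */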

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	return _bpf_mtap(arg, m, direction, NULL);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, caddr_t data, u_int dlen, const struct mbuf *m,
    u_int direction, void (*cpfn)(const void *, void *, size_t))
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return _bpf_mtap(arg, m0, direction, cpfn);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t afh;

	afh = htonl(af);

	return bpf_mtap_hdr(arg, (caddr_t)&afh, sizeof(afh),
	    m, direction, NULL);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh;
	uint8_t prio;

	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return bpf_mtap(arg, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	prio = m->m_pkthdr.pf.prio;
	if (prio <= 1)
		prio = !prio;

	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag |
	    (prio << EVL_PRIO_BITS));

	mh.mh_flags = 0;
	mh.mh_data = m->m_data + ETHER_HDR_LEN;
	mh.mh_len = m->m_len - ETHER_HDR_LEN;
	mh.mh_next = m->m_next;

	return bpf_mtap_hdr(arg, (caddr_t)&evh, sizeof(evh),
	    (struct mbuf *)&mh, direction, NULL);
#endif
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up listeners if needed.
 * "cpfn" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen, do_wakeup = 0;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	hp->bh_tstamp.tv_sec = tv->tv_sec;
	hp->bh_tstamp.tv_usec = tv->tv_usec;
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	if (d->bd_immediate) {
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		do_wakeup = 1;
	}

	if (d->bd_rdStart && (d->bd_rtout + d->bd_rdStart < ticks)) {
		/*
		 * we could be selecting on the bpf, and we
		 * may have timeouts set.  We got here by getting
		 * a packet, so wake up the reader.
		 */
		if (d->bd_fbuf != NULL) {
			d->bd_rdStart = 0;
			ROTATE_BUFFERS(d);
			do_wakeup = 1;
		}
	}

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Initialize all nonzero fields of a descriptor.
 */
int
bpf_allocbufs(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_fbuf == NULL)
		return (ENOMEM);

	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
		return (ENOMEM);
	}

	d->bd_slen = 0;
	d->bd_hlen = 0;

	return (0);
}

void
bpf_get(struct bpf_d *bd)
{
	atomic_inc_int(&bd->bd_ref);
}

/*
 * Free buffers currently in use by a descriptor
 * when the reference count drops to zero.
 */
void
bpf_put(struct bpf_d *bd)
{
	if (atomic_dec_int_nv(&bd->bd_ref) > 0)
		return;

	free(bd->bd_sbuf, M_DEVBUF, 0);
	free(bd->bd_hbuf, M_DEVBUF, 0);
	free(bd->bd_fbuf, M_DEVBUF, 0);
	KERNEL_ASSERT_LOCKED();
	srp_update_locked(&bpf_insn_gc, &bd->bd_rfilter, NULL);
	srp_update_locked(&bpf_insn_gc, &bd->bd_wfilter, NULL);

	free(bd, M_DEVBUF, sizeof(*bd));
}

void *
bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SRPL_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)bpfp;
	bp->bif_name = name;
	bp->bif_ifp = NULL;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
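
	/*
	 * For example, for an Ethernet attachment (hdrlen = 14) with an
	 * 18 byte struct bpf_hdr: BPF_WORDALIGN(14 + 18) - 14 = 18, so
	 * the captured link header ends at offset 18 + 14 = 32 in each
	 * record and the network layer header that follows it is
	 * longword aligned.
	 */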

	return (bp);
}

void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen);
	bp->bif_ifp = ifp;
}

/* Detach an interface from its attached bpf device. */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp, **pbp = &bpf_iflist;

	KERNEL_ASSERT_LOCKED();

	for (bp = bpf_iflist; bp; bp = nbp) {
		nbp = bp->bif_next;
		if (bp->bif_ifp == ifp) {
			*pbp = nbp;

			bpfsdetach(bp);
		} else
			pbp = &bp->bif_next;
	}
	ifp->if_bpf = NULL;
}

void
bpfsdetach(void *p)
{
	struct bpf_if *bp = p;
	struct bpf_d *bd;
	int maj;

	/* Locate the major number. */
	for (maj = 0; maj < nchrdev; maj++)
		if (cdevsw[maj].d_open == bpfopen)
			break;

	while ((bd = SRPL_FIRST_LOCKED(&bp->bif_dlist)))
		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);

	free(bp, M_DEVBUF, sizeof *bp);
}

int
bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int newval;
	int error;

	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		newval = bpf_bufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE || newval > bpf_maxbufsize)
			return (EINVAL);
		bpf_bufsize = newval;
		break;
	case NET_BPF_MAXBUFSIZE:
		newval = bpf_maxbufsize;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &newval);
		if (error)
			return (error);
		if (newval < BPF_MINBUFSIZE)
			return (EINVAL);
		bpf_maxbufsize = newval;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int flags = RW_INTR;
	int error;

	if (namelen != 1)
		return (ENOTDIR);

	flags |= (newp == NULL) ? RW_READ : RW_WRITE;

	error = rw_enter(&bpf_sysctl_lk, flags);
	if (error != 0)
		return (error);

	error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);

	rw_exit(&bpf_sysctl_lk);

	return (error);
}
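
/*
 * These knobs are exposed through sysctl(8) as net.bpf.bufsize and
 * net.bpf.maxbufsize, e.g. "sysctl net.bpf.maxbufsize=4194304" raises
 * the ceiling that BIOCSBLEN (and net.bpf.bufsize) may request.
 */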

struct bpf_d *
bpfilter_lookup(int unit)
{
	struct bpf_d *bd;

	KERNEL_ASSERT_LOCKED();

	LIST_FOREACH(bd, &bpf_d_list, bd_list)
		if (bd->bd_unit == unit)
			return (bd);
	return (NULL);
}

/*
 * Get a list of the available data link types of the interface.
 */
int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct bpf_if *bp;
	const char *name;

	name = d->bd_bif->bif_name;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			if (error)
				break;
		}
		n++;
	}

	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	const char *name;
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	name = d->bd_bif->bif_name;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);
	bpf_detachd(d);
	bpf_attachd(d, bp);
	bpf_resetd(d);
	return (0);
}

void
bpf_d_ref(void *null, void *d)
{
	bpf_get(d);
}

void
bpf_d_unref(void *null, void *d)
{
	bpf_put(d);
}

void
bpf_insn_dtor(void *null, void *f)
{
	struct bpf_program *bf = f;
	struct bpf_insn *insns = bf->bf_insns;

	free(insns, M_DEVBUF, bf->bf_len * sizeof(*insns));
	free(bf, M_DEVBUF, sizeof(*bf));
}

u_int32_t	bpf_mbuf_ldw(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldh(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldb(const void *, u_int32_t, int *);

int		bpf_mbuf_copy(const struct mbuf *, u_int32_t,
		    void *, u_int32_t);

const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};

int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;
	}

	return (-1);
}

u_int32_t
bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
{
	u_int32_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohl(v);
}

u_int32_t
bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
{
	u_int16_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohs(v);
}

u_int32_t
bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
{
	const struct mbuf *m = m0;
	u_int8_t v;

	while (k >= m->m_len) {
		k -= m->m_len;

		m = m->m_next;
		if (m == NULL) {
			*err = 1;
			return (0);
		}
	}
	v = m->m_data[k];

	*err = 0;
	return v;
}

u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}
1908 : }
|