/*	$OpenBSD: pf_norm.c,v 1.216 2018/09/10 16:14:07 bluhm Exp $ */

/*
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * Copyright 2009 Henning Brauer <henning@openbsd.org>
 * Copyright 2011-2018 Alexander Bluhm <bluhm@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "pflog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/mutex.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_pflog.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#endif /* INET6 */

#include <net/pfvar.h>
#include <net/pfvar_priv.h>
struct pf_frent {
	TAILQ_ENTRY(pf_frent)	 fr_next;
	struct mbuf		*fe_m;
	u_int16_t		 fe_hdrlen;	/* ipv4 header length with ip options,
						   or ipv6 + extension + frag header */
	u_int16_t		 fe_extoff;	/* last extension header offset or 0 */
	u_int16_t		 fe_len;	/* fragment length */
	u_int16_t		 fe_off;	/* fragment offset */
	u_int16_t		 fe_mff;	/* more fragment flag */
};

RB_HEAD(pf_frag_tree, pf_fragment);
struct pf_frnode {
	struct pf_addr	fn_src;		/* ip source address */
	struct pf_addr	fn_dst;		/* ip destination address */
	sa_family_t	fn_af;		/* address family */
	u_int8_t	fn_proto;	/* protocol for fragments in fn_tree */
	u_int8_t	fn_direction;	/* pf packet direction */
	u_int32_t	fn_fragments;	/* number of entries in fn_tree */
	u_int32_t	fn_gen;		/* fr_gen of newest entry in fn_tree */

	RB_ENTRY(pf_frnode) fn_entry;
	struct pf_frag_tree fn_tree;	/* matching fragments, lookup by id */
};

struct pf_fragment {
	struct pf_frent	*fr_firstoff[PF_FRAG_ENTRY_POINTS];
					/* pointers to queue element */
	u_int8_t	fr_entries[PF_FRAG_ENTRY_POINTS];
					/* count entries between pointers */
	RB_ENTRY(pf_fragment) fr_entry;
	TAILQ_ENTRY(pf_fragment) frag_next;
	TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
	u_int32_t	fr_id;		/* fragment id for reassemble */
	int32_t		fr_timeout;
	u_int32_t	fr_gen;		/* generation number (per pf_frnode) */
	u_int16_t	fr_maxlen;	/* maximum length of single fragment */
	u_int16_t	fr_holes;	/* number of holes in the queue */
	struct pf_frnode *fr_node;	/* ip src/dst/proto/af for fragments */
};

struct pf_fragment_tag {
	u_int16_t	 ft_hdrlen;	/* header length of reassembled pkt */
	u_int16_t	 ft_extoff;	/* last extension header offset or 0 */
	u_int16_t	 ft_maxlen;	/* maximum fragment payload length */
};

TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;

static __inline int	 pf_frnode_compare(struct pf_frnode *,
			    struct pf_frnode *);
RB_HEAD(pf_frnode_tree, pf_frnode)	pf_frnode_tree;
RB_PROTOTYPE(pf_frnode_tree, pf_frnode, fn_entry, pf_frnode_compare);
RB_GENERATE(pf_frnode_tree, pf_frnode, fn_entry, pf_frnode_compare);

static __inline int	 pf_frag_compare(struct pf_fragment *,
			    struct pf_fragment *);
RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

/* Private prototypes */
void			 pf_flush_fragments(void);
void			 pf_free_fragment(struct pf_fragment *);
struct pf_fragment	*pf_find_fragment(struct pf_frnode *, u_int32_t);
struct pf_frent		*pf_create_fragment(u_short *);
int			 pf_frent_holes(struct pf_frent *);
static inline int	 pf_frent_index(struct pf_frent *);
int			 pf_frent_insert(struct pf_fragment *,
			    struct pf_frent *, struct pf_frent *);
void			 pf_frent_remove(struct pf_fragment *,
			    struct pf_frent *);
struct pf_frent		*pf_frent_previous(struct pf_fragment *,
			    struct pf_frent *);
struct pf_fragment	*pf_fillup_fragment(struct pf_frnode *, u_int32_t,
			    struct pf_frent *, u_short *);
struct mbuf		*pf_join_fragment(struct pf_fragment *);
int			 pf_reassemble(struct mbuf **, int, u_short *);
#ifdef INET6
int			 pf_reassemble6(struct mbuf **, struct ip6_frag *,
			    u_int16_t, u_int16_t, int, u_short *);
#endif /* INET6 */

/* Globals */
struct pool		 pf_frent_pl, pf_frag_pl, pf_frnode_pl;
struct pool		 pf_state_scrub_pl;
int			 pf_nfrents;

#ifdef WITH_PF_LOCK
struct mutex		 pf_frag_mtx;

#define PF_FRAG_LOCK_INIT()	mtx_init(&pf_frag_mtx, IPL_SOFTNET)
#define PF_FRAG_LOCK()		mtx_enter(&pf_frag_mtx)
#define PF_FRAG_UNLOCK()	mtx_leave(&pf_frag_mtx)
#else /* !WITH_PF_LOCK */
#define PF_FRAG_LOCK_INIT()	(void)(0)
#define PF_FRAG_LOCK()		(void)(0)
#define PF_FRAG_UNLOCK()	(void)(0)
#endif /* WITH_PF_LOCK */

void
pf_normalize_init(void)
{
	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0,
	    IPL_SOFTNET, 0, "pffrent", NULL);
	pool_init(&pf_frnode_pl, sizeof(struct pf_frnode), 0,
	    IPL_SOFTNET, 0, "pffrnode", NULL);
	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0,
	    IPL_SOFTNET, 0, "pffrag", NULL);
	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0,
	    IPL_SOFTNET, 0, "pfstscr", NULL);

	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);

	TAILQ_INIT(&pf_fragqueue);

	PF_FRAG_LOCK_INIT();
}

static __inline int
pf_frnode_compare(struct pf_frnode *a, struct pf_frnode *b)
{
	int	diff;

	if ((diff = a->fn_proto - b->fn_proto) != 0)
		return (diff);
	if ((diff = a->fn_af - b->fn_af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->fn_src, &b->fn_src, a->fn_af)) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->fn_dst, &b->fn_dst, a->fn_af)) != 0)
		return (diff);

	return (0);
}

static __inline int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
	int	diff;

	if ((diff = a->fr_id - b->fr_id) != 0)
		return (diff);

	return (0);
}

void
pf_purge_expired_fragments(void)
{
	struct pf_fragment	*frag;
	int32_t			 expire;

	PF_ASSERT_UNLOCKED();

	expire = time_uptime - pf_default_rule.timeout[PFTM_FRAG];

	PF_FRAG_LOCK();
	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		if (frag->fr_timeout > expire)
			break;
		DPFPRINTF(LOG_NOTICE, "expiring %d(%p)", frag->fr_id, frag);
		pf_free_fragment(frag);
	}
	PF_FRAG_UNLOCK();
}

/*
 * Try to flush old fragments to make space for new ones
 */
void
pf_flush_fragments(void)
{
	struct pf_fragment	*frag;
	int			 goal;

	goal = pf_nfrents * 9 / 10;
	DPFPRINTF(LOG_NOTICE, "trying to free > %d frents", pf_nfrents - goal);
	while (goal < pf_nfrents) {
		if ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) == NULL)
			break;
		pf_free_fragment(frag);
	}
}

/*
 * Remove a fragment from the fragment queue, free its fragment entries,
 * and free the fragment itself.
 */
void
pf_free_fragment(struct pf_fragment *frag)
{
	struct pf_frent		*frent;
	struct pf_frnode	*frnode;

	frnode = frag->fr_node;
	RB_REMOVE(pf_frag_tree, &frnode->fn_tree, frag);
	KASSERT(frnode->fn_fragments >= 1);
	frnode->fn_fragments--;
	if (frnode->fn_fragments == 0) {
		KASSERT(RB_EMPTY(&frnode->fn_tree));
		RB_REMOVE(pf_frnode_tree, &pf_frnode_tree, frnode);
		pool_put(&pf_frnode_pl, frnode);
	}
	TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);

	/* Free all fragment entries */
	while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) {
		TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
		m_freem(frent->fe_m);
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
	}
	pool_put(&pf_frag_pl, frag);
}

struct pf_fragment *
pf_find_fragment(struct pf_frnode *key, u_int32_t id)
{
	struct pf_fragment	*frag, idkey;
	struct pf_frnode	*frnode;
	u_int32_t		 stale;

	frnode = RB_FIND(pf_frnode_tree, &pf_frnode_tree, key);
	if (frnode == NULL)
		return (NULL);
	KASSERT(frnode->fn_fragments >= 1);
	idkey.fr_id = id;
	frag = RB_FIND(pf_frag_tree, &frnode->fn_tree, &idkey);
	if (frag == NULL)
		return (NULL);
	/*
	 * Limit the number of fragments we accept for each (proto,src,dst,af)
	 * combination (aka pf_frnode), so we can deal better with a high rate
	 * of fragments.  Problem analysis is in RFC 4963.
	 * Store the current generation for each pf_frnode in fn_gen and on
	 * lookup discard 'stale' fragments (pf_fragment, based on the fr_gen
	 * member).  Instead of adding another tunable, interpret the pf
	 * fragment timeout in multiples of 200 fragments.  This way the
	 * default of 60s means: pf_fragment objects older than
	 * 60*200 = 12,000 generations are considered stale.
	 */
	stale = pf_default_rule.timeout[PFTM_FRAG] * PF_FRAG_STALE;
	if ((frnode->fn_gen - frag->fr_gen) >= stale) {
		DPFPRINTF(LOG_NOTICE, "stale fragment %d(%p), gen %u, num %u",
		    frag->fr_id, frag, frag->fr_gen, frnode->fn_fragments);
		pf_free_fragment(frag);
		return (NULL);
	}
	TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
	TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);

	return (frag);
}
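
/*
 * Illustration of the staleness check above (added, not part of the
 * original source): with the default 60s fragment timeout the stale
 * threshold is 60 * 200 = 12,000 generations.  If fn_gen has advanced
 * to 15,000 while a reassembly queue still carries fr_gen 2,000, the
 * difference of 13,000 exceeds the threshold and the old queue is
 * freed on lookup instead of being returned.
 */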

struct pf_frent *
pf_create_fragment(u_short *reason)
{
	struct pf_frent	*frent;

	frent = pool_get(&pf_frent_pl, PR_NOWAIT);
	if (frent == NULL) {
		pf_flush_fragments();
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (NULL);
		}
	}
	pf_nfrents++;

	return (frent);
}

/*
 * Calculate the additional holes that were created in the fragment
 * queue by inserting this fragment.  A fragment in the middle
 * creates one more hole by splitting.  For each connected side,
 * it loses one hole.
 * Fragment entry must be in the queue when calling this function.
 */
int
pf_frent_holes(struct pf_frent *frent)
{
	struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next);
	struct pf_frent *next = TAILQ_NEXT(frent, fr_next);
	int holes = 1;

	if (prev == NULL) {
		if (frent->fe_off == 0)
			holes--;
	} else {
		KASSERT(frent->fe_off != 0);
		if (frent->fe_off == prev->fe_off + prev->fe_len)
			holes--;
	}
	if (next == NULL) {
		if (!frent->fe_mff)
			holes--;
	} else {
		KASSERT(frent->fe_mff);
		if (next->fe_off == frent->fe_off + frent->fe_len)
			holes--;
	}
	return holes;
}
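
/*
 * Worked example for pf_frent_holes() (added for illustration):
 * fragments arrive as [0,8) with MF set, [16,24) with MF clear, then
 * [8,16) with MF set.  fr_holes starts at 1 when the queue is created:
 *  - [0,8):   offset 0 closes the head, MF keeps the tail open,
 *             delta 1-1 = 0, fr_holes stays 1
 *  - [16,24): not adjacent to [0,8), but !MF closes the tail,
 *             delta 1-1 = 0, fr_holes stays 1
 *  - [8,16):  connects to both neighbors, delta 1-1-1 = -1,
 *             fr_holes drops to 0 and reassembly can complete
 */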

static inline int
pf_frent_index(struct pf_frent *frent)
{
	/*
	 * We have an array of 16 entry points to the queue.  A full size
	 * 65535 octet IP packet can have 8192 fragments.  So the queue
	 * traversal length is at most 512 and at most 16 entry points are
	 * checked.  We need 128 additional bytes on a 64 bit architecture.
	 */
	CTASSERT(((u_int16_t)0xffff &~ 7) / (0x10000 / PF_FRAG_ENTRY_POINTS) ==
	    16 - 1);
	CTASSERT(((u_int16_t)0xffff >> 3) / PF_FRAG_ENTRY_POINTS == 512 - 1);

	return frent->fe_off / (0x10000 / PF_FRAG_ENTRY_POINTS);
}
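
/*
 * Example (added for illustration): with PF_FRAG_ENTRY_POINTS == 16
 * each entry point covers 0x10000 / 16 = 4096 octets of offset space,
 * so fe_off 0..4095 maps to index 0, fe_off 12345 to index 3, and the
 * maximum aligned offset 65528 to index 15.
 */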

int
pf_frent_insert(struct pf_fragment *frag, struct pf_frent *frent,
    struct pf_frent *prev)
{
	CTASSERT(PF_FRAG_ENTRY_LIMIT <= 0xff);
	int index;

	/*
	 * A packet has at most 65536 octets.  With 16 entry points, each one
	 * spans 4096 octets.  We limit these to 64 fragments each, which
	 * means on average every fragment must have at least 64 octets.
	 */
	index = pf_frent_index(frent);
	if (frag->fr_entries[index] >= PF_FRAG_ENTRY_LIMIT)
		return ENOBUFS;
	frag->fr_entries[index]++;

	if (prev == NULL) {
		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
	} else {
		KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off);
		TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
	}

	if (frag->fr_firstoff[index] == NULL) {
		KASSERT(prev == NULL || pf_frent_index(prev) < index);
		frag->fr_firstoff[index] = frent;
	} else {
		if (frent->fe_off < frag->fr_firstoff[index]->fe_off) {
			KASSERT(prev == NULL || pf_frent_index(prev) < index);
			frag->fr_firstoff[index] = frent;
		} else {
			KASSERT(prev != NULL);
			KASSERT(pf_frent_index(prev) == index);
		}
	}

	frag->fr_holes += pf_frent_holes(frent);

	return 0;
}
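
/*
 * Note (added for illustration): if the per-entry-point limit is hit,
 * pf_frent_insert() returns ENOBUFS and pf_fillup_fragment() below
 * drops the whole reassembly queue.  Together with the 16 entry points
 * the limit of 64 mentioned above caps one queue at 1024 fragments.
 */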

void
pf_frent_remove(struct pf_fragment *frag, struct pf_frent *frent)
{
	struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next);
	struct pf_frent *next = TAILQ_NEXT(frent, fr_next);
	int index;

	frag->fr_holes -= pf_frent_holes(frent);

	index = pf_frent_index(frent);
	KASSERT(frag->fr_firstoff[index] != NULL);
	if (frag->fr_firstoff[index]->fe_off == frent->fe_off) {
		if (next == NULL) {
			frag->fr_firstoff[index] = NULL;
		} else {
			KASSERT(frent->fe_off + frent->fe_len <= next->fe_off);
			if (pf_frent_index(next) == index) {
				frag->fr_firstoff[index] = next;
			} else {
				frag->fr_firstoff[index] = NULL;
			}
		}
	} else {
		KASSERT(frag->fr_firstoff[index]->fe_off < frent->fe_off);
		KASSERT(prev != NULL);
		KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off);
		KASSERT(pf_frent_index(prev) == index);
	}

	TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);

	KASSERT(frag->fr_entries[index] > 0);
	frag->fr_entries[index]--;
}

struct pf_frent *
pf_frent_previous(struct pf_fragment *frag, struct pf_frent *frent)
{
	struct pf_frent	*prev, *next;
	int		 index;

	/*
	 * If there are no fragments after frent, take the final one.  Assume
	 * that the fragment queue is not empty.
	 */
	prev = TAILQ_LAST(&frag->fr_queue, pf_fragq);
	KASSERT(prev != NULL);
	if (prev->fe_off <= frent->fe_off)
		return prev;
	/*
	 * We want to find a fragment entry that is before frent, but still
	 * close to it.  Find the first fragment entry that is in the same
	 * entry point or in the first entry point after that.  As we have
	 * already checked that there are entries behind frent, this will
	 * succeed.
	 */
	for (index = pf_frent_index(frent); index < PF_FRAG_ENTRY_POINTS;
	    index++) {
		prev = frag->fr_firstoff[index];
		if (prev != NULL)
			break;
	}
	KASSERT(prev != NULL);
	/*
	 * In prev we may have a fragment from the same entry point that is
	 * before frent, or one that is just one position behind frent.
	 * In the latter case, we go back one step and have the predecessor.
	 * There may be none if the new fragment will be the first one.
	 */
	if (prev->fe_off > frent->fe_off) {
		prev = TAILQ_PREV(prev, pf_fragq, fr_next);
		if (prev == NULL)
			return NULL;
		KASSERT(prev->fe_off <= frent->fe_off);
		return prev;
	}
	/*
	 * Now prev is the first fragment of the entry point.  The offset
	 * of frent is behind it.  Find the closest previous fragment.
	 */
	for (next = TAILQ_NEXT(prev, fr_next); next != NULL;
	    next = TAILQ_NEXT(next, fr_next)) {
		if (next->fe_off > frent->fe_off)
			break;
		prev = next;
	}
	return prev;
}

struct pf_fragment *
pf_fillup_fragment(struct pf_frnode *key, u_int32_t id,
    struct pf_frent *frent, u_short *reason)
{
	struct pf_frent		*after, *next, *prev;
	struct pf_fragment	*frag;
	struct pf_frnode	*frnode;
	u_int16_t		 total;

	/* No empty fragments */
	if (frent->fe_len == 0) {
		DPFPRINTF(LOG_NOTICE, "bad fragment: len 0");
		goto bad_fragment;
	}

	/* All fragments are 8 byte aligned */
	if (frent->fe_mff && (frent->fe_len & 0x7)) {
		DPFPRINTF(LOG_NOTICE, "bad fragment: mff and len %d",
		    frent->fe_len);
		goto bad_fragment;
	}

	/* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET */
	if (frent->fe_off + frent->fe_len > IP_MAXPACKET) {
		DPFPRINTF(LOG_NOTICE, "bad fragment: max packet %d",
		    frent->fe_off + frent->fe_len);
		goto bad_fragment;
	}

	DPFPRINTF(LOG_INFO, key->fn_af == AF_INET ?
	    "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d",
	    id, frent->fe_off, frent->fe_off + frent->fe_len);

	/* Fully buffer all of the fragments in this fragment queue */
	frag = pf_find_fragment(key, id);

	/* Create a new reassembly queue for this packet */
	if (frag == NULL) {
		frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (frag == NULL) {
			pf_flush_fragments();
			frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (frag == NULL) {
				REASON_SET(reason, PFRES_MEMORY);
				goto drop_fragment;
			}
		}
		frnode = RB_FIND(pf_frnode_tree, &pf_frnode_tree, key);
		if (frnode == NULL) {
			frnode = pool_get(&pf_frnode_pl, PR_NOWAIT);
			if (frnode == NULL) {
				pf_flush_fragments();
				frnode = pool_get(&pf_frnode_pl, PR_NOWAIT);
				if (frnode == NULL) {
					REASON_SET(reason, PFRES_MEMORY);
					pool_put(&pf_frag_pl, frag);
					goto drop_fragment;
				}
			}
			*frnode = *key;
			RB_INIT(&frnode->fn_tree);
			frnode->fn_fragments = 0;
			frnode->fn_gen = 0;
		}
		memset(frag->fr_firstoff, 0, sizeof(frag->fr_firstoff));
		memset(frag->fr_entries, 0, sizeof(frag->fr_entries));
		TAILQ_INIT(&frag->fr_queue);
		frag->fr_id = id;
		frag->fr_timeout = time_uptime;
		frag->fr_gen = frnode->fn_gen++;
		frag->fr_maxlen = frent->fe_len;
		frag->fr_holes = 1;
		frag->fr_node = frnode;
		/* RB_INSERT cannot fail as pf_find_fragment() found nothing */
		RB_INSERT(pf_frag_tree, &frnode->fn_tree, frag);
		frnode->fn_fragments++;
		if (frnode->fn_fragments == 1)
			RB_INSERT(pf_frnode_tree, &pf_frnode_tree, frnode);
		TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);

		/* We do not have a previous fragment, cannot fail. */
		pf_frent_insert(frag, frent, NULL);

		return (frag);
	}

	KASSERT(!TAILQ_EMPTY(&frag->fr_queue));
	KASSERT(frag->fr_node);

	/* Remember maximum fragment len for refragmentation */
	if (frent->fe_len > frag->fr_maxlen)
		frag->fr_maxlen = frent->fe_len;

	/* Maximum data we have seen already */
	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;

	/* Non-terminal fragments must have the more-fragments flag */
	if (frent->fe_off + frent->fe_len < total && !frent->fe_mff)
		goto free_ipv6_fragment;

	/* Check if we saw the last fragment already */
	if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) {
		if (frent->fe_off + frent->fe_len > total ||
		    (frent->fe_off + frent->fe_len == total && frent->fe_mff))
			goto free_ipv6_fragment;
	} else {
		if (frent->fe_off + frent->fe_len == total && !frent->fe_mff)
			goto free_ipv6_fragment;
	}

	/* Find neighbors for newly inserted fragment */
	prev = pf_frent_previous(frag, frent);
	if (prev == NULL) {
		after = TAILQ_FIRST(&frag->fr_queue);
		KASSERT(after != NULL);
	} else {
		after = TAILQ_NEXT(prev, fr_next);
	}

	if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
		u_int16_t	precut;

#ifdef INET6
		if (frag->fr_node->fn_af == AF_INET6)
			goto free_ipv6_fragment;
#endif /* INET6 */

		precut = prev->fe_off + prev->fe_len - frent->fe_off;
		if (precut >= frent->fe_len) {
			DPFPRINTF(LOG_NOTICE, "new frag overlapped");
			goto drop_fragment;
		}
		DPFPRINTF(LOG_NOTICE, "frag head overlap %d", precut);
		m_adj(frent->fe_m, precut);
		frent->fe_off += precut;
		frent->fe_len -= precut;
	}

	for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off;
	    after = next) {
		u_int16_t	aftercut;

#ifdef INET6
		if (frag->fr_node->fn_af == AF_INET6)
			goto free_ipv6_fragment;
#endif /* INET6 */

		aftercut = frent->fe_off + frent->fe_len - after->fe_off;
		if (aftercut < after->fe_len) {
			DPFPRINTF(LOG_NOTICE, "frag tail overlap %d", aftercut);
			m_adj(after->fe_m, aftercut);
			after->fe_off += aftercut;
			after->fe_len -= aftercut;
			break;
		}

		/* This fragment is completely overlapped, lose it */
		DPFPRINTF(LOG_NOTICE, "old frag overlapped");
		next = TAILQ_NEXT(after, fr_next);
		pf_frent_remove(frag, after);
		m_freem(after->fe_m);
		pool_put(&pf_frent_pl, after);
		pf_nfrents--;
	}

	/* If part of the queue gets too long, there is no way to recover. */
	if (pf_frent_insert(frag, frent, prev)) {
		DPFPRINTF(LOG_WARNING, "fragment queue limit exceeded");
		goto free_fragment;
	}

	return (frag);

free_ipv6_fragment:
	if (frag->fr_node->fn_af == AF_INET)
		goto bad_fragment;
	/*
	 * RFC 5722, Errata 3089:  When reassembling an IPv6 datagram, if one
	 * or more of its constituent fragments is determined to be an
	 * overlapping fragment, the entire datagram (and any constituent
	 * fragments) MUST be silently discarded.
	 */
	DPFPRINTF(LOG_NOTICE, "flush overlapping fragments");
free_fragment:
	pf_free_fragment(frag);
bad_fragment:
	REASON_SET(reason, PFRES_FRAG);
drop_fragment:
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	return (NULL);
}

struct mbuf *
pf_join_fragment(struct pf_fragment *frag)
{
	struct mbuf	*m, *m2;
	struct pf_frent	*frent;

	frent = TAILQ_FIRST(&frag->fr_queue);
	TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);

	m = frent->fe_m;
	/* Strip off any trailing bytes */
	if ((frent->fe_hdrlen + frent->fe_len) < m->m_pkthdr.len)
		m_adj(m, (frent->fe_hdrlen + frent->fe_len) - m->m_pkthdr.len);
	/* Magic from ip_input */
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;

	while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) {
		TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
		m2 = frent->fe_m;
		/* Strip off ip header */
		m_adj(m2, frent->fe_hdrlen);
		/* Strip off any trailing bytes */
		if (frent->fe_len < m2->m_pkthdr.len)
			m_adj(m2, frent->fe_len - m2->m_pkthdr.len);
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_removehdr(m2);
		m_cat(m, m2);
	}

	/* Remove from fragment queue */
	pf_free_fragment(frag);

	return (m);
}

int
pf_reassemble(struct mbuf **m0, int dir, u_short *reason)
{
	struct mbuf		*m = *m0;
	struct ip		*ip = mtod(m, struct ip *);
	struct pf_frent		*frent;
	struct pf_fragment	*frag;
	struct pf_frnode	 key;
	u_int16_t		 total, hdrlen;

	/* Get an entry for the fragment queue */
	if ((frent = pf_create_fragment(reason)) == NULL)
		return (PF_DROP);

	frent->fe_m = m;
	frent->fe_hdrlen = ip->ip_hl << 2;
	frent->fe_extoff = 0;
	frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
	frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	frent->fe_mff = ntohs(ip->ip_off) & IP_MF;
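
	/*
	 * Example (added for illustration): an ip_off of 0x2004 has IP_MF
	 * (0x2000) set and an offset field of 4, so fe_mff is non-zero and
	 * fe_off is 4 << 3 = 32 octets into the reassembled payload.
	 */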

	key.fn_src.v4 = ip->ip_src;
	key.fn_dst.v4 = ip->ip_dst;
	key.fn_af = AF_INET;
	key.fn_proto = ip->ip_p;
	key.fn_direction = dir;

	PF_FRAG_LOCK();
	if ((frag = pf_fillup_fragment(&key, ip->ip_id, frent, reason))
	    == NULL) {
		PF_FRAG_UNLOCK();
		return (PF_DROP);
	}

	/* The mbuf is part of the fragment entry, no direct free or access */
	m = *m0 = NULL;

	if (frag->fr_holes) {
		DPFPRINTF(LOG_DEBUG, "frag %d, holes %d",
		    frag->fr_id, frag->fr_holes);
		PF_FRAG_UNLOCK();
		return (PF_PASS);	/* drop because *m0 is NULL, no error */
	}

	/* We have all the data */
	frent = TAILQ_FIRST(&frag->fr_queue);
	KASSERT(frent != NULL);
	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
	hdrlen = frent->fe_hdrlen;
	m = *m0 = pf_join_fragment(frag);
	frag = NULL;
	m_calchdrlen(m);

	ip = mtod(m, struct ip *);
	ip->ip_len = htons(hdrlen + total);
	ip->ip_off &= ~(IP_MF|IP_OFFMASK);

	if (hdrlen + total > IP_MAXPACKET) {
		PF_FRAG_UNLOCK();
		DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
		ip->ip_len = 0;
		REASON_SET(reason, PFRES_SHORT);
		/* PF_DROP requires a valid mbuf *m0 in pf_test() */
		return (PF_DROP);
	}

	PF_FRAG_UNLOCK();
	DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip->ip_len));
	return (PF_PASS);
}

#ifdef INET6
int
pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr,
    u_int16_t hdrlen, u_int16_t extoff, int dir, u_short *reason)
{
	struct mbuf		*m = *m0;
	struct ip6_hdr		*ip6 = mtod(m, struct ip6_hdr *);
	struct m_tag		*mtag;
	struct pf_fragment_tag	*ftag;
	struct pf_frent		*frent;
	struct pf_fragment	*frag;
	struct pf_frnode	 key;
	int			 off;
	u_int16_t		 total, maxlen;
	u_int8_t		 proto;

	/* Get an entry for the fragment queue */
	if ((frent = pf_create_fragment(reason)) == NULL)
		return (PF_DROP);

	frent->fe_m = m;
	frent->fe_hdrlen = hdrlen;
	frent->fe_extoff = extoff;
	frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen;
	frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
	frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG;

	key.fn_src.v6 = ip6->ip6_src;
	key.fn_dst.v6 = ip6->ip6_dst;
	key.fn_af = AF_INET6;
	/* Only the first fragment's protocol is relevant */
	key.fn_proto = 0;
	key.fn_direction = dir;

	PF_FRAG_LOCK();
	if ((frag = pf_fillup_fragment(&key, fraghdr->ip6f_ident, frent,
	    reason)) == NULL) {
		PF_FRAG_UNLOCK();
		return (PF_DROP);
	}

	/* The mbuf is part of the fragment entry, no direct free or access */
	m = *m0 = NULL;

	if (frag->fr_holes) {
		DPFPRINTF(LOG_DEBUG, "frag %#08x, holes %d",
		    frag->fr_id, frag->fr_holes);
		PF_FRAG_UNLOCK();
		return (PF_PASS);	/* drop because *m0 is NULL, no error */
	}

	/* We have all the data */
	extoff = frent->fe_extoff;
	maxlen = frag->fr_maxlen;
	frent = TAILQ_FIRST(&frag->fr_queue);
	KASSERT(frent != NULL);
	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
	hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag);
	m = *m0 = pf_join_fragment(frag);
	frag = NULL;

	/* Take protocol from first fragment header */
	if ((m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt),
	    &off)) == NULL)
		panic("%s: short frag mbuf chain", __func__);
	proto = *(mtod(m, caddr_t) + off);
	m = *m0;

	/* Delete frag6 header */
	if (frag6_deletefraghdr(m, hdrlen) != 0)
		goto fail;

	m_calchdrlen(m);

	if ((mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED, sizeof(struct
	    pf_fragment_tag), M_NOWAIT)) == NULL)
		goto fail;
	ftag = (struct pf_fragment_tag *)(mtag + 1);
	ftag->ft_hdrlen = hdrlen;
	ftag->ft_extoff = extoff;
	ftag->ft_maxlen = maxlen;
	m_tag_prepend(m, mtag);

	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total);
	if (extoff) {
		/* Write protocol into next field of last extension header */
		if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext,
		    ip6e_nxt), &off)) == NULL)
			panic("%s: short ext mbuf chain", __func__);
		*(mtod(m, caddr_t) + off) = proto;
		m = *m0;
	} else
		ip6->ip6_nxt = proto;

	if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
		PF_FRAG_UNLOCK();
		DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
		ip6->ip6_plen = 0;
		REASON_SET(reason, PFRES_SHORT);
		/* PF_DROP requires a valid mbuf *m0 in pf_test6() */
		return (PF_DROP);
	}
	PF_FRAG_UNLOCK();

	DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip6->ip6_plen));
	return (PF_PASS);

fail:
	PF_FRAG_UNLOCK();
	REASON_SET(reason, PFRES_MEMORY);
	/* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */
	return (PF_DROP);
}

int
pf_refragment6(struct mbuf **m0, struct m_tag *mtag, struct sockaddr_in6 *dst,
    struct ifnet *ifp, struct rtentry *rt)
{
	struct mbuf		*m = *m0, *t;
	struct pf_fragment_tag	*ftag = (struct pf_fragment_tag *)(mtag + 1);
	u_int32_t		 mtu;
	u_int16_t		 hdrlen, extoff, maxlen;
	u_int8_t		 proto;
	int			 error, action;

	hdrlen = ftag->ft_hdrlen;
	extoff = ftag->ft_extoff;
	maxlen = ftag->ft_maxlen;
	m_tag_delete(m, mtag);
	mtag = NULL;
	ftag = NULL;

	/* Checksum must be calculated for the whole packet */
	in6_proto_cksum_out(m, NULL);

	if (extoff) {
		int off;

		/* Use protocol from next field of last extension header */
		if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext,
		    ip6e_nxt), &off)) == NULL)
			panic("%s: short ext mbuf chain", __func__);
		proto = *(mtod(m, caddr_t) + off);
		*(mtod(m, caddr_t) + off) = IPPROTO_FRAGMENT;
		m = *m0;
	} else {
		struct ip6_hdr *hdr;

		hdr = mtod(m, struct ip6_hdr *);
		proto = hdr->ip6_nxt;
		hdr->ip6_nxt = IPPROTO_FRAGMENT;
	}

	/*
	 * Maxlen may be less than 8 iff there was only a single
	 * fragment.  As it was fragmented before, add a fragment
	 * header also for a single fragment.  If total or maxlen
	 * is less than 8, ip6_fragment() will return EMSGSIZE and
	 * we drop the packet.
	 */
	mtu = hdrlen + sizeof(struct ip6_frag) + maxlen;
	error = ip6_fragment(m, hdrlen, proto, mtu);
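
	/*
	 * Example (added for illustration): with a plain IPv6 header,
	 * hdrlen is 40 and a recorded maxlen of 1232 gives an mtu of
	 * 40 + 8 + 1232 = 1280, the IPv6 minimum MTU, so the refragmented
	 * packets match the largest fragment size seen on input.
	 */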

	m = (*m0)->m_nextpkt;
	(*m0)->m_nextpkt = NULL;
	if (error == 0) {
		/* The first mbuf contains the unfragmented packet */
		m_freemp(m0);
		action = PF_PASS;
	} else {
		/* Drop expects an mbuf to free */
		DPFPRINTF(LOG_NOTICE, "refragment error %d", error);
		action = PF_DROP;
	}

	for (t = m; m; m = t) {
		t = m->m_nextpkt;
		m->m_nextpkt = NULL;
		m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED;
		if (error == 0) {
			if (ifp == NULL) {
				ip6_forward(m, NULL, 0);
			} else if ((u_long)m->m_pkthdr.len <= ifp->if_mtu) {
				ifp->if_output(ifp, m, sin6tosa(dst), rt);
			} else {
				icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0,
				    ifp->if_mtu);
			}
		} else {
			m_freem(m);
		}
	}

	return (action);
}
#endif /* INET6 */

int
pf_normalize_ip(struct pf_pdesc *pd, u_short *reason)
{
	struct ip	*h = mtod(pd->m, struct ip *);
	u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t	 mff = (ntohs(h->ip_off) & IP_MF);

	if (!fragoff && !mff)
		goto no_fragment;

	/* Clear IP_DF if we're in no-df mode */
	if (pf_status.reass & PF_REASS_NODF && h->ip_off & htons(IP_DF))
		h->ip_off &= htons(~IP_DF);

	/* We're dealing with a fragment now.  Don't allow fragments
	 * with IP_DF to enter the cache.  If the flag was cleared by
	 * no-df above, fine.  Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(LOG_NOTICE, "bad fragment: IP_DF");
		REASON_SET(reason, PFRES_FRAG);
		return (PF_DROP);
	}

	if (!pf_status.reass)
		return (PF_PASS);	/* no reassembly */

	/* Returns PF_DROP; or PF_PASS and pd->m is either NULL or the
	 * completely reassembled mbuf */
	if (pf_reassemble(&pd->m, pd->dir, reason) != PF_PASS)
		return (PF_DROP);
	if (pd->m == NULL)
		return (PF_PASS);	/* packet has been reassembled, no error */

	h = mtod(pd->m, struct ip *);

no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF))
		h->ip_off &= htons(IP_DF);

	return (PF_PASS);
}

#ifdef INET6
int
pf_normalize_ip6(struct pf_pdesc *pd, u_short *reason)
{
	struct ip6_frag	 frag;

	if (pd->fragoff == 0)
		goto no_fragment;

	if (!pf_pull_hdr(pd->m, pd->fragoff, &frag, sizeof(frag), NULL, reason,
	    AF_INET6))
		return (PF_DROP);

	if (!pf_status.reass)
		return (PF_PASS);	/* no reassembly */

	/* Returns PF_DROP; or PF_PASS and pd->m is either NULL or the
	 * completely reassembled mbuf */
	if (pf_reassemble6(&pd->m, &frag, pd->fragoff + sizeof(frag),
	    pd->extoff, pd->dir, reason) != PF_PASS)
		return (PF_DROP);
	if (pd->m == NULL)
		return (PF_PASS);	/* packet has been reassembled, no error */

no_fragment:
	return (PF_PASS);
}
#endif /* INET6 */

int
pf_normalize_tcp(struct pf_pdesc *pd)
{
	struct tcphdr	*th = &pd->hdr.tcp;
	u_short		 reason;
	u_int8_t	 flags;
	u_int		 rewrite = 0;

	flags = th->th_flags;
	if (flags & TH_SYN) {
		/* Illegal packet */
		if (flags & TH_RST)
			goto tcp_drop;

		if (flags & TH_FIN)	/* XXX why clear instead of drop? */
			flags &= ~TH_FIN;
	} else {
		/* Illegal packet */
		if (!(flags & (TH_ACK|TH_RST)))
			goto tcp_drop;
	}

	if (!(flags & TH_ACK)) {
		/* These flags are only valid if ACK is set */
		if (flags & (TH_FIN|TH_PUSH|TH_URG))
			goto tcp_drop;
	}

	/* If flags changed, or reserved data set, then adjust */
	if (flags != th->th_flags || th->th_x2 != 0) {
		/* hack: set 4-bit th_x2 = 0 */
		u_int8_t *th_off = (u_int8_t*)(&th->th_ack+1);
		pf_patch_8(pd, th_off, th->th_off << 4, PF_HI);

		pf_patch_8(pd, &th->th_flags, flags, PF_LO);
		rewrite = 1;
	}

	/* Remove urgent pointer, if TH_URG is not set */
	if (!(flags & TH_URG) && th->th_urp) {
		pf_patch_16(pd, &th->th_urp, 0);
		rewrite = 1;
	}

	/* copy back packet headers if we sanitized */
	if (rewrite) {
		m_copyback(pd->m, pd->off, sizeof(*th), th, M_NOWAIT);
	}

	return (PF_PASS);

tcp_drop:
	REASON_SET(&reason, PFRES_NORM);
	return (PF_DROP);
}

int
pf_normalize_tcp_init(struct pf_pdesc *pd, struct pf_state_peer *src)
{
	struct tcphdr	*th = &pd->hdr.tcp;
	u_int32_t	 tsval, tsecr;
	int		 olen;
	u_int8_t	 opts[MAX_TCPOPTLEN], *opt;

	KASSERT(src->scrub == NULL);

	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	if (src->scrub == NULL)
		return (1);
	memset(src->scrub, 0, sizeof(*src->scrub));

	switch (pd->af) {
	case AF_INET: {
		struct ip *h = mtod(pd->m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(pd->af);
	}

	/*
	 * All normalizations below are only begun if we see the start of
	 * the connection.  They must all set an enabled bit in pfss_flags.
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);

	olen = (th->th_off << 2) - sizeof(*th);
	if (olen < TCPOLEN_TIMESTAMP || !pf_pull_hdr(pd->m,
	    pd->off + sizeof(*th), opts, olen, NULL, NULL, pd->af))
		return (0);

	opt = opts;
	while ((opt = pf_find_tcpopt(opt, opts, olen,
	    TCPOPT_TIMESTAMP, TCPOLEN_TIMESTAMP)) != NULL) {

		src->scrub->pfss_flags |= PFSS_TIMESTAMP;
		src->scrub->pfss_ts_mod = arc4random();
		/* note PFSS_PAWS not set yet */
		memcpy(&tsval, &opt[2], sizeof(u_int32_t));
		memcpy(&tsecr, &opt[6], sizeof(u_int32_t));
		src->scrub->pfss_tsval0 = ntohl(tsval);
		src->scrub->pfss_tsval = ntohl(tsval);
		src->scrub->pfss_tsecr = ntohl(tsecr);
		getmicrouptime(&src->scrub->pfss_last);

		opt += opt[1];
	}

	return (0);
}

void
pf_normalize_tcp_cleanup(struct pf_state *state)
{
	if (state->src.scrub)
		pool_put(&pf_state_scrub_pl, state->src.scrub);
	if (state->dst.scrub)
		pool_put(&pf_state_scrub_pl, state->dst.scrub);

	/* Someday... flush the TCP segment reassembly descriptors. */
}

int
pf_normalize_tcp_stateful(struct pf_pdesc *pd, u_short *reason,
    struct pf_state *state, struct pf_state_peer *src,
    struct pf_state_peer *dst, int *writeback)
{
	struct tcphdr	*th = &pd->hdr.tcp;
	struct timeval	 uptime;
	u_int		 tsval_from_last;
	u_int32_t	 tsval, tsecr;
	int		 copyback = 0;
	int		 got_ts = 0;
	int		 olen;
	u_int8_t	 opts[MAX_TCPOPTLEN], *opt;

	KASSERT(src->scrub || dst->scrub);

	/*
	 * Enforce the minimum TTL seen for this connection.  Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
	 */
	switch (pd->af) {
	case AF_INET:
		if (src->scrub) {
			struct ip *h = mtod(pd->m, struct ip *);
			if (h->ip_ttl > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip_ttl;
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (src->scrub) {
			struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *);
			if (h->ip6_hlim > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip6_hlim;
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(pd->af);
	}

	olen = (th->th_off << 2) - sizeof(*th);

	if (olen >= TCPOLEN_TIMESTAMP &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, olen, NULL, NULL,
	    pd->af)) {

		/* Modulate the timestamps.  Can be used for NAT detection, OS
		 * uptime determination or reboot detection.
		 */
		opt = opts;
		while ((opt = pf_find_tcpopt(opt, opts, olen,
		    TCPOPT_TIMESTAMP, TCPOLEN_TIMESTAMP)) != NULL) {

			u_int8_t *ts = opt + 2;
			u_int8_t *tsr = opt + 6;

			if (got_ts) {
				/* Huh?  Multiple timestamps!? */
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE,
					    "pf: %s: multiple TS??", __func__);
					pf_print_state(state);
					addlog("\n");
				}
				REASON_SET(reason, PFRES_TS);
				return (PF_DROP);
			}

			memcpy(&tsval, ts, sizeof(u_int32_t));
			memcpy(&tsecr, tsr, sizeof(u_int32_t));

			/* modulate TS */
			if (tsval && src->scrub &&
			    (src->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* tsval used further on */
				tsval = ntohl(tsval);
				pf_patch_32_unaligned(pd,
				    ts, htonl(tsval + src->scrub->pfss_ts_mod),
				    PF_ALGNMNT(ts - opts));
				copyback = 1;
			}

			/* modulate TS reply if any (!0) */
			if (tsecr && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* tsecr used further on */
				tsecr = ntohl(tsecr) - dst->scrub->pfss_ts_mod;
				pf_patch_32_unaligned(pd,
				    tsr, htonl(tsecr), PF_ALGNMNT(tsr - opts));
				copyback = 1;
			}

			got_ts = 1;
			opt += opt[1];
		}

		if (copyback) {
			/* Copyback the options, caller copies back header */
			*writeback = 1;
			m_copyback(pd->m, pd->off + sizeof(*th), olen, opts, M_NOWAIT);
		}
	}

	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms.  That turns out to
	 * be about 24 days before it wraps.  XXX Right now our lowerbound
	 * TS echo check only works for the first 12 days of a connection
	 * when the TS has exhausted half its 32bit space
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */
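
	/*
	 * Arithmetic behind the 24-day figure (added for clarity): at the
	 * maximum timestamp clock rate of 1ms, half of the 32-bit timestamp
	 * space is 2^31 ms = 2147483.648 seconds, roughly 24.8 days.
	 * TS_MAX_IDLE above is 24 days expressed in seconds.
	 */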

	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    time_uptime - state->creation > TS_MAX_CONN)) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: src idled out of PAWS ");
			pf_print_state(state);
			addlog("\n");
		}
		src->scrub->pfss_flags =
		    (src->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: dst idled out of PAWS ");
			pf_print_state(state);
			addlog("\n");
		}
		dst->scrub->pfss_flags =
		    (dst->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED;
	}

	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers).  PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4Gb).  We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint.  The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now.  The RFC defines the max
		 *    clock rate as 1ms.  We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 *    of 30 seconds due to a route change.  And this
		 *    gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 *    We have to be careful here.  Windows will send an
		 *    initial timestamp of zero and then initialize it
		 *    to a random value after the 3whs; presumably to
		 *    avoid a DoS by having to call an expensive RNG
		 *    during a SYN flood.  Proof MS has at least one
		 *    good security geek.
		 *
		 *  - The TCP timestamp option must also echo the other
		 *    endpoint's timestamp.  The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window.  The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tsecr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine.  The other endpoint's echoed
		 *    values will not decrease.  But there may be
		 *    network conditions that re-order packets and
		 *    cause our view of them to decrease.  For now the
		 *    only lowerbound we can safely determine is that
		 *    the TS echo will never be less than the original
		 *    TS.  XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tsecr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days.  So we just disable TS checking after 24
		 * days of idle time.  We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval	delta_ts;
		int		ts_fudge;

		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp.  This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
			ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];

		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */
		timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
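
		/*
		 * Worked example (added for clarity): after 2.5 seconds of
		 * idle time with a 30 second fudge, the peer's timestamp
		 * may advance by at most (2 + 30) * 1100 + 500000 / 909 =
		 * 35200 + 550 = 35750 ticks beyond the last value seen.
		 */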

		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/* Bad RFC1323 implementation or an insertion attack.
			 *
			 * - Solaris 2.6 and 2.7 are known to send another ACK
			 *   after the FIN,FIN|ACK,ACK closing that carries
			 *   an old timestamp.
			 */

			DPFPRINTF(LOG_NOTICE, "Timestamp failed %c%c%c%c",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' ');
			DPFPRINTF(LOG_NOTICE, " tsval: %u  tsecr: %u  "
			    "+ticks: %u  idle: %llu.%06lus", tsval, tsecr,
			    tsval_from_last, (long long)delta_ts.tv_sec,
			    delta_ts.tv_usec);
			DPFPRINTF(LOG_NOTICE, " src->tsval: %u  tsecr: %u",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr);
			DPFPRINTF(LOG_NOTICE, " dst->tsval: %u  tsecr: %u  "
			    "tsval0: %u", dst->scrub->pfss_tsval,
			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0);
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE, "pf: ");
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				addlog("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
		/* XXX I'd really like to require tsecr but it's optional */
	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Didn't send a timestamp.  Timestamps aren't really useful
		 * when:
		 *  - connection opening or closing (often not even sent).
		 *    but we must not let an attacker put a FIN on a
		 *    data packet to sneak it through our ESTABLISHED check.
		 *  - on a TCP reset.  RFC suggests not even looking at TS.
		 *  - on an empty ACK.  The TS will not be echoed so it will
		 *    probably not help keep the RTT calculation in sync and
		 *    there isn't as much danger when the sequence numbers
		 *    got wrapped.  So some stacks don't include TS on empty
		 *    ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets.  There appear to be devices that do legitimate
		 * TCP connection hijacking.  There are HTTP devices that allow
		 * a 3whs (with timestamps) and then buffer the HTTP request.
		 * If the intermediate device has the HTTP response cache, it
		 * will spoof the response but not bother timestamping its
		 * packets.  So we can look for the presence of a timestamp in
		 * the first data packet and if there, require it in all future
		 * packets.
		 */

		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey!  Someone tried to sneak a packet in.  Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: did not receive expected RFC1323 "
				    "timestamp");
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				addlog("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
	}

	/*
	 * We will note if a host sends its data packets with or without
	 * timestamps.  And require all data packets to contain a timestamp
	 * if the first does.  PAWS implicitly requires that all data packets
	 * be timestamped.  But I think there are middle-man devices that
	 * hijack TCP streams immediately after the 3whs and don't timestamp
	 * their packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		if (got_ts)
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (pf_status.debug >= LOG_NOTICE && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				log(LOG_NOTICE,
				    "pf: broken RFC1323 stack did not "
				    "timestamp data packet. Disabled PAWS "
				    "security.");
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				addlog("\n");
			}
		}
	}

	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
			src->scrub->pfss_tsval = tsval;

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_tsecr = tsecr;

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_flags |= PFSS_PAWS;
		}
	}

	/* I have a dream....  TCP segment reassembly.... */
	return (0);
}

int
pf_normalize_mss(struct pf_pdesc *pd, u_int16_t maxmss)
{
	int		 olen, optsoff;
	u_int8_t	 opts[MAX_TCPOPTLEN], *opt;

	olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
	optsoff = pd->off + sizeof(struct tcphdr);
	if (olen < TCPOLEN_MAXSEG ||
	    !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af))
		return (0);

	opt = opts;
	while ((opt = pf_find_tcpopt(opt, opts, olen,
	    TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) {
		u_int16_t	 mss;
		u_int8_t	*mssp = opt + 2;
		memcpy(&mss, mssp, sizeof(mss));
		if (ntohs(mss) > maxmss) {
			size_t mssoffopts = mssp - opts;
			pf_patch_16_unaligned(pd, &mss,
			    htons(maxmss), PF_ALGNMNT(mssoffopts));
			m_copyback(pd->m, optsoff + mssoffopts,
			    sizeof(mss), &mss, M_NOWAIT);
			m_copyback(pd->m, pd->off,
			    sizeof(struct tcphdr), &pd->hdr.tcp, M_NOWAIT);
		}

		opt += opt[1];
	}

	return (0);
}
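
/*
 * Example (added for illustration): with a max-mss of 1380 configured
 * in pf.conf, a SYN advertising an MSS of 1460 is rewritten in place to
 * 1380 and both the option bytes and the TCP header are copied back
 * into the mbuf; smaller MSS values pass through untouched.
 */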

void
pf_scrub(struct mbuf *m, u_int16_t flags, sa_family_t af, u_int8_t min_ttl,
    u_int8_t tos)
{
	struct ip		*h = mtod(m, struct ip *);
#ifdef INET6
	struct ip6_hdr		*h6 = mtod(m, struct ip6_hdr *);
#endif /* INET6 */

	/* Clear IP_DF if no-df was requested */
	if (flags & PFSTATE_NODF && af == AF_INET && h->ip_off & htons(IP_DF))
		h->ip_off &= htons(~IP_DF);

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (min_ttl && af == AF_INET && h->ip_ttl < min_ttl)
		h->ip_ttl = min_ttl;
#ifdef INET6
	if (min_ttl && af == AF_INET6 && h6->ip6_hlim < min_ttl)
		h6->ip6_hlim = min_ttl;
#endif /* INET6 */

	/* Enforce tos */
	if (flags & PFSTATE_SETTOS) {
		if (af == AF_INET)
			h->ip_tos = tos | (h->ip_tos & IPTOS_ECN_MASK);
#ifdef INET6
		if (af == AF_INET6) {
			/* drugs are unable to explain such idiocy */
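			/*
			 * What the masking does (added for clarity):
			 * ip6_flow holds 4 bits version, 8 bits traffic
			 * class, 20 bits flow label.  0x0fc00000 covers
			 * the DSCP part (upper 6 bits of the traffic
			 * class); clearing it and OR-ing in tos << 20
			 * replaces the DSCP while the two ECN bits the
			 * mask excludes stay intact, matching the IPv4
			 * case above.
			 */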
			h6->ip6_flow &= ~htonl(0x0fc00000);
			h6->ip6_flow |= htonl(((u_int32_t)tos) << 20);
		}
#endif /* INET6 */
	}

	/* random-id, but not for fragments */
	if (flags & PFSTATE_RANDOMID && af == AF_INET &&
	    !(h->ip_off & ~htons(IP_DF)))
		h->ip_id = htons(ip_randomid());
}