LCOV - code coverage report
Current view: top level - net - pf_norm.c (source / functions) Hit Total Coverage
Test: 6.4 Lines: 0 745 0.0 %
Date: 2018-10-19 03:25:38 Functions: 0 44 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*      $OpenBSD: pf_norm.c,v 1.216 2018/09/10 16:14:07 bluhm Exp $ */
       2             : 
       3             : /*
       4             :  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
       5             :  * Copyright 2009 Henning Brauer <henning@openbsd.org>
       6             :  * Copyright 2011-2018 Alexander Bluhm <bluhm@openbsd.org>
       7             :  * All rights reserved.
       8             :  *
       9             :  * Redistribution and use in source and binary forms, with or without
      10             :  * modification, are permitted provided that the following conditions
      11             :  * are met:
      12             :  * 1. Redistributions of source code must retain the above copyright
      13             :  *    notice, this list of conditions and the following disclaimer.
      14             :  * 2. Redistributions in binary form must reproduce the above copyright
      15             :  *    notice, this list of conditions and the following disclaimer in the
      16             :  *    documentation and/or other materials provided with the distribution.
      17             :  *
      18             :  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
      19             :  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
      20             :  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
      21             :  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
      22             :  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
      23             :  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      24             :  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      25             :  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      26             :  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
      27             :  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      28             :  */
      29             : 
      30             : #include "pflog.h"
      31             : 
      32             : #include <sys/param.h>
      33             : #include <sys/systm.h>
      34             : #include <sys/mbuf.h>
      35             : #include <sys/filio.h>
      36             : #include <sys/fcntl.h>
      37             : #include <sys/socket.h>
      38             : #include <sys/kernel.h>
      39             : #include <sys/time.h>
      40             : #include <sys/pool.h>
      41             : #include <sys/syslog.h>
      42             : #include <sys/mutex.h>
      43             : 
      44             : #include <net/if.h>
      45             : #include <net/if_var.h>
      46             : #include <net/if_pflog.h>
      47             : 
      48             : #include <netinet/in.h>
      49             : #include <netinet/ip.h>
      50             : #include <netinet/ip_var.h>
      51             : #include <netinet/ip_icmp.h>
      52             : #include <netinet/tcp.h>
      53             : #include <netinet/tcp_seq.h>
      54             : #include <netinet/tcp_fsm.h>
      55             : #include <netinet/udp.h>
      56             : 
      57             : #ifdef INET6
      58             : #include <netinet6/in6_var.h>
      59             : #include <netinet/ip6.h>
      60             : #include <netinet6/ip6_var.h>
      61             : #include <netinet/icmp6.h>
      62             : #include <netinet6/nd6.h>
      63             : #endif /* INET6 */
      64             : 
      65             : #include <net/pfvar.h>
      66             : #include <net/pfvar_priv.h>
      67             : 
/*
 * One received fragment, queued on a pf_fragment's fr_queue in
 * ascending fe_off order.
 */
struct pf_frent {
        TAILQ_ENTRY(pf_frent) fr_next;  /* link in pf_fragment fr_queue */
        struct mbuf     *fe_m;          /* mbuf chain holding this fragment */
        u_int16_t        fe_hdrlen;     /* ipv4 header length with ip options
                                           ipv6, extension, fragment header */
        u_int16_t        fe_extoff;     /* last extension header offset or 0 */
        u_int16_t        fe_len;        /* fragment length */
        u_int16_t        fe_off;        /* fragment offset */
        u_int16_t        fe_mff;        /* more fragment flag */
};
      78             : 
      79             : RB_HEAD(pf_frag_tree, pf_fragment);
/*
 * Grouping node for reassembly queues: one per (src, dst, af, proto)
 * tuple, holding a tree of pf_fragment queues keyed by fragment id.
 */
struct pf_frnode {
        struct pf_addr  fn_src;         /* ip source address */
        struct pf_addr  fn_dst;         /* ip destination address */
        sa_family_t     fn_af;          /* address family */
        u_int8_t        fn_proto;       /* protocol for fragments in fn_tree */
        u_int8_t        fn_direction;   /* pf packet direction */
        u_int32_t       fn_fragments;   /* number of entries in fn_tree */
        u_int32_t       fn_gen;         /* fr_gen of newest entry in fn_tree */

        RB_ENTRY(pf_frnode) fn_entry;   /* node in global pf_frnode_tree */
        struct pf_frag_tree fn_tree;    /* matching fragments, lookup by id */
};
      92             : 
/*
 * One reassembly queue: all fragments sharing a fragment id within a
 * pf_frnode, kept sorted by offset in fr_queue.
 */
struct pf_fragment {
        struct pf_frent *fr_firstoff[PF_FRAG_ENTRY_POINTS];
                                        /* pointers to queue element */
        u_int8_t        fr_entries[PF_FRAG_ENTRY_POINTS];
                                        /* count entries between pointers */
        RB_ENTRY(pf_fragment) fr_entry; /* node in pf_frnode's fn_tree */
        TAILQ_ENTRY(pf_fragment) frag_next;     /* global LRU queue linkage */
        TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
        u_int32_t       fr_id;          /* fragment id for reassemble */
        int32_t         fr_timeout;     /* compared against a time_uptime
                                           deadline when purging */
        u_int32_t       fr_gen;         /* generation number (per pf_frnode) */
        u_int16_t       fr_maxlen;      /* maximum length of single fragment */
        u_int16_t       fr_holes;       /* number of holes in the queue */
        struct pf_frnode *fr_node;      /* ip src/dst/proto/af for fragments */
};
     108             : 
/*
 * Reassembly metadata carried along with a reassembled packet.
 * NOTE(review): refragmentation on output is inferred from the field
 * names -- confirm against the consumer of this tag.
 */
struct pf_fragment_tag {
        u_int16_t        ft_hdrlen;     /* header length of reassembled pkt */
        u_int16_t        ft_extoff;     /* last extension header offset or 0 */
        u_int16_t        ft_maxlen;     /* maximum fragment payload length */
};
     114             : 
     115             : TAILQ_HEAD(pf_fragqueue, pf_fragment)   pf_fragqueue;
     116             : 
     117             : static __inline int      pf_frnode_compare(struct pf_frnode *,
     118             :                             struct pf_frnode *);
     119             : RB_HEAD(pf_frnode_tree, pf_frnode)      pf_frnode_tree;
     120             : RB_PROTOTYPE(pf_frnode_tree, pf_frnode, fn_entry, pf_frnode_compare);
     121           0 : RB_GENERATE(pf_frnode_tree, pf_frnode, fn_entry, pf_frnode_compare);
     122             : 
     123             : static __inline int      pf_frag_compare(struct pf_fragment *,
     124             :                             struct pf_fragment *);
     125             : RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
     126           0 : RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
     127             : 
     128             : /* Private prototypes */
     129             : void                     pf_flush_fragments(void);
     130             : void                     pf_free_fragment(struct pf_fragment *);
     131             : struct pf_fragment      *pf_find_fragment(struct pf_frnode *, u_int32_t);
     132             : struct pf_frent         *pf_create_fragment(u_short *);
     133             : int                      pf_frent_holes(struct pf_frent *);
     134             : static inline int        pf_frent_index(struct pf_frent *);
     135             : int                      pf_frent_insert(struct pf_fragment *,
     136             :                             struct pf_frent *, struct pf_frent *);
     137             : void                     pf_frent_remove(struct pf_fragment *,
     138             :                             struct pf_frent *);
     139             : struct pf_frent         *pf_frent_previous(struct pf_fragment *,
     140             :                             struct pf_frent *);
     141             : struct pf_fragment      *pf_fillup_fragment(struct pf_frnode *, u_int32_t,
     142             :                             struct pf_frent *, u_short *);
     143             : struct mbuf             *pf_join_fragment(struct pf_fragment *);
     144             : int                      pf_reassemble(struct mbuf **, int, u_short *);
     145             : #ifdef INET6
     146             : int                      pf_reassemble6(struct mbuf **, struct ip6_frag *,
     147             :                             u_int16_t, u_int16_t, int, u_short *);
     148             : #endif /* INET6 */
     149             : 
     150             : /* Globals */
     151             : struct pool              pf_frent_pl, pf_frag_pl, pf_frnode_pl;
     152             : struct pool              pf_state_scrub_pl;
     153             : int                      pf_nfrents;
     154             : 
     155             : #ifdef WITH_PF_LOCK
     156             : struct mutex             pf_frag_mtx;
     157             : 
     158             : #define PF_FRAG_LOCK_INIT()     mtx_init(&pf_frag_mtx, IPL_SOFTNET)
     159             : #define PF_FRAG_LOCK()          mtx_enter(&pf_frag_mtx)
     160             : #define PF_FRAG_UNLOCK()        mtx_leave(&pf_frag_mtx)
     161             : #else /* !WITH_PF_LOCK */
     162             : #define PF_FRAG_LOCK_INIT()     (void)(0)
     163             : #define PF_FRAG_LOCK()          (void)(0)
     164             : #define PF_FRAG_UNLOCK()        (void)(0)
     165             : #endif /* WITH_PF_LOCK */
     166             : 
     167             : void
     168           0 : pf_normalize_init(void)
     169             : {
     170           0 :         pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0,
     171             :             IPL_SOFTNET, 0, "pffrent", NULL);
     172           0 :         pool_init(&pf_frnode_pl, sizeof(struct pf_frnode), 0,
     173             :             IPL_SOFTNET, 0, "pffrnode", NULL);
     174           0 :         pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0,
     175             :             IPL_SOFTNET, 0, "pffrag", NULL);
     176           0 :         pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0,
     177             :             IPL_SOFTNET, 0, "pfstscr", NULL);
     178             : 
     179           0 :         pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
     180           0 :         pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
     181             : 
     182           0 :         TAILQ_INIT(&pf_fragqueue);
     183             : 
     184             :         PF_FRAG_LOCK_INIT();
     185           0 : }
     186             : 
     187             : static __inline int
     188           0 : pf_frnode_compare(struct pf_frnode *a, struct pf_frnode *b)
     189             : {
     190             :         int     diff;
     191             : 
     192           0 :         if ((diff = a->fn_proto - b->fn_proto) != 0)
     193           0 :                 return (diff);
     194           0 :         if ((diff = a->fn_af - b->fn_af) != 0)
     195           0 :                 return (diff);
     196           0 :         if ((diff = pf_addr_compare(&a->fn_src, &b->fn_src, a->fn_af)) != 0)
     197           0 :                 return (diff);
     198           0 :         if ((diff = pf_addr_compare(&a->fn_dst, &b->fn_dst, a->fn_af)) != 0)
     199           0 :                 return (diff);
     200             : 
     201           0 :         return (0);
     202           0 : }
     203             : 
     204             : static __inline int
     205           0 : pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
     206             : {
     207             :         int     diff;
     208             : 
     209           0 :         if ((diff = a->fr_id - b->fr_id) != 0)
     210           0 :                 return (diff);
     211             : 
     212           0 :         return (0);
     213           0 : }
     214             : 
/*
 * Free all reassembly queues whose fr_timeout lies before the
 * fragment-timeout deadline.  Must be called without the pf lock held
 * (asserted below); fragment state is protected by the fragment mutex.
 */
void
pf_purge_expired_fragments(void)
{
        struct pf_fragment      *frag;
        int32_t                  expire;

        PF_ASSERT_UNLOCKED();

        /* Anything last stamped before this uptime value has expired. */
        expire = time_uptime - pf_default_rule.timeout[PFTM_FRAG];

        PF_FRAG_LOCK();
        while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
                /*
                 * pf_fragqueue keeps recently used queues at the head
                 * (see pf_find_fragment), so the tail is the oldest.
                 * NOTE(review): assumes fr_timeout follows that LRU
                 * order -- confirm against where fr_timeout is set.
                 */
                if (frag->fr_timeout > expire)
                        break;
                DPFPRINTF(LOG_NOTICE, "expiring %d(%p)", frag->fr_id, frag);
                pf_free_fragment(frag);
        }
        PF_FRAG_UNLOCK();
}
     234             : 
     235             : /*
     236             :  * Try to flush old fragments to make space for new ones
     237             :  */
     238             : void
     239           0 : pf_flush_fragments(void)
     240             : {
     241             :         struct pf_fragment      *frag;
     242             :         int                      goal;
     243             : 
     244           0 :         goal = pf_nfrents * 9 / 10;
     245           0 :         DPFPRINTF(LOG_NOTICE, "trying to free > %d frents", pf_nfrents - goal);
     246           0 :         while (goal < pf_nfrents) {
     247           0 :                 if ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) == NULL)
     248             :                         break;
     249           0 :                 pf_free_fragment(frag);
     250             :         }
     251           0 : }
     252             : 
     253             : /*
     254             :  * Remove a fragment from the fragment queue, free its fragment entries,
     255             :  * and free the fragment itself.
     256             :  */
void
pf_free_fragment(struct pf_fragment *frag)
{
        struct pf_frent         *frent;
        struct pf_frnode        *frnode;

        /* Detach the queue from its (src, dst, af, proto) node. */
        frnode = frag->fr_node;
        RB_REMOVE(pf_frag_tree, &frnode->fn_tree, frag);
        KASSERT(frnode->fn_fragments >= 1);
        frnode->fn_fragments--;
        if (frnode->fn_fragments == 0) {
                /* Last queue on this node: free the node as well. */
                KASSERT(RB_EMPTY(&frnode->fn_tree));
                RB_REMOVE(pf_frnode_tree, &pf_frnode_tree, frnode);
                pool_put(&pf_frnode_pl, frnode);
        }
        /* Unlink from the global LRU queue. */
        TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);

        /* Free all fragment entries */
        while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) {
                TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
                m_freem(frent->fe_m);           /* drop the fragment's mbuf */
                pool_put(&pf_frent_pl, frent);
                pf_nfrents--;                   /* keep global count in sync */
        }
        pool_put(&pf_frag_pl, frag);
}
     283             : 
/*
 * Look up the reassembly queue for fragment id "id" under the
 * (proto, af, src, dst) node described by "key".  On success the queue
 * is moved to the head of the global LRU queue.  Stale queues (see the
 * comment below) are freed and NULL is returned instead.
 */
struct pf_fragment *
pf_find_fragment(struct pf_frnode *key, u_int32_t id)
{
        struct pf_fragment      *frag, idkey;
        struct pf_frnode        *frnode;
        u_int32_t                stale;

        frnode = RB_FIND(pf_frnode_tree, &pf_frnode_tree, key);
        if (frnode == NULL)
                return (NULL);
        KASSERT(frnode->fn_fragments >= 1);
        /* Only fr_id is examined by pf_frag_compare(). */
        idkey.fr_id = id;
        frag = RB_FIND(pf_frag_tree, &frnode->fn_tree, &idkey);
        if (frag == NULL)
                return (NULL);
        /*
         * Limit the number of fragments we accept for each (proto,src,dst,af)
         * combination (aka pf_frnode), so we can deal better with a high rate
         * of fragments.  Problem analysis is in RFC 4963.
         * Store the current generation for each pf_frnode in fn_gen and on
         * lookup discard 'stale' fragments (pf_fragment, based on the fr_gen
         * member).  Instead of adding another button interpret the pf fragment
         * timeout in multiples of 200 fragments.  This way the default of 60s
         * means: pf_fragment objects older than 60*200 = 12,000 generations
         * are considered stale.
         */
        stale = pf_default_rule.timeout[PFTM_FRAG] * PF_FRAG_STALE;
        if ((frnode->fn_gen - frag->fr_gen) >= stale) {
                DPFPRINTF(LOG_NOTICE, "stale fragment %d(%p), gen %u, num %u",
                    frag->fr_id, frag, frag->fr_gen, frnode->fn_fragments);
                pf_free_fragment(frag);
                return (NULL);
        }
        /* Mark as most recently used for the purge/flush LRU order. */
        TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
        TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);

        return (frag);
}
     322             : 
     323             : struct pf_frent *
     324           0 : pf_create_fragment(u_short *reason)
     325             : {
     326             :         struct pf_frent *frent;
     327             : 
     328           0 :         frent = pool_get(&pf_frent_pl, PR_NOWAIT);
     329           0 :         if (frent == NULL) {
     330           0 :                 pf_flush_fragments();
     331           0 :                 frent = pool_get(&pf_frent_pl, PR_NOWAIT);
     332           0 :                 if (frent == NULL) {
     333           0 :                         REASON_SET(reason, PFRES_MEMORY);
     334           0 :                         return (NULL);
     335             :                 }
     336             :         }
     337           0 :         pf_nfrents++;
     338             : 
     339           0 :         return (frent);
     340           0 : }
     341             : 
     342             : /*
     343             :  * Calculate the additional holes that were created in the fragment
     344             :  * queue by inserting this fragment.  A fragment in the middle
     345             :  * creates one more hole by splitting.  For each connected side,
     346             :  * it loses one hole.
     347             :  * Fragment entry must be in the queue when calling this function.
     348             :  */
     349             : int
     350           0 : pf_frent_holes(struct pf_frent *frent)
     351             : {
     352           0 :         struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next);
     353           0 :         struct pf_frent *next = TAILQ_NEXT(frent, fr_next);
     354             :         int holes = 1;
     355             : 
     356           0 :         if (prev == NULL) {
     357           0 :                 if (frent->fe_off == 0)
     358           0 :                         holes--;
     359             :         } else {
     360           0 :                 KASSERT(frent->fe_off != 0);
     361           0 :                 if (frent->fe_off == prev->fe_off + prev->fe_len)
     362           0 :                         holes--;
     363             :         }
     364           0 :         if (next == NULL) {
     365           0 :                 if (!frent->fe_mff)
     366           0 :                         holes--;
     367             :         } else {
     368           0 :                 KASSERT(frent->fe_mff);
     369           0 :                 if (next->fe_off == frent->fe_off + frent->fe_len)
     370           0 :                         holes--;
     371             :         }
     372           0 :         return holes;
     373             : }
     374             : 
     375             : static inline int
     376           0 : pf_frent_index(struct pf_frent *frent)
     377             : {
     378             :         /*
     379             :          * We have an array of 16 entry points to the queue.  A full size
     380             :          * 65535 octet IP packet can have 8192 fragments.  So the queue
     381             :          * traversal length is at most 512 and at most 16 entry points are
     382             :          * checked.  We need 128 additional bytes on a 64 bit architecture.
     383             :          */
     384             :         CTASSERT(((u_int16_t)0xffff &~ 7) / (0x10000 / PF_FRAG_ENTRY_POINTS) ==
     385             :             16 - 1);
     386             :         CTASSERT(((u_int16_t)0xffff >> 3) / PF_FRAG_ENTRY_POINTS == 512 - 1);
     387             : 
     388           0 :         return frent->fe_off / (0x10000 / PF_FRAG_ENTRY_POINTS);
     389             : }
     390             : 
/*
 * Insert frent into frag's sorted queue after prev (at the head when
 * prev is NULL), maintaining the fr_firstoff entry-point cache, the
 * per-bucket entry counts and the hole count.  Returns ENOBUFS when
 * the target bucket already holds PF_FRAG_ENTRY_LIMIT entries, 0 on
 * success.
 */
int
pf_frent_insert(struct pf_fragment *frag, struct pf_frent *frent,
    struct pf_frent *prev)
{
        CTASSERT(PF_FRAG_ENTRY_LIMIT <= 0xff);
        int index;

        /*
         * A packet has at most 65536 octets.  With 16 entry points, each one
         * spawns 4096 octets.  We limit these to 64 fragments each, which
         * means on average every fragment must have at least 64 octets.
         */
        index = pf_frent_index(frent);
        if (frag->fr_entries[index] >= PF_FRAG_ENTRY_LIMIT)
                return ENOBUFS;
        frag->fr_entries[index]++;

        /* Link into the offset-sorted queue right after prev. */
        if (prev == NULL) {
                TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
        } else {
                KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off);
                TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
        }

        /* Keep fr_firstoff[index] pointing at the bucket's lowest offset. */
        if (frag->fr_firstoff[index] == NULL) {
                KASSERT(prev == NULL || pf_frent_index(prev) < index);
                frag->fr_firstoff[index] = frent;
        } else {
                if (frent->fe_off < frag->fr_firstoff[index]->fe_off) {
                        /* New lowest offset in this bucket. */
                        KASSERT(prev == NULL || pf_frent_index(prev) < index);
                        frag->fr_firstoff[index] = frent;
                } else {
                        /* Inserted behind an existing entry of this bucket. */
                        KASSERT(prev != NULL);
                        KASSERT(pf_frent_index(prev) == index);
                }
        }

        frag->fr_holes += pf_frent_holes(frent);

        return 0;
}
     432             : 
/*
 * Remove frent from frag's queue, updating the fr_firstoff entry-point
 * cache, the per-bucket entry count and the hole count.  Counterpart
 * of pf_frent_insert().
 */
void
pf_frent_remove(struct pf_fragment *frag, struct pf_frent *frent)
{
        struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next);
        struct pf_frent *next = TAILQ_NEXT(frent, fr_next);
        int index;

        /* Account for the holes this entry was closing or splitting. */
        frag->fr_holes -= pf_frent_holes(frent);

        index = pf_frent_index(frent);
        KASSERT(frag->fr_firstoff[index] != NULL);
        if (frag->fr_firstoff[index]->fe_off == frent->fe_off) {
                /* frent was the bucket's first entry; find a successor. */
                if (next == NULL) {
                        frag->fr_firstoff[index] = NULL;
                } else {
                        KASSERT(frent->fe_off + frent->fe_len <= next->fe_off);
                        if (pf_frent_index(next) == index) {
                                /* Successor stays in the same bucket. */
                                frag->fr_firstoff[index] = next;
                        } else {
                                /* Bucket becomes empty. */
                                frag->fr_firstoff[index] = NULL;
                        }
                }
        } else {
                /* Not the bucket head; the cache needs no update. */
                KASSERT(frag->fr_firstoff[index]->fe_off < frent->fe_off);
                KASSERT(prev != NULL);
                KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off);
                KASSERT(pf_frent_index(prev) == index);
        }

        TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);

        KASSERT(frag->fr_entries[index] > 0);
        frag->fr_entries[index]--;
}
     467             : 
/*
 * Find the queue entry after which frent must be inserted to keep
 * frag's queue sorted by offset.  Returns NULL when frent belongs at
 * the head.  Uses the fr_firstoff entry points to avoid scanning the
 * whole queue; the queue must not be empty.
 */
struct pf_frent *
pf_frent_previous(struct pf_fragment *frag, struct pf_frent *frent)
{
        struct pf_frent *prev, *next;
        int index;

        /*
         * If there are no fragments after frag, take the final one.  Assume
         * that the global queue is not empty.
         */
        prev = TAILQ_LAST(&frag->fr_queue, pf_fragq);
        KASSERT(prev != NULL);
        if (prev->fe_off <= frent->fe_off)
                return prev;
        /*
         * We want to find a fragment entry that is before frag, but still
         * close to it.  Find the first fragment entry that is in the same
         * entry point or in the first entry point after that.  As we have
         * already checked that there are entries behind frag, this will
         * succeed.
         */
        for (index = pf_frent_index(frent); index < PF_FRAG_ENTRY_POINTS;
            index++) {
                prev = frag->fr_firstoff[index];
                if (prev != NULL)
                        break;
        }
        KASSERT(prev != NULL);
        /*
         * In prev we may have a fragment from the same entry point that is
         * before frent, or one that is just one position behind frent.
         * In the latter case, we go back one step and have the predecessor.
         * There may be none if the new fragment will be the first one.
         */
        if (prev->fe_off > frent->fe_off) {
                prev = TAILQ_PREV(prev, pf_fragq, fr_next);
                if (prev == NULL)
                        return NULL;
                KASSERT(prev->fe_off <= frent->fe_off);
                return prev;
        }
        /*
         * In prev is the first fragment of the entry point.  The offset
         * of frag is behind it.  Find the closest previous fragment.
         */
        for (next = TAILQ_NEXT(prev, fr_next); next != NULL;
            next = TAILQ_NEXT(next, fr_next)) {
                if (next->fe_off > frent->fe_off)
                        break;
                prev = next;
        }
        return prev;
}
     521             : 
/*
 * Insert the fragment frent into the reassembly queue identified by
 * (key, id), creating the pf_fragment and its pf_frnode on first use.
 * IPv4 fragments that overlap already-queued data are trimmed; for IPv6
 * any overlap discards the whole datagram (RFC 5722).  On success the
 * queue takes ownership of frent (and its mbuf) and the queue is
 * returned.  On failure frent is freed, *reason is set, and NULL is
 * returned.  Must be called with the pf fragment lock held.
 */
struct pf_fragment *
pf_fillup_fragment(struct pf_frnode *key, u_int32_t id,
    struct pf_frent *frent, u_short *reason)
{
	struct pf_frent		*after, *next, *prev;
	struct pf_fragment	*frag;
	struct pf_frnode	*frnode;
	u_int16_t		 total;

	/* No empty fragments */
	if (frent->fe_len == 0) {
		DPFPRINTF(LOG_NOTICE, "bad fragment: len 0");
		goto bad_fragment;
	}

	/* All fragments are 8 byte aligned */
	if (frent->fe_mff && (frent->fe_len & 0x7)) {
		DPFPRINTF(LOG_NOTICE, "bad fragment: mff and len %d",
		    frent->fe_len);
		goto bad_fragment;
	}

	/* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET */
	if (frent->fe_off + frent->fe_len > IP_MAXPACKET) {
		DPFPRINTF(LOG_NOTICE, "bad fragment: max packet %d",
		    frent->fe_off + frent->fe_len);
		goto bad_fragment;
	}

	DPFPRINTF(LOG_INFO, key->fn_af == AF_INET ?
	    "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d",
	    id, frent->fe_off, frent->fe_off + frent->fe_len);

	/* Fully buffer all of the fragments in this fragment queue */
	frag = pf_find_fragment(key, id);

	/* Create a new reassembly queue for this packet */
	if (frag == NULL) {
		frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (frag == NULL) {
			/* Reclaim memory from old queues, then retry once. */
			pf_flush_fragments();
			frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (frag == NULL) {
				REASON_SET(reason, PFRES_MEMORY);
				goto drop_fragment;
			}
		}
		/* One frnode is shared by all queues with the same key. */
		frnode = RB_FIND(pf_frnode_tree, &pf_frnode_tree, key);
		if (frnode == NULL) {
			frnode = pool_get(&pf_frnode_pl, PR_NOWAIT);
			if (frnode == NULL) {
				pf_flush_fragments();
				frnode = pool_get(&pf_frnode_pl, PR_NOWAIT);
				if (frnode == NULL) {
					REASON_SET(reason, PFRES_MEMORY);
					pool_put(&pf_frag_pl, frag);
					goto drop_fragment;
				}
			}
			*frnode = *key;
			RB_INIT(&frnode->fn_tree);
			frnode->fn_fragments = 0;
			frnode->fn_gen = 0;
		}
		memset(frag->fr_firstoff, 0, sizeof(frag->fr_firstoff));
		memset(frag->fr_entries, 0, sizeof(frag->fr_entries));
		TAILQ_INIT(&frag->fr_queue);
		frag->fr_id = id;
		frag->fr_timeout = time_uptime;
		frag->fr_gen = frnode->fn_gen++;
		frag->fr_maxlen = frent->fe_len;
		/* The first fragment opens one hole covering the rest. */
		frag->fr_holes = 1;
		frag->fr_node = frnode;
		/* RB_INSERT cannot fail as pf_find_fragment() found nothing */
		RB_INSERT(pf_frag_tree, &frnode->fn_tree, frag);
		frnode->fn_fragments++;
		if (frnode->fn_fragments == 1)
			RB_INSERT(pf_frnode_tree, &pf_frnode_tree, frnode);
		TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);

		/* We do not have a previous fragment, cannot fail. */
		pf_frent_insert(frag, frent, NULL);

		return (frag);
	}

	KASSERT(!TAILQ_EMPTY(&frag->fr_queue));
	KASSERT(frag->fr_node);

	/* Remember maximum fragment len for refragmentation */
	if (frent->fe_len > frag->fr_maxlen)
		frag->fr_maxlen = frent->fe_len;

	/* Maximum data we have seen already */
	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;

	/* Non terminal fragments must have more fragments flag */
	if (frent->fe_off + frent->fe_len < total && !frent->fe_mff)
		goto free_ipv6_fragment;

	/* Check if we saw the last fragment already */
	if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) {
		/* Final fragment queued: nothing may extend past the end. */
		if (frent->fe_off + frent->fe_len > total ||
		    (frent->fe_off + frent->fe_len == total && frent->fe_mff))
			goto free_ipv6_fragment;
	} else {
		/* A conflicting final fragment ending at total is invalid. */
		if (frent->fe_off + frent->fe_len == total && !frent->fe_mff)
			goto free_ipv6_fragment;
	}

	/* Find neighbors for newly inserted fragment */
	prev = pf_frent_previous(frag, frent);
	if (prev == NULL) {
		after = TAILQ_FIRST(&frag->fr_queue);
		KASSERT(after != NULL);
	} else {
		after = TAILQ_NEXT(prev, fr_next);
	}

	/* Trim the head of frent when it overlaps its predecessor (IPv4). */
	if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
		u_int16_t	precut;

#ifdef INET6
		if (frag->fr_node->fn_af == AF_INET6)
			goto free_ipv6_fragment;
#endif /* INET6 */

		precut = prev->fe_off + prev->fe_len - frent->fe_off;
		if (precut >= frent->fe_len) {
			DPFPRINTF(LOG_NOTICE, "new frag overlapped");
			goto drop_fragment;
		}
		DPFPRINTF(LOG_NOTICE, "frag head overlap %d", precut);
		m_adj(frent->fe_m, precut);
		frent->fe_off += precut;
		frent->fe_len -= precut;
	}

	/* Trim or drop queued successors that frent's tail overlaps (IPv4). */
	for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off;
	    after = next) {
		u_int16_t	aftercut;

#ifdef INET6
		if (frag->fr_node->fn_af == AF_INET6)
			goto free_ipv6_fragment;
#endif /* INET6 */

		aftercut = frent->fe_off + frent->fe_len - after->fe_off;
		if (aftercut < after->fe_len) {
			DPFPRINTF(LOG_NOTICE, "frag tail overlap %d", aftercut);
			m_adj(after->fe_m, aftercut);
			after->fe_off += aftercut;
			after->fe_len -= aftercut;
			break;
		}

		/* This fragment is completely overlapped, lose it */
		DPFPRINTF(LOG_NOTICE, "old frag overlapped");
		next = TAILQ_NEXT(after, fr_next);
		pf_frent_remove(frag, after);
		m_freem(after->fe_m);
		pool_put(&pf_frent_pl, after);
		pf_nfrents--;
	}

	/* If part of the queue gets too long, there is no way to recover. */
	if (pf_frent_insert(frag, frent, prev)) {
		DPFPRINTF(LOG_WARNING, "fragment queue limit exceeded");
		goto free_fragment;
	}

	return (frag);

free_ipv6_fragment:
	if (frag->fr_node->fn_af == AF_INET)
		goto bad_fragment;
	/*
	 * RFC 5722, Errata 3089:  When reassembling an IPv6 datagram, if one
	 * or more of its constituent fragments is determined to be an
	 * overlapping fragment, the entire datagram (and any constituent
	 * fragments) MUST be silently discarded.
	 */
	DPFPRINTF(LOG_NOTICE, "flush overlapping fragments");
free_fragment:
	pf_free_fragment(frag);
bad_fragment:
	REASON_SET(reason, PFRES_FRAG);
drop_fragment:
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	return (NULL);
}
     715             : 
/*
 * Concatenate all fragment mbufs of a complete queue into one packet
 * chain and release the reassembly state.  Each fragment entry and the
 * pf_fragment itself are freed; the returned mbuf chain still needs its
 * packet header length and IP header fixed up by the caller.
 */
struct mbuf *
pf_join_fragment(struct pf_fragment *frag)
{
	struct mbuf		*m, *m2;
	struct pf_frent		*frent;

	/* The first fragment keeps its IP header and becomes the packet. */
	frent = TAILQ_FIRST(&frag->fr_queue);
	TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);

	m = frent->fe_m;
	/* Strip off any trailing bytes */
	if ((frent->fe_hdrlen + frent->fe_len) < m->m_pkthdr.len)
		m_adj(m, (frent->fe_hdrlen + frent->fe_len) - m->m_pkthdr.len);
	/* Magic from ip_input */
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;

	/* Append the payload of every remaining fragment in offset order. */
	while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) {
		TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
		m2 = frent->fe_m;
		/* Strip off ip header */
		m_adj(m2, frent->fe_hdrlen);
		/* Strip off any trailing bytes */
		if (frent->fe_len < m2->m_pkthdr.len)
			m_adj(m2, frent->fe_len - m2->m_pkthdr.len);
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_removehdr(m2);
		m_cat(m, m2);
	}

	/* Remove from fragment queue */
	pf_free_fragment(frag);

	return (m);
}
     755             : 
/*
 * Reassemble the IPv4 fragment in *m0.  Returns PF_PASS with *m0
 * pointing at the complete reassembled packet, or PF_PASS with
 * *m0 == NULL when the fragment was queued and more are needed (the
 * caller must not treat that as an error or touch the mbuf again).
 * Returns PF_DROP on bad or oversized fragments; *m0 stays valid only
 * on the too-big path, as pf_test() requires an mbuf to free.
 */
int
pf_reassemble(struct mbuf **m0, int dir, u_short *reason)
{
	struct mbuf		*m = *m0;
	struct ip		*ip = mtod(m, struct ip *);
	struct pf_frent		*frent;
	struct pf_fragment	*frag;
	struct pf_frnode	 key;
	u_int16_t		 total, hdrlen;

	/* Get an entry for the fragment queue */
	if ((frent = pf_create_fragment(reason)) == NULL)
		return (PF_DROP);

	/* Fragment offset and length are in bytes; ip_off counts 8-byte units. */
	frent->fe_m = m;
	frent->fe_hdrlen = ip->ip_hl << 2;
	frent->fe_extoff = 0;
	frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
	frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	frent->fe_mff = ntohs(ip->ip_off) & IP_MF;

	key.fn_src.v4 = ip->ip_src;
	key.fn_dst.v4 = ip->ip_dst;
	key.fn_af = AF_INET;
	key.fn_proto = ip->ip_p;
	key.fn_direction = dir;

	PF_FRAG_LOCK();
	if ((frag = pf_fillup_fragment(&key, ip->ip_id, frent, reason))
	    == NULL) {
		PF_FRAG_UNLOCK();
		return (PF_DROP);
	}

	/* The mbuf is part of the fragment entry, no direct free or access */
	m = *m0 = NULL;

	if (frag->fr_holes) {
		DPFPRINTF(LOG_DEBUG, "frag %d, holes %d",
		    frag->fr_id, frag->fr_holes);
		PF_FRAG_UNLOCK();
		return (PF_PASS);  /* drop because *m0 is NULL, no error */
	}

	/* We have all the data */
	frent = TAILQ_FIRST(&frag->fr_queue);
	KASSERT(frent != NULL);
	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
	hdrlen = frent->fe_hdrlen;
	m = *m0 = pf_join_fragment(frag);
	frag = NULL;
	m_calchdrlen(m);

	/* Rewrite the header to describe the reassembled, unfragmented packet. */
	ip = mtod(m, struct ip *);
	ip->ip_len = htons(hdrlen + total);
	ip->ip_off &= ~(IP_MF|IP_OFFMASK);

	if (hdrlen + total > IP_MAXPACKET) {
		PF_FRAG_UNLOCK();
		DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
		ip->ip_len = 0;
		REASON_SET(reason, PFRES_SHORT);
		/* PF_DROP requires a valid mbuf *m0 in pf_test() */
		return (PF_DROP);
	}

	PF_FRAG_UNLOCK();
	DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip->ip_len));
	return (PF_PASS);
}
     827             : 
     828             : #ifdef INET6
/*
 * Reassemble the IPv6 fragment in *m0.  fraghdr points at the fragment
 * extension header, hdrlen is the offset of the payload behind it, and
 * extoff the offset of the last extension header before it (0 if none).
 * On completion the fragment header is deleted, the next-header chain is
 * repaired, and a PACKET_TAG_PF_REASSEMBLED tag records hdrlen, extoff
 * and the maximum fragment size so pf_refragment6() can split the packet
 * again on output.  Returns PF_PASS with the reassembled packet in *m0,
 * PF_PASS with *m0 == NULL when more fragments are needed, or PF_DROP.
 */
int
pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr,
    u_int16_t hdrlen, u_int16_t extoff, int dir, u_short *reason)
{
	struct mbuf		*m = *m0;
	struct ip6_hdr		*ip6 = mtod(m, struct ip6_hdr *);
	struct m_tag		*mtag;
	struct pf_fragment_tag	*ftag;
	struct pf_frent		*frent;
	struct pf_fragment	*frag;
	struct pf_frnode	 key;
	int			 off;
	u_int16_t		 total, maxlen;
	u_int8_t		 proto;

	/* Get an entry for the fragment queue */
	if ((frent = pf_create_fragment(reason)) == NULL)
		return (PF_DROP);

	frent->fe_m = m;
	frent->fe_hdrlen = hdrlen;
	frent->fe_extoff = extoff;
	frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen;
	frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
	frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG;

	key.fn_src.v6 = ip6->ip6_src;
	key.fn_dst.v6 = ip6->ip6_dst;
	key.fn_af = AF_INET6;
	/* Only the first fragment's protocol is relevant */
	key.fn_proto = 0;
	key.fn_direction = dir;

	PF_FRAG_LOCK();
	if ((frag = pf_fillup_fragment(&key, fraghdr->ip6f_ident, frent,
	    reason)) == NULL) {
		PF_FRAG_UNLOCK();
		return (PF_DROP);
	}

	/* The mbuf is part of the fragment entry, no direct free or access */
	m = *m0 = NULL;

	if (frag->fr_holes) {
		DPFPRINTF(LOG_DEBUG, "frag %#08x, holes %d",
		    frag->fr_id, frag->fr_holes);
		PF_FRAG_UNLOCK();
		return (PF_PASS);  /* drop because *m0 is NULL, no error */
	}

	/* We have all the data */
	extoff = frent->fe_extoff;
	maxlen = frag->fr_maxlen;
	frent = TAILQ_FIRST(&frag->fr_queue);
	KASSERT(frent != NULL);
	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
	/* hdrlen now excludes the fragment header that will be removed. */
	hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag);
	m = *m0 = pf_join_fragment(frag);
	frag = NULL;

	/* Take protocol from first fragment header */
	if ((m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt),
	    &off)) == NULL)
		panic("%s: short frag mbuf chain", __func__);
	proto = *(mtod(m, caddr_t) + off);
	m = *m0;

	/* Delete frag6 header */
	if (frag6_deletefraghdr(m, hdrlen) != 0)
		goto fail;

	m_calchdrlen(m);

	/* Save refragmentation parameters for pf_refragment6() on output. */
	if ((mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED, sizeof(struct
	    pf_fragment_tag), M_NOWAIT)) == NULL)
		goto fail;
	ftag = (struct pf_fragment_tag *)(mtag + 1);
	ftag->ft_hdrlen = hdrlen;
	ftag->ft_extoff = extoff;
	ftag->ft_maxlen = maxlen;
	m_tag_prepend(m, mtag);

	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total);
	if (extoff) {
		/* Write protocol into next field of last extension header */
		if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext,
		    ip6e_nxt), &off)) == NULL)
			panic("%s: short ext mbuf chain", __func__);
		*(mtod(m, caddr_t) + off) = proto;
		m = *m0;
	} else
		ip6->ip6_nxt = proto;

	if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
		PF_FRAG_UNLOCK();
		DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
		ip6->ip6_plen = 0;
		REASON_SET(reason, PFRES_SHORT);
		/* PF_DROP requires a valid mbuf *m0 in pf_test6() */
		return (PF_DROP);
	}
	PF_FRAG_UNLOCK();

	DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip6->ip6_plen));
	return (PF_PASS);

fail:
	PF_FRAG_UNLOCK();
	REASON_SET(reason, PFRES_MEMORY);
	/* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */
	return (PF_DROP);
}
     943             : 
     944             : int
     945           0 : pf_refragment6(struct mbuf **m0, struct m_tag *mtag, struct sockaddr_in6 *dst,
     946             :     struct ifnet *ifp, struct rtentry *rt)
     947             : {
     948           0 :         struct mbuf             *m = *m0, *t;
     949           0 :         struct pf_fragment_tag  *ftag = (struct pf_fragment_tag *)(mtag + 1);
     950             :         u_int32_t                mtu;
     951             :         u_int16_t                hdrlen, extoff, maxlen;
     952             :         u_int8_t                 proto;
     953             :         int                      error, action;
     954             : 
     955           0 :         hdrlen = ftag->ft_hdrlen;
     956           0 :         extoff = ftag->ft_extoff;
     957           0 :         maxlen = ftag->ft_maxlen;
     958           0 :         m_tag_delete(m, mtag);
     959             :         mtag = NULL;
     960             :         ftag = NULL;
     961             : 
     962             :         /* Checksum must be calculated for the whole packet */
     963           0 :         in6_proto_cksum_out(m, NULL);
     964             : 
     965           0 :         if (extoff) {
     966           0 :                 int off;
     967             : 
     968             :                 /* Use protocol from next field of last extension header */
     969           0 :                 if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext,
     970           0 :                     ip6e_nxt), &off)) == NULL)
     971           0 :                         panic("%s: short ext mbuf chain", __func__);
     972           0 :                 proto = *(mtod(m, caddr_t) + off);
     973           0 :                 *(mtod(m, caddr_t) + off) = IPPROTO_FRAGMENT;
     974           0 :                 m = *m0;
     975           0 :         } else {
     976             :                 struct ip6_hdr *hdr;
     977             : 
     978           0 :                 hdr = mtod(m, struct ip6_hdr *);
     979           0 :                 proto = hdr->ip6_nxt;
     980           0 :                 hdr->ip6_nxt = IPPROTO_FRAGMENT;
     981             :         }
     982             : 
     983             :         /*
     984             :          * Maxlen may be less than 8 iff there was only a single
     985             :          * fragment.  As it was fragmented before, add a fragment
     986             :          * header also for a single fragment.  If total or maxlen
     987             :          * is less than 8, ip6_fragment() will return EMSGSIZE and
     988             :          * we drop the packet.
     989             :          */
     990           0 :         mtu = hdrlen + sizeof(struct ip6_frag) + maxlen;
     991           0 :         error = ip6_fragment(m, hdrlen, proto, mtu);
     992             : 
     993           0 :         m = (*m0)->m_nextpkt;
     994           0 :         (*m0)->m_nextpkt = NULL;
     995           0 :         if (error == 0) {
     996             :                 /* The first mbuf contains the unfragmented packet */
     997           0 :                 m_freemp(m0);
     998             :                 action = PF_PASS;
     999           0 :         } else {
    1000             :                 /* Drop expects an mbuf to free */
    1001           0 :                 DPFPRINTF(LOG_NOTICE, "refragment error %d", error);
    1002             :                 action = PF_DROP;
    1003             :         }
    1004             : 
    1005           0 :         for (t = m; m; m = t) {
    1006           0 :                 t = m->m_nextpkt;
    1007           0 :                 m->m_nextpkt = NULL;
    1008           0 :                 m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED;
    1009           0 :                 if (error == 0) {
    1010           0 :                         if (ifp == NULL) {
    1011           0 :                                 ip6_forward(m, NULL, 0);
    1012           0 :                         } else if ((u_long)m->m_pkthdr.len <= ifp->if_mtu) {
    1013           0 :                                 ifp->if_output(ifp, m, sin6tosa(dst), rt);
    1014           0 :                         } else {
    1015           0 :                                 icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0,
    1016             :                                     ifp->if_mtu);
    1017             :                         }
    1018             :                 } else {
    1019           0 :                         m_freem(m);
    1020             :                 }
    1021             :         }
    1022             : 
    1023           0 :         return (action);
    1024             : }
    1025             : #endif /* INET6 */
    1026             : 
/*
 * Normalize an IPv4 packet: optionally clear IP_DF (no-df mode), reject
 * fragments that still carry IP_DF, reassemble fragments when reassembly
 * is enabled, and scrub reserved ip_off bits.  Returns PF_PASS with
 * pd->m either normalized or NULL (fragment queued, more needed), or
 * PF_DROP with *reason set.
 */
int
pf_normalize_ip(struct pf_pdesc *pd, u_short *reason)
{
	struct ip	*h = mtod(pd->m, struct ip *);
	u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t	 mff = (ntohs(h->ip_off) & IP_MF);

	if (!fragoff && !mff)
		goto no_fragment;

	/* Clear IP_DF if we're in no-df mode */
	if (pf_status.reass & PF_REASS_NODF && h->ip_off & htons(IP_DF))
		h->ip_off &= htons(~IP_DF);

	/* We're dealing with a fragment now. Don't allow fragments
	 * with IP_DF to enter the cache. If the flag was cleared by
	 * no-df above, fine. Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(LOG_NOTICE, "bad fragment: IP_DF");
		REASON_SET(reason, PFRES_FRAG);
		return (PF_DROP);
	}

	if (!pf_status.reass)
		return (PF_PASS);	/* no reassembly */

	/* Returns PF_DROP or m is NULL or completely reassembled mbuf */
	if (pf_reassemble(&pd->m, pd->dir, reason) != PF_PASS)
		return (PF_DROP);
	if (pd->m == NULL)
		return (PF_PASS);  /* packet has been reassembled, no error */

	/* Reassembly replaced the mbuf; refresh the header pointer. */
	h = mtod(pd->m, struct ip *);

no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF))
		h->ip_off &= htons(IP_DF);

	return (PF_PASS);
}
    1069             : 
    1070             : #ifdef INET6
    1071             : int
    1072           0 : pf_normalize_ip6(struct pf_pdesc *pd, u_short *reason)
    1073             : {
    1074           0 :         struct ip6_frag          frag;
    1075             : 
    1076           0 :         if (pd->fragoff == 0)
    1077             :                 goto no_fragment;
    1078             : 
    1079           0 :         if (!pf_pull_hdr(pd->m, pd->fragoff, &frag, sizeof(frag), NULL, reason,
    1080             :             AF_INET6))
    1081           0 :                 return (PF_DROP);
    1082             : 
    1083           0 :         if (!pf_status.reass)
    1084           0 :                 return (PF_PASS);       /* no reassembly */
    1085             : 
    1086             :         /* Returns PF_DROP or m is NULL or completely reassembled mbuf */
    1087           0 :         if (pf_reassemble6(&pd->m, &frag, pd->fragoff + sizeof(frag),
    1088           0 :             pd->extoff, pd->dir, reason) != PF_PASS)
    1089           0 :                 return (PF_DROP);
    1090           0 :         if (pd->m == NULL)
    1091           0 :                 return (PF_PASS);  /* packet has been reassembled, no error */
    1092             : 
    1093             : no_fragment:
    1094           0 :         return (PF_PASS);
    1095           0 : }
    1096             : #endif /* INET6 */
    1097             : 
    1098             : int
    1099           0 : pf_normalize_tcp(struct pf_pdesc *pd)
    1100             : {
    1101           0 :         struct tcphdr   *th = &pd->hdr.tcp;
    1102             :         u_short          reason;
    1103             :         u_int8_t         flags;
    1104             :         u_int            rewrite = 0;
    1105             : 
    1106           0 :         flags = th->th_flags;
    1107           0 :         if (flags & TH_SYN) {
    1108             :                 /* Illegal packet */
    1109           0 :                 if (flags & TH_RST)
    1110             :                         goto tcp_drop;
    1111             : 
    1112           0 :                 if (flags & TH_FIN) /* XXX why clear instead of drop? */
    1113           0 :                         flags &= ~TH_FIN;
    1114             :         } else {
    1115             :                 /* Illegal packet */
    1116           0 :                 if (!(flags & (TH_ACK|TH_RST)))
    1117             :                         goto tcp_drop;
    1118             :         }
    1119             : 
    1120           0 :         if (!(flags & TH_ACK)) {
    1121             :                 /* These flags are only valid if ACK is set */
    1122           0 :                 if (flags & (TH_FIN|TH_PUSH|TH_URG))
    1123             :                         goto tcp_drop;
    1124             :         }
    1125             : 
    1126             :         /* If flags changed, or reserved data set, then adjust */
    1127           0 :         if (flags != th->th_flags || th->th_x2 != 0) {
    1128             :                 /* hack: set 4-bit th_x2 = 0 */
    1129           0 :                 u_int8_t *th_off = (u_int8_t*)(&th->th_ack+1);
    1130           0 :                 pf_patch_8(pd, th_off, th->th_off << 4, PF_HI);
    1131             : 
    1132           0 :                 pf_patch_8(pd, &th->th_flags, flags, PF_LO);
    1133             :                 rewrite = 1;
    1134           0 :         }
    1135             : 
    1136             :         /* Remove urgent pointer, if TH_URG is not set */
    1137           0 :         if (!(flags & TH_URG) && th->th_urp) {
    1138           0 :                 pf_patch_16(pd, &th->th_urp, 0);
    1139             :                 rewrite = 1;
    1140           0 :         }
    1141             : 
    1142             :         /* copy back packet headers if we sanitized */
    1143           0 :         if (rewrite) {
    1144           0 :                 m_copyback(pd->m, pd->off, sizeof(*th), th, M_NOWAIT);
    1145           0 :         }
    1146             : 
    1147           0 :         return (PF_PASS);
    1148             : 
    1149             : tcp_drop:
    1150           0 :         REASON_SET(&reason, PFRES_NORM);
    1151           0 :         return (PF_DROP);
    1152           0 : }
    1153             : 
/*
 * Allocate and initialize the per-peer scrub state for a new TCP
 * connection.  Records the initial TTL/hop limit and, on a SYN that
 * carries a timestamp option, seeds the timestamp-modulation state.
 * Returns 1 if the scrub pool is exhausted, 0 otherwise.
 */
int
pf_normalize_tcp_init(struct pf_pdesc *pd, struct pf_state_peer *src)
{
	struct tcphdr	*th = &pd->hdr.tcp;
	u_int32_t	 tsval, tsecr;
	int		 olen;
	u_int8_t	 opts[MAX_TCPOPTLEN], *opt;


	/* Caller must not have allocated scrub state for this peer yet. */
	KASSERT(src->scrub == NULL);

	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	if (src->scrub == NULL)
		return (1);
	memset(src->scrub, 0, sizeof(*src->scrub));

	/* Remember the initial TTL (v4) or hop limit (v6) of this peer. */
	switch (pd->af) {
	case AF_INET: {
		struct ip *h = mtod(pd->m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(pd->af);
	}

	/*
	 * All normalizations below are only begun if we see the start of
	 * the connections.  They must all set an enabled bit in pfss_flags
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);

	/* Option bytes following the fixed header; bail if too short. */
	olen = (th->th_off << 2) - sizeof(*th);
	if (olen < TCPOLEN_TIMESTAMP || !pf_pull_hdr(pd->m,
	    pd->off + sizeof(*th), opts, olen, NULL, NULL, pd->af))
		return (0);

	/* Scan the options for a timestamp and latch its values. */
	opt = opts;
	while ((opt = pf_find_tcpopt(opt, opts, olen,
		    TCPOPT_TIMESTAMP, TCPOLEN_TIMESTAMP)) != NULL) {

		src->scrub->pfss_flags |= PFSS_TIMESTAMP;
		/* Random per-connection offset used to modulate TS values. */
		src->scrub->pfss_ts_mod = arc4random();
		/* note PFSS_PAWS not set yet */
		/* memcpy: TS value/echo sit unaligned at opt[2]/opt[6]. */
		memcpy(&tsval, &opt[2], sizeof(u_int32_t));
		memcpy(&tsecr, &opt[6], sizeof(u_int32_t));
		src->scrub->pfss_tsval0 = ntohl(tsval);
		src->scrub->pfss_tsval = ntohl(tsval);
		src->scrub->pfss_tsecr = ntohl(tsecr);
		getmicrouptime(&src->scrub->pfss_last);

		opt += opt[1];
	}

	return (0);
}
    1218             : 
    1219             : void
    1220           0 : pf_normalize_tcp_cleanup(struct pf_state *state)
    1221             : {
    1222           0 :         if (state->src.scrub)
    1223           0 :                 pool_put(&pf_state_scrub_pl, state->src.scrub);
    1224           0 :         if (state->dst.scrub)
    1225           0 :                 pool_put(&pf_state_scrub_pl, state->dst.scrub);
    1226             : 
    1227             :         /* Someday... flush the TCP segment reassembly descriptors. */
    1228           0 : }
    1229             : 
    1230             : int
    1231           0 : pf_normalize_tcp_stateful(struct pf_pdesc *pd, u_short *reason,
    1232             :     struct pf_state *state, struct pf_state_peer *src,
    1233             :     struct pf_state_peer *dst, int *writeback)
    1234             : {
    1235           0 :         struct tcphdr   *th = &pd->hdr.tcp;
    1236           0 :         struct timeval   uptime;
    1237             :         u_int            tsval_from_last;
    1238             :         u_int32_t        tsval, tsecr;
    1239             :         int              copyback = 0;
    1240             :         int              got_ts = 0;
    1241             :         int              olen;
    1242           0 :         u_int8_t         opts[MAX_TCPOPTLEN], *opt;
    1243             : 
    1244           0 :         KASSERT(src->scrub || dst->scrub);
    1245             : 
    1246             :         /*
    1247             :          * Enforce the minimum TTL seen for this connection.  Negate a common
    1248             :          * technique to evade an intrusion detection system and confuse
    1249             :          * firewall state code.
    1250             :          */
    1251           0 :         switch (pd->af) {
    1252             :         case AF_INET:
    1253           0 :                 if (src->scrub) {
    1254           0 :                         struct ip *h = mtod(pd->m, struct ip *);
    1255           0 :                         if (h->ip_ttl > src->scrub->pfss_ttl)
    1256           0 :                                 src->scrub->pfss_ttl = h->ip_ttl;
    1257           0 :                         h->ip_ttl = src->scrub->pfss_ttl;
    1258           0 :                 }
    1259             :                 break;
    1260             : #ifdef INET6
    1261             :         case AF_INET6:
    1262           0 :                 if (src->scrub) {
    1263           0 :                         struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *);
    1264           0 :                         if (h->ip6_hlim > src->scrub->pfss_ttl)
    1265           0 :                                 src->scrub->pfss_ttl = h->ip6_hlim;
    1266           0 :                         h->ip6_hlim = src->scrub->pfss_ttl;
    1267           0 :                 }
    1268             :                 break;
    1269             : #endif /* INET6 */
    1270             :         default:
    1271           0 :                 unhandled_af(pd->af);
    1272             :         }
    1273             : 
    1274           0 :         olen = (th->th_off << 2) - sizeof(*th);
    1275             : 
    1276           0 :         if (olen >= TCPOLEN_TIMESTAMP &&
    1277           0 :             ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
    1278           0 :             (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
    1279           0 :             pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, olen, NULL, NULL,
    1280           0 :             pd->af)) {
    1281             : 
    1282             :                 /* Modulate the timestamps.  Can be used for NAT detection, OS
    1283             :                  * uptime determination or reboot detection.
    1284             :                  */
    1285             :                 opt = opts;
    1286           0 :                 while ((opt = pf_find_tcpopt(opt, opts, olen,
    1287           0 :                             TCPOPT_TIMESTAMP, TCPOLEN_TIMESTAMP)) != NULL) {
    1288             : 
    1289           0 :                         u_int8_t *ts = opt + 2;
    1290           0 :                         u_int8_t *tsr = opt + 6;
    1291             : 
    1292           0 :                         if (got_ts) {
    1293             :                                 /* Huh?  Multiple timestamps!? */
    1294           0 :                                 if (pf_status.debug >= LOG_NOTICE) {
    1295           0 :                                         log(LOG_NOTICE,
    1296             :                                             "pf: %s: multiple TS??", __func__);
    1297           0 :                                         pf_print_state(state);
    1298           0 :                                         addlog("\n");
    1299           0 :                                 }
    1300           0 :                                 REASON_SET(reason, PFRES_TS);
    1301           0 :                                 return (PF_DROP);
    1302             :                         }
    1303             : 
    1304           0 :                         memcpy(&tsval, ts, sizeof(u_int32_t));
    1305           0 :                         memcpy(&tsecr, tsr, sizeof(u_int32_t));
    1306             : 
    1307             :                         /* modulate TS */
    1308           0 :                         if (tsval && src->scrub &&
    1309           0 :                             (src->scrub->pfss_flags & PFSS_TIMESTAMP)) {
    1310             :                                 /* tsval used further on */
    1311           0 :                                 tsval = ntohl(tsval);
    1312           0 :                                 pf_patch_32_unaligned(pd,
    1313           0 :                                     ts, htonl(tsval + src->scrub->pfss_ts_mod),
    1314           0 :                                     PF_ALGNMNT(ts - opts));
    1315             :                                 copyback = 1;
    1316           0 :                         }
    1317             : 
    1318             :                         /* modulate TS reply if any (!0) */
    1319           0 :                         if (tsecr && dst->scrub &&
    1320           0 :                             (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
    1321             :                                 /* tsecr used further on */
    1322           0 :                                 tsecr = ntohl(tsecr) - dst->scrub->pfss_ts_mod;
    1323           0 :                                 pf_patch_32_unaligned(pd,
    1324           0 :                                     tsr, htonl(tsecr), PF_ALGNMNT(tsr - opts));
    1325             :                                 copyback = 1;
    1326           0 :                         }
    1327             : 
    1328             :                         got_ts = 1;
    1329           0 :                         opt += opt[1];
    1330           0 :                 }
    1331             : 
    1332           0 :                 if (copyback) {
    1333             :                         /* Copyback the options, caller copys back header */
    1334           0 :                         *writeback = 1;
    1335           0 :                         m_copyback(pd->m, pd->off + sizeof(*th), olen, opts, M_NOWAIT);
    1336           0 :                 }
    1337             :         }
    1338             : 
    1339             : 
    1340             :         /*
    1341             :          * Must invalidate PAWS checks on connections idle for too long.
    1342             :          * The fastest allowed timestamp clock is 1ms.  That turns out to
    1343             :          * be about 24 days before it wraps.  XXX Right now our lowerbound
    1344             :          * TS echo check only works for the first 12 days of a connection
    1345             :          * when the TS has exhausted half its 32bit space
    1346             :          */
    1347             : #define TS_MAX_IDLE     (24*24*60*60)
    1348             : #define TS_MAX_CONN     (12*24*60*60)   /* XXX remove when better tsecr check */
    1349             : 
    1350           0 :         getmicrouptime(&uptime);
    1351           0 :         if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
    1352           0 :             (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
    1353           0 :             time_uptime - state->creation > TS_MAX_CONN))  {
    1354           0 :                 if (pf_status.debug >= LOG_NOTICE) {
    1355           0 :                         log(LOG_NOTICE, "pf: src idled out of PAWS ");
    1356           0 :                         pf_print_state(state);
    1357           0 :                         addlog("\n");
    1358           0 :                 }
    1359           0 :                 src->scrub->pfss_flags =
    1360           0 :                     (src->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED;
    1361           0 :         }
    1362           0 :         if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
    1363           0 :             uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
    1364           0 :                 if (pf_status.debug >= LOG_NOTICE) {
    1365           0 :                         log(LOG_NOTICE, "pf: dst idled out of PAWS ");
    1366           0 :                         pf_print_state(state);
    1367           0 :                         addlog("\n");
    1368           0 :                 }
    1369           0 :                 dst->scrub->pfss_flags =
    1370           0 :                     (dst->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED;
    1371           0 :         }
    1372             : 
    1373           0 :         if (got_ts && src->scrub && dst->scrub &&
    1374           0 :             (src->scrub->pfss_flags & PFSS_PAWS) &&
    1375           0 :             (dst->scrub->pfss_flags & PFSS_PAWS)) {
    1376             :                 /* Validate that the timestamps are "in-window".
    1377             :                  * RFC1323 describes TCP Timestamp options that allow
    1378             :                  * measurement of RTT (round trip time) and PAWS
    1379             :                  * (protection against wrapped sequence numbers).  PAWS
    1380             :                  * gives us a set of rules for rejecting packets on
    1381             :                  * long fat pipes (packets that were somehow delayed
    1382             :                  * in transit longer than the time it took to send the
    1383             :                  * full TCP sequence space of 4Gb).  We can use these
    1384             :                  * rules and infer a few others that will let us treat
    1385             :                  * the 32bit timestamp and the 32bit echoed timestamp
    1386             :                  * as sequence numbers to prevent a blind attacker from
    1387             :                  * inserting packets into a connection.
    1388             :                  *
    1389             :                  * RFC1323 tells us:
    1390             :                  *  - The timestamp on this packet must be greater than
    1391             :                  *    or equal to the last value echoed by the other
    1392             :                  *    endpoint.  The RFC says those will be discarded
    1393             :                  *    since it is a dup that has already been acked.
    1394             :                  *    This gives us a lowerbound on the timestamp.
    1395             :                  *        timestamp >= other last echoed timestamp
    1396             :                  *  - The timestamp will be less than or equal to
    1397             :                  *    the last timestamp plus the time between the
    1398             :                  *    last packet and now.  The RFC defines the max
    1399             :                  *    clock rate as 1ms.  We will allow clocks to be
    1400             :                  *    up to 10% fast and will allow a total difference
    1401             :                  *    or 30 seconds due to a route change.  And this
    1402             :                  *    gives us an upperbound on the timestamp.
    1403             :                  *        timestamp <= last timestamp + max ticks
    1404             :                  *    We have to be careful here.  Windows will send an
    1405             :                  *    initial timestamp of zero and then initialize it
    1406             :                  *    to a random value after the 3whs; presumably to
    1407             :                  *    avoid a DoS by having to call an expensive RNG
    1408             :                  *    during a SYN flood.  Proof MS has at least one
    1409             :                  *    good security geek.
    1410             :                  *
    1411             :                  *  - The TCP timestamp option must also echo the other
    1412             :                  *    endpoints timestamp.  The timestamp echoed is the
    1413             :                  *    one carried on the earliest unacknowledged segment
    1414             :                  *    on the left edge of the sequence window.  The RFC
    1415             :                  *    states that the host will reject any echoed
    1416             :                  *    timestamps that were larger than any ever sent.
    1417             :                  *    This gives us an upperbound on the TS echo.
    1418             :                  *        tescr <= largest_tsval
    1419             :                  *  - The lowerbound on the TS echo is a little more
    1420             :                  *    tricky to determine.  The other endpoint's echoed
    1421             :                  *    values will not decrease.  But there may be
    1422             :                  *    network conditions that re-order packets and
    1423             :                  *    cause our view of them to decrease.  For now the
    1424             :                  *    only lowerbound we can safely determine is that
    1425             :                  *    the TS echo will never be less than the original
    1426             :                  *    TS.  XXX There is probably a better lowerbound.
    1427             :                  *    Remove TS_MAX_CONN with better lowerbound check.
    1428             :                  *        tescr >= other original TS
    1429             :                  *
    1430             :                  * It is also important to note that the fastest
    1431             :                  * timestamp clock of 1ms will wrap its 32bit space in
    1432             :                  * 24 days.  So we just disable TS checking after 24
    1433             :                  * days of idle time.  We actually must use a 12d
    1434             :                  * connection limit until we can come up with a better
    1435             :                  * lowerbound to the TS echo check.
    1436             :                  */
    1437             :                 struct timeval  delta_ts;
    1438             :                 int             ts_fudge;
    1439             : 
    1440             :                 /*
    1441             :                  * PFTM_TS_DIFF is how many seconds of leeway to allow
    1442             :                  * a host's timestamp.  This can happen if the previous
    1443             :                  * packet got delayed in transit for much longer than
    1444             :                  * this packet.
    1445             :                  */
    1446           0 :                 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
    1447           0 :                         ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
    1448             : 
    1449             :                 /* Calculate max ticks since the last timestamp */
    1450             : #define TS_MAXFREQ      1100            /* RFC max TS freq of 1Khz + 10% skew */
    1451             : #define TS_MICROSECS    1000000         /* microseconds per second */
    1452           0 :                 timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
    1453           0 :                 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
    1454           0 :                 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
    1455             : 
    1456           0 :                 if ((src->state >= TCPS_ESTABLISHED &&
    1457           0 :                     dst->state >= TCPS_ESTABLISHED) &&
    1458           0 :                     (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
    1459           0 :                     SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
    1460           0 :                     (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
    1461           0 :                     SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
    1462             :                         /* Bad RFC1323 implementation or an insertion attack.
    1463             :                          *
    1464             :                          * - Solaris 2.6 and 2.7 are known to send another ACK
    1465             :                          *   after the FIN,FIN|ACK,ACK closing that carries
    1466             :                          *   an old timestamp.
    1467             :                          */
    1468             : 
    1469           0 :                         DPFPRINTF(LOG_NOTICE, "Timestamp failed %c%c%c%c",
    1470             :                             SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
    1471             :                             SEQ_GT(tsval, src->scrub->pfss_tsval +
    1472             :                             tsval_from_last) ? '1' : ' ',
    1473             :                             SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
    1474             :                             SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' ');
    1475           0 :                         DPFPRINTF(LOG_NOTICE, " tsval: %u  tsecr: %u  "
    1476             :                             "+ticks: %u  idle: %llu.%06lus", tsval, tsecr,
    1477             :                             tsval_from_last, (long long)delta_ts.tv_sec,
    1478             :                             delta_ts.tv_usec);
    1479           0 :                         DPFPRINTF(LOG_NOTICE, " src->tsval: %u  tsecr: %u",
    1480             :                             src->scrub->pfss_tsval, src->scrub->pfss_tsecr);
    1481           0 :                         DPFPRINTF(LOG_NOTICE, " dst->tsval: %u  tsecr: %u  "
    1482             :                             "tsval0: %u", dst->scrub->pfss_tsval,
    1483             :                             dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0);
    1484           0 :                         if (pf_status.debug >= LOG_NOTICE) {
    1485           0 :                                 log(LOG_NOTICE, "pf: ");
    1486           0 :                                 pf_print_state(state);
    1487           0 :                                 pf_print_flags(th->th_flags);
    1488           0 :                                 addlog("\n");
    1489           0 :                         }
    1490           0 :                         REASON_SET(reason, PFRES_TS);
    1491           0 :                         return (PF_DROP);
    1492             :                 }
    1493             :                 /* XXX I'd really like to require tsecr but it's optional */
    1494           0 :         } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
    1495           0 :             ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
    1496           0 :             || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
    1497           0 :             src->scrub && dst->scrub &&
    1498           0 :             (src->scrub->pfss_flags & PFSS_PAWS) &&
    1499           0 :             (dst->scrub->pfss_flags & PFSS_PAWS)) {
    1500             :                 /* Didn't send a timestamp.  Timestamps aren't really useful
    1501             :                  * when:
    1502             :                  *  - connection opening or closing (often not even sent).
    1503             :                  *    but we must not let an attacker to put a FIN on a
    1504             :                  *    data packet to sneak it through our ESTABLISHED check.
    1505             :                  *  - on a TCP reset.  RFC suggests not even looking at TS.
    1506             :                  *  - on an empty ACK.  The TS will not be echoed so it will
    1507             :                  *    probably not help keep the RTT calculation in sync and
    1508             :                  *    there isn't as much danger when the sequence numbers
    1509             :                  *    got wrapped.  So some stacks don't include TS on empty
    1510             :                  *    ACKs :-(
    1511             :                  *
    1512             :                  * To minimize the disruption to mostly RFC1323 conformant
    1513             :                  * stacks, we will only require timestamps on data packets.
    1514             :                  *
    1515             :                  * And what do ya know, we cannot require timestamps on data
    1516             :                  * packets.  There appear to be devices that do legitimate
    1517             :                  * TCP connection hijacking.  There are HTTP devices that allow
    1518             :                  * a 3whs (with timestamps) and then buffer the HTTP request.
    1519             :                  * If the intermediate device has the HTTP response cache, it
    1520             :                  * will spoof the response but not bother timestamping its
    1521             :                  * packets.  So we can look for the presence of a timestamp in
    1522             :                  * the first data packet and if there, require it in all future
    1523             :                  * packets.
    1524             :                  */
    1525             : 
    1526           0 :                 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
    1527             :                         /*
    1528             :                          * Hey!  Someone tried to sneak a packet in.  Or the
    1529             :                          * stack changed its RFC1323 behavior?!?!
    1530             :                          */
    1531           0 :                         if (pf_status.debug >= LOG_NOTICE) {
    1532           0 :                                 log(LOG_NOTICE,
    1533             :                                     "pf: did not receive expected RFC1323 "
    1534             :                                     "timestamp");
    1535           0 :                                 pf_print_state(state);
    1536           0 :                                 pf_print_flags(th->th_flags);
    1537           0 :                                 addlog("\n");
    1538           0 :                         }
    1539           0 :                         REASON_SET(reason, PFRES_TS);
    1540           0 :                         return (PF_DROP);
    1541             :                 }
    1542             :         }
    1543             : 
    1544             :         /*
    1545             :          * We will note if a host sends his data packets with or without
    1546             :          * timestamps.  And require all data packets to contain a timestamp
    1547             :          * if the first does.  PAWS implicitly requires that all data packets be
    1548             :          * timestamped.  But I think there are middle-man devices that hijack
    1549             :          * TCP streams immediately after the 3whs and don't timestamp their
    1550             :          * packets (seen in a WWW accelerator or cache).
    1551             :          */
    1552           0 :         if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
    1553           0 :             (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
    1554           0 :                 if (got_ts)
    1555           0 :                         src->scrub->pfss_flags |= PFSS_DATA_TS;
    1556             :                 else {
    1557           0 :                         src->scrub->pfss_flags |= PFSS_DATA_NOTS;
    1558           0 :                         if (pf_status.debug >= LOG_NOTICE && dst->scrub &&
    1559           0 :                             (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
    1560             :                                 /* Don't warn if other host rejected RFC1323 */
    1561           0 :                                 log(LOG_NOTICE,
    1562             :                                     "pf: broken RFC1323 stack did not "
    1563             :                                     "timestamp data packet. Disabled PAWS "
    1564             :                                     "security.");
    1565           0 :                                 pf_print_state(state);
    1566           0 :                                 pf_print_flags(th->th_flags);
    1567           0 :                                 addlog("\n");
    1568           0 :                         }
    1569             :                 }
    1570             :         }
    1571             : 
    1572             :         /*
    1573             :          * Update PAWS values
    1574             :          */
    1575           0 :         if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
    1576             :             (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
    1577           0 :                 getmicrouptime(&src->scrub->pfss_last);
    1578           0 :                 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
    1579           0 :                     (src->scrub->pfss_flags & PFSS_PAWS) == 0)
    1580           0 :                         src->scrub->pfss_tsval = tsval;
    1581             : 
    1582           0 :                 if (tsecr) {
    1583           0 :                         if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
    1584           0 :                             (src->scrub->pfss_flags & PFSS_PAWS) == 0)
    1585           0 :                                 src->scrub->pfss_tsecr = tsecr;
    1586             : 
    1587           0 :                         if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
    1588           0 :                             (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
    1589           0 :                             src->scrub->pfss_tsval0 == 0)) {
    1590             :                                 /* tsval0 MUST be the lowest timestamp */
    1591           0 :                                 src->scrub->pfss_tsval0 = tsval;
    1592           0 :                         }
    1593             : 
    1594             :                         /* Only fully initialized after a TS gets echoed */
    1595           0 :                         if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
    1596           0 :                                 src->scrub->pfss_flags |= PFSS_PAWS;
    1597             :                 }
    1598             :         }
    1599             : 
    1600             :         /* I have a dream....  TCP segment reassembly.... */
    1601           0 :         return (0);
    1602           0 : }
    1603             : 
    1604             : int
    1605           0 : pf_normalize_mss(struct pf_pdesc *pd, u_int16_t maxmss)
    1606             : {
    1607             :         int              olen, optsoff;
    1608           0 :         u_int8_t         opts[MAX_TCPOPTLEN], *opt;
    1609             : 
    1610           0 :         olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
    1611           0 :         optsoff = pd->off + sizeof(struct tcphdr);
    1612           0 :         if (olen < TCPOLEN_MAXSEG ||
    1613           0 :             !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af))
    1614           0 :                 return (0);
    1615             : 
    1616             :         opt = opts;
    1617           0 :         while ((opt = pf_find_tcpopt(opt, opts, olen,
    1618           0 :                     TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) {
    1619           0 :                 u_int16_t       mss;
    1620           0 :                 u_int8_t       *mssp = opt + 2;
    1621           0 :                 memcpy(&mss, mssp, sizeof(mss));
    1622           0 :                 if (ntohs(mss) > maxmss) {
    1623           0 :                         size_t mssoffopts = mssp - opts;
    1624           0 :                         pf_patch_16_unaligned(pd, &mss,
    1625           0 :                             htons(maxmss), PF_ALGNMNT(mssoffopts));
    1626           0 :                         m_copyback(pd->m, optsoff + mssoffopts,
    1627             :                             sizeof(mss), &mss, M_NOWAIT);
    1628           0 :                         m_copyback(pd->m, pd->off,
    1629           0 :                             sizeof(struct tcphdr), &pd->hdr.tcp, M_NOWAIT);
    1630           0 :                 }
    1631             : 
    1632           0 :                 opt += opt[1];
    1633           0 :         }
    1634             : 
    1635           0 :         return (0);
    1636           0 : }
    1637             : 
    1638             : void
    1639           0 : pf_scrub(struct mbuf *m, u_int16_t flags, sa_family_t af, u_int8_t min_ttl,
    1640             :     u_int8_t tos)
    1641             : {
    1642           0 :         struct ip               *h = mtod(m, struct ip *);
    1643             : #ifdef INET6
    1644           0 :         struct ip6_hdr          *h6 = mtod(m, struct ip6_hdr *);
    1645             : #endif  /* INET6 */
    1646             : 
    1647             :         /* Clear IP_DF if no-df was requested */
    1648           0 :         if (flags & PFSTATE_NODF && af == AF_INET && h->ip_off & htons(IP_DF))
    1649           0 :                 h->ip_off &= htons(~IP_DF);
    1650             : 
    1651             :         /* Enforce a minimum ttl, may cause endless packet loops */
    1652           0 :         if (min_ttl && af == AF_INET && h->ip_ttl < min_ttl)
    1653           0 :                 h->ip_ttl = min_ttl;
    1654             : #ifdef INET6
    1655           0 :         if (min_ttl && af == AF_INET6 && h6->ip6_hlim < min_ttl)
    1656           0 :                 h6->ip6_hlim = min_ttl;
    1657             : #endif  /* INET6 */
    1658             : 
    1659             :         /* Enforce tos */
    1660           0 :         if (flags & PFSTATE_SETTOS) {
    1661           0 :                 if (af == AF_INET)
    1662           0 :                         h->ip_tos = tos | (h->ip_tos & IPTOS_ECN_MASK);
    1663             : #ifdef INET6
    1664           0 :                 if (af == AF_INET6) {
    1665             :                         /* drugs are unable to explain such idiocy */
    1666           0 :                         h6->ip6_flow &= ~htonl(0x0fc00000);
    1667           0 :                         h6->ip6_flow |= htonl(((u_int32_t)tos) << 20);
    1668           0 :                 }
    1669             : #endif  /* INET6 */
    1670             :         }
    1671             : 
    1672             :         /* random-id, but not for fragments */
    1673           0 :         if (flags & PFSTATE_RANDOMID && af == AF_INET &&
    1674           0 :             !(h->ip_off & ~htons(IP_DF)))
    1675           0 :                 h->ip_id = htons(ip_randomid());
    1676           0 : }

Generated by: LCOV version 1.13