LCOV - code coverage report
Current view: top level - netinet - tcp_input.c (source / functions) Hit Total Coverage
Test: 6.4 Lines: 0 1735 0.0 %
Date: 2018-10-19 03:25:38 Functions: 0 30 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*      $OpenBSD: tcp_input.c,v 1.358 2018/07/23 21:14:00 bluhm Exp $   */
       2             : /*      $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $  */
       3             : 
       4             : /*
       5             :  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
       6             :  *      The Regents of the University of California.  All rights reserved.
       7             :  *
       8             :  * Redistribution and use in source and binary forms, with or without
       9             :  * modification, are permitted provided that the following conditions
      10             :  * are met:
      11             :  * 1. Redistributions of source code must retain the above copyright
      12             :  *    notice, this list of conditions and the following disclaimer.
      13             :  * 2. Redistributions in binary form must reproduce the above copyright
      14             :  *    notice, this list of conditions and the following disclaimer in the
      15             :  *    documentation and/or other materials provided with the distribution.
      16             :  * 3. Neither the name of the University nor the names of its contributors
      17             :  *    may be used to endorse or promote products derived from this software
      18             :  *    without specific prior written permission.
      19             :  *
      20             :  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
      21             :  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      22             :  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      23             :  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
      24             :  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
      25             :  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
      26             :  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      27             :  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      28             :  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      29             :  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      30             :  * SUCH DAMAGE.
      31             :  *
      32             :  *      @(#)COPYRIGHT   1.1 (NRL) 17 January 1995
      33             :  *
      34             :  * NRL grants permission for redistribution and use in source and binary
      35             :  * forms, with or without modification, of the software and documentation
      36             :  * created at NRL provided that the following conditions are met:
      37             :  *
      38             :  * 1. Redistributions of source code must retain the above copyright
      39             :  *    notice, this list of conditions and the following disclaimer.
      40             :  * 2. Redistributions in binary form must reproduce the above copyright
      41             :  *    notice, this list of conditions and the following disclaimer in the
      42             :  *    documentation and/or other materials provided with the distribution.
      43             :  * 3. All advertising materials mentioning features or use of this software
      44             :  *    must display the following acknowledgements:
      45             :  *      This product includes software developed by the University of
      46             :  *      California, Berkeley and its contributors.
      47             :  *      This product includes software developed at the Information
      48             :  *      Technology Division, US Naval Research Laboratory.
      49             :  * 4. Neither the name of the NRL nor the names of its contributors
      50             :  *    may be used to endorse or promote products derived from this software
      51             :  *    without specific prior written permission.
      52             :  *
      53             :  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
      54             :  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
      55             :  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
      56             :  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
      57             :  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
      58             :  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
      59             :  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
      60             :  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
      61             :  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
      62             :  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
      63             :  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      64             :  *
      65             :  * The views and conclusions contained in the software and documentation
      66             :  * are those of the authors and should not be interpreted as representing
      67             :  * official policies, either expressed or implied, of the US Naval
      68             :  * Research Laboratory (NRL).
      69             :  */
      70             : 
      71             : #include "pf.h"
      72             : 
      73             : #include <sys/param.h>
      74             : #include <sys/systm.h>
      75             : #include <sys/mbuf.h>
      76             : #include <sys/protosw.h>
      77             : #include <sys/socket.h>
      78             : #include <sys/socketvar.h>
      79             : #include <sys/timeout.h>
      80             : #include <sys/kernel.h>
      81             : #include <sys/pool.h>
      82             : 
      83             : #include <net/if.h>
      84             : #include <net/if_var.h>
      85             : #include <net/route.h>
      86             : 
      87             : #include <netinet/in.h>
      88             : #include <netinet/ip.h>
      89             : #include <netinet/in_pcb.h>
      90             : #include <netinet/ip_var.h>
      91             : #include <netinet/tcp.h>
      92             : #include <netinet/tcp_fsm.h>
      93             : #include <netinet/tcp_seq.h>
      94             : #include <netinet/tcp_timer.h>
      95             : #include <netinet/tcp_var.h>
      96             : #include <netinet/tcp_debug.h>
      97             : 
      98             : #if NPF > 0
      99             : #include <net/pfvar.h>
     100             : #endif
     101             : 
     102             : struct  tcpiphdr tcp_saveti;
     103             : 
     104             : int tcp_mss_adv(struct mbuf *, int);
     105             : int tcp_flush_queue(struct tcpcb *);
     106             : 
     107             : #ifdef INET6
     108             : #include <netinet6/in6_var.h>
     109             : #include <netinet6/nd6.h>
     110             : 
     111             : struct  tcpipv6hdr tcp_saveti6;
     112             : 
     113             : /* for the packet header length in the mbuf */
     114             : #define M_PH_LEN(m)      (((struct mbuf *)(m))->m_pkthdr.len)
     115             : #define M_V6_LEN(m)      (M_PH_LEN(m) - sizeof(struct ip6_hdr))
     116             : #define M_V4_LEN(m)      (M_PH_LEN(m) - sizeof(struct ip))
     117             : #endif /* INET6 */
     118             : 
     119             : int     tcprexmtthresh = 3;      /* NOTE(review): presumably the duplicate-ACK count that triggers fast retransmit; not used in this excerpt -- confirm */
     120             : int     tcptv_keep_init = TCPTV_KEEP_INIT;
     121             : 
     122             : int tcp_rst_ppslim = 100;               /* 100pps */
     123             : int tcp_rst_ppslim_count = 0;   /* counter and timestamp below accompany tcp_rst_ppslim; their consumer is outside this excerpt */
     124             : struct timeval tcp_rst_ppslim_last;
     125             : 
     126             : int tcp_ackdrop_ppslim = 100;           /* 100pps */
     127             : int tcp_ackdrop_ppslim_count = 0;       /* counter and timestamp below accompany tcp_ackdrop_ppslim; their consumer is outside this excerpt */
     128             : struct timeval tcp_ackdrop_ppslim_last;
     129             : 
     130             : #define TCP_PAWS_IDLE   (24 * 24 * 60 * 60 * PR_SLOWHZ) /* 24 days (24 * 86400 seconds) scaled by PR_SLOWHZ */
     131             : 
     132             : /* for modulo comparisons of timestamps: the difference is cast to int so its sign gives the ordering across wraparound */
     133             : #define TSTMP_LT(a,b)   ((int)((a)-(b)) < 0)
     134             : #define TSTMP_GEQ(a,b)  ((int)((a)-(b)) >= 0)
     135             : 
     136             : /* for TCP SACK comparisons: pick the smaller/larger of two values as ordered by SEQ_LT/SEQ_GT; each argument is evaluated twice */
     137             : #define SEQ_MIN(a,b)    (SEQ_LT(a,b) ? (a) : (b))
     138             : #define SEQ_MAX(a,b)    (SEQ_GT(a,b) ? (a) : (b))
     139             : 
     140             : /*
     141             :  * Neighbor Discovery, Neighbor Unreachability Detection upper-layer hint: when tp is non-NULL, has an inpcb flagged INP_IPV6, and that inpcb's inp_route6 route passes rtisvalid(), call nd6_nud_hint() on the route.
     142             :  * The tp argument is expanded unparenthesized and several times, so pass a plain side-effect-free expression. */
     143             : #ifdef INET6
     144             : #define ND6_HINT(tp) \
     145             : do { \
     146             :         if (tp && tp->t_inpcb && (tp->t_inpcb->inp_flags & INP_IPV6) &&    \
     147             :             rtisvalid(tp->t_inpcb->inp_route6.ro_rt)) {                   \
     148             :                 nd6_nud_hint(tp->t_inpcb->inp_route6.ro_rt);              \
     149             :         } \
     150             : } while (0)
     151             : #else
     152             : #define ND6_HINT(tp)    /* expands to nothing when INET6 is not defined */
     153             : #endif
     154             : 
     155             : #ifdef TCP_ECN
     156             : /*
     157             :  * ECN (Explicit Congestion Notification) support based on RFC3168
     158             :  * implementation note:
     159             :  *   snd_last is used to track a recovery phase.
     160             :  *   when cwnd is reduced, snd_last is set to snd_max.
     161             :  *   while snd_last > snd_una, the sender is in a recovery phase and
     162             :  *   its cwnd should not be reduced again.
     163             :  *   snd_last follows snd_una when not in a recovery phase.
     164             :  */
     165             : #endif
     166             : 
     167             : /*
     168             :  * Macro to compute ACK transmission behavior.  Delay the ACK unless
     169             :  * we have already delayed an ACK (must send an ACK every two segments).
     170             :  * We also ACK immediately if we received a PUSH and the ACK-on-PUSH
     171             :  * option is enabled or when the packet is coming from a loopback
     172             :  * interface.  The interface is fetched with if_get() from m's ph_ifidx only when m is non-NULL and has M_PKTHDR set; if_put() is called afterwards in every case, including when ifp is still NULL.
     173             :  * tp and m are expanded unparenthesized and more than once, and the expansion declares its own block-local ifp. */
     174             : #define TCP_SETUP_ACK(tp, tiflags, m) \
     175             : do { \
     176             :         struct ifnet *ifp = NULL; \
     177             :         if (m && (m->m_flags & M_PKTHDR)) \
     178             :                 ifp = if_get(m->m_pkthdr.ph_ifidx); \
     179             :         if (TCP_TIMER_ISARMED(tp, TCPT_DELACK) || \
     180             :             (tcp_ack_on_push && (tiflags) & TH_PUSH) || \
     181             :             (ifp && (ifp->if_flags & IFF_LOOPBACK))) \
     182             :                 tp->t_flags |= TF_ACKNOW; \
     183             :         else \
     184             :                 TCP_TIMER_ARM_MSEC(tp, TCPT_DELACK, tcp_delack_msecs); \
     185             :         if_put(ifp); \
     186             : } while (0)
     187             : 
     188             : void     tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
     189             : void     tcp_newreno_partialack(struct tcpcb *, struct tcphdr *);
     190             : 
     191             : void     syn_cache_put(struct syn_cache *);
     192             : void     syn_cache_rm(struct syn_cache *);
     193             : int      syn_cache_respond(struct syn_cache *, struct mbuf *);
     194             : void     syn_cache_timer(void *);
     195             : void     syn_cache_reaper(void *);
     196             : void     syn_cache_insert(struct syn_cache *, struct tcpcb *);
     197             : void     syn_cache_reset(struct sockaddr *, struct sockaddr *,
     198             :                 struct tcphdr *, u_int);
     199             : int      syn_cache_add(struct sockaddr *, struct sockaddr *, struct tcphdr *,
     200             :                 unsigned int, struct socket *, struct mbuf *, u_char *, int,
     201             :                 struct tcp_opt_info *, tcp_seq *);
     202             : struct socket *syn_cache_get(struct sockaddr *, struct sockaddr *,
     203             :                 struct tcphdr *, unsigned int, unsigned int, struct socket *,
     204             :                 struct mbuf *);
     205             : struct syn_cache *syn_cache_lookup(struct sockaddr *, struct sockaddr *,
     206             :                 struct syn_cache_head **, u_int);
     207             : 
     208             : /*
     209             :  * Insert segment ti into reassembly queue of tcp with
     210             :  * control block tp.  Return TH_FIN if reassembly now includes
     211             :  * a segment with FIN.  The macro form does the common case inline
     212             :  * (segment is the next to be received on an established connection,
     213             :  * and the queue is empty), avoiding linkage into and removal
     214             :  * from the queue and repetition of various conversions.
     215             :  * Set DELACK for segments received in order, but ack immediately
     216             :  * when segments are out of order (so fast retransmit can work).
     217             :  */
     218             : /* tp: connection owning the queue; th/m: header and mbuf of the segment (the queue entry keeps both pointers); *tlen: payload length, reduced here when leading bytes are trimmed.  m is always consumed (queued or freed).  Returns tcp_flush_queue(tp) when the stored segment starts at rcv_nxt, otherwise 0. */
     219             : int
     220           0 : tcp_reass(struct tcpcb *tp, struct tcphdr *th, struct mbuf *m, int *tlen)
     221             : {
     222             :         struct tcpqent *p, *q, *nq, *tiqe;
     223             : 
     224             :         /*
     225             :          * Allocate a new queue entry, before we throw away any data.
     226             :          * If we can't, just drop the packet.  XXX
     227             :          */
     228           0 :         tiqe = pool_get(&tcpqe_pool, PR_NOWAIT);        /* may return NULL; handled below */
     229           0 :         if (tiqe == NULL) {
     230           0 :                 tiqe = TAILQ_LAST(&tp->t_segq, tcpqehead);      /* fall back to the last entry of the segment queue */
     231           0 :                 if (tiqe != NULL && th->th_seq == tp->rcv_nxt) {
     232             :                         /* Reuse last entry since new segment fills a hole */
     233           0 :                         m_freem(tiqe->tcpqe_m);
     234           0 :                         TAILQ_REMOVE(&tp->t_segq, tiqe, tcpqe_q);
     235           0 :                 }
     236           0 :                 if (tiqe == NULL || th->th_seq != tp->rcv_nxt) {        /* nothing to reuse, or segment does not start at rcv_nxt */
     237             :                         /* Flush segment queue for this connection */
     238           0 :                         tcp_freeq(tp);
     239           0 :                         tcpstat_inc(tcps_rcvmemdrop);
     240           0 :                         m_freem(m);
     241           0 :                         return (0);     /* segment dropped, nothing queued */
     242             :                 }
     243             :         }
     244             : 
     245             :         /*
     246             :          * Find a segment which begins after this one does.
     247             :          */
     248           0 :         for (p = NULL, q = TAILQ_FIRST(&tp->t_segq); q != NULL;        /* p trails q as its predecessor */
     249           0 :             p = q, q = TAILQ_NEXT(q, tcpqe_q))
     250           0 :                 if (SEQ_GT(q->tcpqe_tcp->th_seq, th->th_seq))
     251             :                         break;
     252             : 
     253             :         /*
     254             :          * If there is a preceding segment, it may provide some of
     255             :          * our data already.  If so, drop the data from the incoming
     256             :          * segment.  If it provides all of our data, drop us.
     257             :          */
     258           0 :         if (p != NULL) {
     259           0 :                 struct tcphdr *phdr = p->tcpqe_tcp;
     260             :                 int i;
     261             : 
     262             :                 /* conversion to int (in i) handles seq wraparound */
     263           0 :                 i = phdr->th_seq + phdr->th_reseqlen - th->th_seq;
     264           0 :                 if (i > 0) {
     265           0 :                         if (i >= *tlen) {       /* p already holds every byte of this segment */
     266           0 :                                 tcpstat_pkt(tcps_rcvduppack, tcps_rcvdupbyte,
     267           0 :                                     *tlen);
     268           0 :                                 m_freem(m);
     269           0 :                                 pool_put(&tcpqe_pool, tiqe);    /* entry was never linked in; hand it back */
     270           0 :                                 return (0);
     271             :                         }
     272           0 :                         m_adj(m, i);    /* drop the i bytes p already provides */
     273           0 :                         *tlen -= i;
     274           0 :                         th->th_seq += i;
     275           0 :                 }
     276           0 :         }
     277           0 :         tcpstat_pkt(tcps_rcvoopack, tcps_rcvoobyte, *tlen);
     278             : 
     279             :         /*
     280             :          * While we overlap succeeding segments trim them or,
     281             :          * if they are completely covered, dequeue them.
     282             :          */
     283           0 :         for (; q != NULL; q = nq) {
     284           0 :                 struct tcphdr *qhdr = q->tcpqe_tcp;
     285           0 :                 int i = (th->th_seq + *tlen) - qhdr->th_seq;    /* how far this segment runs into q */
     286             : 
     287           0 :                 if (i <= 0)
     288           0 :                         break;
     289           0 :                 if (i < qhdr->th_reseqlen) {    /* partial overlap: trim q and stop */
     290           0 :                         qhdr->th_seq += i;
     291           0 :                         qhdr->th_reseqlen -= i;
     292           0 :                         m_adj(q->tcpqe_m, i);
     293           0 :                         break;
     294             :                 }
     295           0 :                 nq = TAILQ_NEXT(q, tcpqe_q);    /* q is fully covered: remember its successor, then release q */
     296           0 :                 m_freem(q->tcpqe_m);
     297           0 :                 TAILQ_REMOVE(&tp->t_segq, q, tcpqe_q);
     298           0 :                 pool_put(&tcpqe_pool, q);
     299           0 :         }
     300             : 
     301             :         /* Insert the new segment queue entry into place. */
     302           0 :         tiqe->tcpqe_m = m;
     303           0 :         th->th_reseqlen = *tlen;        /* the (possibly trimmed) length is recorded in the header itself */
     304           0 :         tiqe->tcpqe_tcp = th;
     305           0 :         if (p == NULL) {
     306           0 :                 TAILQ_INSERT_HEAD(&tp->t_segq, tiqe, tcpqe_q);
     307           0 :         } else {
     308           0 :                 TAILQ_INSERT_AFTER(&tp->t_segq, p, tiqe, tcpqe_q);
     309             :         }
     310             : 
     311           0 :         if (th->th_seq != tp->rcv_nxt)  /* segment is not the next expected data: leave it queued */
     312           0 :                 return (0);
     313             : 
     314           0 :         return (tcp_flush_queue(tp));
     315           0 : }
     316             : /* Hand the run of queued segments that starts at tp->rcv_nxt to the socket receive buffer, advancing rcv_nxt past each one.  Returns the TH_FIN bit of the last segment delivered, or 0 when nothing is delivered. */
     317             : int
     318           0 : tcp_flush_queue(struct tcpcb *tp)
     319             : {
     320           0 :         struct socket *so = tp->t_inpcb->inp_socket;
     321             :         struct tcpqent *q, *nq;
     322             :         int flags;
     323             : 
     324             :         /*
     325             :          * Present data to user, advancing rcv_nxt through
     326             :          * completed sequence space.
     327             :          */
     328           0 :         if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)     /* nothing is delivered unless TCPS_HAVEESTABLISHED() holds */
     329           0 :                 return (0);
     330           0 :         q = TAILQ_FIRST(&tp->t_segq);
     331           0 :         if (q == NULL || q->tcpqe_tcp->th_seq != tp->rcv_nxt)  /* queue empty, or its head is not the next expected data */
     332           0 :                 return (0);
     333           0 :         if (tp->t_state == TCPS_SYN_RECEIVED && q->tcpqe_tcp->th_reseqlen)     /* NOTE(review): looks unreachable if TCPS_HAVEESTABLISHED() already excludes SYN_RECEIVED -- confirm against tcp_fsm.h */
     334           0 :                 return (0);
     335           0 :         do {
     336           0 :                 tp->rcv_nxt += q->tcpqe_tcp->th_reseqlen;
     337           0 :                 flags = q->tcpqe_tcp->th_flags & TH_FIN;        /* overwritten each pass, so it reflects the last segment handled */
     338             : 
     339           0 :                 nq = TAILQ_NEXT(q, tcpqe_q);    /* fetch the successor before q is unlinked and released */
     340           0 :                 TAILQ_REMOVE(&tp->t_segq, q, tcpqe_q);
     341           0 :                 ND6_HINT(tp);
     342           0 :                 if (so->so_state & SS_CANTRCVMORE)      /* socket cannot receive more: free the data instead of appending it */
     343           0 :                         m_freem(q->tcpqe_m);
     344             :                 else
     345           0 :                         sbappendstream(so, &so->so_rcv, q->tcpqe_m);
     346           0 :                 pool_put(&tcpqe_pool, q);
     347             :                 q = nq;
     348           0 :         } while (q != NULL && q->tcpqe_tcp->th_seq == tp->rcv_nxt);
     349           0 :         tp->t_flags |= TF_BLOCKOUTPUT;  /* flag is held only across the sorwakeup() call */
     350           0 :         sorwakeup(so);
     351           0 :         tp->t_flags &= ~TF_BLOCKOUTPUT;
     352           0 :         return (flags);
     353           0 : }
     354             : 
     355             : /*
     356             :  * TCP input routine, follows pages 65-76 of the
     357             :  * protocol specification dated September, 1981 very closely.
     358             :  */
     359             : int
     360           0 : tcp_input(struct mbuf **mp, int *offp, int proto, int af)
     361             : {
     362           0 :         struct mbuf *m = *mp;
     363           0 :         int iphlen = *offp;
     364             :         struct ip *ip = NULL;
     365             :         struct inpcb *inp = NULL;
     366             :         u_int8_t *optp = NULL;
     367             :         int optlen = 0;
     368           0 :         int tlen, off;
     369             :         struct tcpcb *otp = NULL, *tp = NULL;
     370             :         int tiflags;
     371             :         struct socket *so = NULL;
     372             :         int todrop, acked, ourfinisacked;
     373             :         int hdroptlen = 0;
     374             :         short ostate;
     375             :         caddr_t saveti;
     376           0 :         tcp_seq iss, *reuse = NULL;
     377             :         u_long tiwin;
     378           0 :         struct tcp_opt_info opti;
     379             :         struct tcphdr *th;
     380             : #ifdef INET6
     381             :         struct ip6_hdr *ip6 = NULL;
     382             : #endif /* INET6 */
     383             : #ifdef IPSEC
     384             :         struct m_tag *mtag;
     385             :         struct tdb_ident *tdbi;
     386             :         struct tdb *tdb;
     387           0 :         int error;
     388             : #endif /* IPSEC */
     389             : #ifdef TCP_ECN
     390             :         u_char iptos;
     391             : #endif
     392             : 
     393           0 :         tcpstat_inc(tcps_rcvtotal);
     394             : 
     395           0 :         opti.ts_present = 0;
     396           0 :         opti.maxseg = 0;
     397             : 
     398             :         /*
     399             :          * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
     400             :          */
     401           0 :         if (m->m_flags & (M_BCAST|M_MCAST))
     402             :                 goto drop;
     403             : 
     404             :         /*
     405             :          * Get IP and TCP header together in first mbuf.
     406             :          * Note: IP leaves IP header in first mbuf.
     407             :          */
     408           0 :         IP6_EXTHDR_GET(th, struct tcphdr *, m, iphlen, sizeof(*th));
     409           0 :         if (!th) {
     410           0 :                 tcpstat_inc(tcps_rcvshort);
     411           0 :                 return IPPROTO_DONE;
     412             :         }
     413             : 
     414           0 :         tlen = m->m_pkthdr.len - iphlen;
     415           0 :         switch (af) {
     416             :         case AF_INET:
     417           0 :                 ip = mtod(m, struct ip *);
     418             : #ifdef TCP_ECN
     419             :                 /* save ip_tos before clearing it for checksum */
     420           0 :                 iptos = ip->ip_tos;
     421             : #endif
     422           0 :                 break;
     423             : #ifdef INET6
     424             :         case AF_INET6:
     425           0 :                 ip6 = mtod(m, struct ip6_hdr *);
     426             : #ifdef TCP_ECN
     427           0 :                 iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
     428             : #endif
     429             : 
     430             :                 /*
     431             :                  * Be proactive about unspecified IPv6 address in source.
     432             :                  * As we use all-zero to indicate unbounded/unconnected pcb,
     433             :                  * unspecified IPv6 address can be used to confuse us.
     434             :                  *
     435             :                  * Note that packets with unspecified IPv6 destination is
     436             :                  * already dropped in ip6_input.
     437             :                  */
     438           0 :                 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
     439             :                         /* XXX stat */
     440             :                         goto drop;
     441             :                 }
     442             : 
     443             :                 /* Discard packets to multicast */
     444           0 :                 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
     445             :                         /* XXX stat */
     446             :                         goto drop;
     447             :                 }
     448             :                 break;
     449             : #endif
     450             :         default:
     451           0 :                 unhandled_af(af);
     452             :         }
     453             : 
     454             :         /*
     455             :          * Checksum extended TCP header and data.
     456             :          */
     457           0 :         if ((m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_OK) == 0) {
     458             :                 int sum;
     459             : 
     460           0 :                 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD) {
     461           0 :                         tcpstat_inc(tcps_rcvbadsum);
     462           0 :                         goto drop;
     463             :                 }
     464           0 :                 tcpstat_inc(tcps_inswcsum);
     465           0 :                 switch (af) {
     466             :                 case AF_INET:
     467           0 :                         sum = in4_cksum(m, IPPROTO_TCP, iphlen, tlen);
     468           0 :                         break;
     469             : #ifdef INET6
     470             :                 case AF_INET6:
     471           0 :                         sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
     472           0 :                             tlen);
     473           0 :                         break;
     474             : #endif
     475             :                 }
     476           0 :                 if (sum != 0) {
     477           0 :                         tcpstat_inc(tcps_rcvbadsum);
     478           0 :                         goto drop;
     479             :                 }
     480           0 :         }
     481             : 
     482             :         /*
     483             :          * Check that TCP offset makes sense,
     484             :          * pull out TCP options and adjust length.              XXX
     485             :          */
     486           0 :         off = th->th_off << 2;
     487           0 :         if (off < sizeof(struct tcphdr) || off > tlen) {
     488           0 :                 tcpstat_inc(tcps_rcvbadoff);
     489           0 :                 goto drop;
     490             :         }
     491           0 :         tlen -= off;
     492           0 :         if (off > sizeof(struct tcphdr)) {
     493           0 :                 IP6_EXTHDR_GET(th, struct tcphdr *, m, iphlen, off);
     494           0 :                 if (!th) {
     495           0 :                         tcpstat_inc(tcps_rcvshort);
     496           0 :                         return IPPROTO_DONE;
     497             :                 }
     498           0 :                 optlen = off - sizeof(struct tcphdr);
     499           0 :                 optp = (u_int8_t *)(th + 1);
     500             :                 /*
     501             :                  * Do quick retrieval of timestamp options ("options
     502             :                  * prediction?").  If timestamp is the only option and it's
     503             :                  * formatted as recommended in RFC 1323 appendix A, we
     504             :                  * quickly get the values now and not bother calling
     505             :                  * tcp_dooptions(), etc.
     506             :                  */
     507           0 :                 if ((optlen == TCPOLEN_TSTAMP_APPA ||
     508           0 :                      (optlen > TCPOLEN_TSTAMP_APPA &&
     509           0 :                         optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
     510           0 :                      *(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
     511           0 :                      (th->th_flags & TH_SYN) == 0) {
     512           0 :                         opti.ts_present = 1;
     513           0 :                         opti.ts_val = ntohl(*(u_int32_t *)(optp + 4));
     514           0 :                         opti.ts_ecr = ntohl(*(u_int32_t *)(optp + 8));
     515             :                         optp = NULL;    /* we've parsed the options */
     516           0 :                 }
     517             :         }
     518           0 :         tiflags = th->th_flags;
     519             : 
     520             :         /*
     521             :          * Convert TCP protocol specific fields to host format.
     522             :          */
     523           0 :         th->th_seq = ntohl(th->th_seq);
     524           0 :         th->th_ack = ntohl(th->th_ack);
     525           0 :         th->th_win = ntohs(th->th_win);
     526           0 :         th->th_urp = ntohs(th->th_urp);
     527             : 
     528             :         /*
     529             :          * Locate pcb for segment.
     530             :          */
     531             : #if NPF > 0
     532           0 :         inp = pf_inp_lookup(m);
     533             : #endif
     534             : findpcb:
     535           0 :         if (inp == NULL) {
     536           0 :                 switch (af) {
     537             : #ifdef INET6
     538             :                 case AF_INET6:
     539           0 :                         inp = in6_pcbhashlookup(&tcbtable, &ip6->ip6_src,
     540           0 :                             th->th_sport, &ip6->ip6_dst, th->th_dport,
     541           0 :                             m->m_pkthdr.ph_rtableid);
     542           0 :                         break;
     543             : #endif
     544             :                 case AF_INET:
     545           0 :                         inp = in_pcbhashlookup(&tcbtable, ip->ip_src,
     546           0 :                             th->th_sport, ip->ip_dst, th->th_dport,
     547           0 :                             m->m_pkthdr.ph_rtableid);
     548           0 :                         break;
     549             :                 }
     550             :         }
     551           0 :         if (inp == NULL) {
     552           0 :                 tcpstat_inc(tcps_pcbhashmiss);
     553           0 :                 switch (af) {
     554             : #ifdef INET6
     555             :                 case AF_INET6:
     556           0 :                         inp = in6_pcblookup_listen(&tcbtable, &ip6->ip6_dst,
     557           0 :                             th->th_dport, m, m->m_pkthdr.ph_rtableid);
     558           0 :                         break;
     559             : #endif /* INET6 */
     560             :                 case AF_INET:
     561           0 :                         inp = in_pcblookup_listen(&tcbtable, ip->ip_dst,
     562           0 :                             th->th_dport, m, m->m_pkthdr.ph_rtableid);
     563           0 :                         break;
     564             :                 }
     565             :                 /*
     566             :                  * If the state is CLOSED (i.e., TCB does not exist) then
     567             :                  * all data in the incoming segment is discarded.
     568             :                  * If the TCB exists but is in CLOSED state, it is embryonic,
     569             :                  * but should either do a listen or a connect soon.
     570             :                  */
     571           0 :                 if (inp == NULL) {
     572           0 :                         tcpstat_inc(tcps_noport);
     573           0 :                         goto dropwithreset_ratelim;
     574             :                 }
     575             :         }
     576           0 :         KASSERT(sotoinpcb(inp->inp_socket) == inp);
     577           0 :         KASSERT(intotcpcb(inp) == NULL || intotcpcb(inp)->t_inpcb == inp);
     578           0 :         soassertlocked(inp->inp_socket);
     579             : 
     580             :         /* Check the minimum TTL for socket. */
     581           0 :         switch (af) {
     582             :         case AF_INET:
     583           0 :                 if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl)
     584             :                         goto drop;
     585             :                 break;
     586             : #ifdef INET6
     587             :         case AF_INET6:
     588           0 :                 if (inp->inp_ip6_minhlim &&
     589           0 :                     inp->inp_ip6_minhlim > ip6->ip6_hlim)
     590             :                         goto drop;
     591             :                 break;
     592             : #endif
     593             :         }
     594             : 
     595           0 :         tp = intotcpcb(inp);
     596           0 :         if (tp == NULL)
     597             :                 goto dropwithreset_ratelim;
     598           0 :         if (tp->t_state == TCPS_CLOSED)
     599             :                 goto drop;
     600             : 
     601             :         /* Unscale the window into a 32-bit value. */
     602           0 :         if ((tiflags & TH_SYN) == 0)
     603           0 :                 tiwin = th->th_win << tp->snd_scale;
     604             :         else
     605           0 :                 tiwin = th->th_win;
     606             : 
     607           0 :         so = inp->inp_socket;
     608           0 :         if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
     609           0 :                 union syn_cache_sa src;
     610           0 :                 union syn_cache_sa dst;
     611             : 
     612           0 :                 bzero(&src, sizeof(src));
     613           0 :                 bzero(&dst, sizeof(dst));
     614           0 :                 switch (af) {
     615             :                 case AF_INET:
     616           0 :                         src.sin.sin_len = sizeof(struct sockaddr_in);
     617           0 :                         src.sin.sin_family = AF_INET;
     618           0 :                         src.sin.sin_addr = ip->ip_src;
     619           0 :                         src.sin.sin_port = th->th_sport;
     620             : 
     621           0 :                         dst.sin.sin_len = sizeof(struct sockaddr_in);
     622           0 :                         dst.sin.sin_family = AF_INET;
     623           0 :                         dst.sin.sin_addr = ip->ip_dst;
     624           0 :                         dst.sin.sin_port = th->th_dport;
     625           0 :                         break;
     626             : #ifdef INET6
     627             :                 case AF_INET6:
     628           0 :                         src.sin6.sin6_len = sizeof(struct sockaddr_in6);
     629           0 :                         src.sin6.sin6_family = AF_INET6;
     630           0 :                         src.sin6.sin6_addr = ip6->ip6_src;
     631           0 :                         src.sin6.sin6_port = th->th_sport;
     632             : 
     633           0 :                         dst.sin6.sin6_len = sizeof(struct sockaddr_in6);
     634           0 :                         dst.sin6.sin6_family = AF_INET6;
     635           0 :                         dst.sin6.sin6_addr = ip6->ip6_dst;
     636           0 :                         dst.sin6.sin6_port = th->th_dport;
     637           0 :                         break;
     638             : #endif /* INET6 */
     639             :                 }
     640             : 
     641           0 :                 if (so->so_options & SO_DEBUG) {
     642             :                         otp = tp;
     643           0 :                         ostate = tp->t_state;
     644           0 :                         switch (af) {
     645             : #ifdef INET6
     646             :                         case AF_INET6:
     647             :                                 saveti = (caddr_t) &tcp_saveti6;
     648           0 :                                 memcpy(&tcp_saveti6.ti6_i, ip6, sizeof(*ip6));
     649           0 :                                 memcpy(&tcp_saveti6.ti6_t, th, sizeof(*th));
     650           0 :                                 break;
     651             : #endif
     652             :                         case AF_INET:
     653             :                                 saveti = (caddr_t) &tcp_saveti;
     654           0 :                                 memcpy(&tcp_saveti.ti_i, ip, sizeof(*ip));
     655           0 :                                 memcpy(&tcp_saveti.ti_t, th, sizeof(*th));
     656           0 :                                 break;
     657             :                         }
     658             :                 }
     659           0 :                 if (so->so_options & SO_ACCEPTCONN) {
     660           0 :                         switch (tiflags & (TH_RST|TH_SYN|TH_ACK)) {
     661             : 
     662             :                         case TH_SYN|TH_ACK|TH_RST:
     663             :                         case TH_SYN|TH_RST:
     664             :                         case TH_ACK|TH_RST:
     665             :                         case TH_RST:
     666           0 :                                 syn_cache_reset(&src.sa, &dst.sa, th,
     667           0 :                                     inp->inp_rtableid);
     668           0 :                                 goto drop;
     669             : 
     670             :                         case TH_SYN|TH_ACK:
     671             :                                 /*
     672             :                                  * Received a SYN,ACK.  This should
     673             :                                  * never happen while we are in
     674             :                                  * LISTEN.  Send an RST.
     675             :                                  */
     676           0 :                                 goto badsyn;
     677             : 
     678             :                         case TH_ACK:
     679           0 :                                 so = syn_cache_get(&src.sa, &dst.sa,
     680           0 :                                         th, iphlen, tlen, so, m);
     681           0 :                                 if (so == NULL) {
     682             :                                         /*
     683             :                                          * We don't have a SYN for
     684             :                                          * this ACK; send an RST.
     685             :                                          */
     686           0 :                                         goto badsyn;
     687           0 :                                 } else if (so == (struct socket *)(-1)) {
     688             :                                         /*
     689             :                                          * We were unable to create
     690             :                                          * the connection.  If the
     691             :                                          * 3-way handshake was
     692             :                                          * completed, an RST has
     693             :                                          * been sent to the peer.
     694             :                                          * Since the mbuf might be
     695             :                                          * in use for the reply,
     696             :                                          * do not free it.
     697             :                                          */
     698           0 :                                         m = *mp = NULL;
     699           0 :                                         goto drop;
     700             :                                 } else {
     701             :                                         /*
     702             :                                          * We have created a
     703             :                                          * full-blown connection.
     704             :                                          */
     705             :                                         tp = NULL;
     706           0 :                                         inp = sotoinpcb(so);
     707           0 :                                         tp = intotcpcb(inp);
     708           0 :                                         if (tp == NULL)
     709           0 :                                                 goto badsyn;    /*XXX*/
     710             : 
     711             :                                 }
     712             :                                 break;
     713             : 
     714             :                         default:
     715             :                                 /*
     716             :                                  * None of RST, SYN or ACK was set.
     717             :                                  * This is an invalid packet for a
     718             :                                  * TCB in LISTEN state.  Send a RST.
     719             :                                  */
     720           0 :                                 goto badsyn;
     721             : 
     722             :                         case TH_SYN:
     723             :                                 /*
     724             :                                  * Received a SYN.
     725             :                                  */
     726             : #ifdef INET6
     727             :                                 /*
     728             :                                  * If deprecated address is forbidden, we do
     729             :                                  * not accept SYN to deprecated interface
     730             :                                  * address to prevent any new inbound
     731             :                                  * connection from getting established.
     732             :                                  * When we do not accept SYN, we send a TCP
     733             :                                  * RST, with deprecated source address (instead
     734             :                                  * of dropping it).  We compromise it as it is
     735             :                                  * much better for peer to send a RST, and
     736             :                                  * RST will be the final packet for the
     737             :                                  * exchange.
     738             :                                  *
     739             :                                  * If we do not forbid deprecated addresses, we
     740             :                                  * accept the SYN packet.  RFC2462 does not
     741             :                                  * suggest dropping SYN in this case.
     742             :                                  * If we decipher RFC2462 5.5.4, it says like
     743             :                                  * this:
     744             :                                  * 1. use of deprecated addr with existing
     745             :                                  *    communication is okay - "SHOULD continue
     746             :                                  *    to be used"
     747             :                                  * 2. use of it with new communication:
     748             :                                  *   (2a) "SHOULD NOT be used if alternate
     749             :                                  *        address with sufficient scope is
     750             :                                  *        available"
     751             :                                  *   (2b) nothing mentioned otherwise.
     752             :                                  * Here we fall into (2b) case as we have no
     753             :                                  * choice in our source address selection - we
     754             :                                  * must obey the peer.
     755             :                                  *
     756             :                                  * The wording in RFC2462 is confusing, and
     757             :                                  * there are multiple description texts for
     758             :                                  * deprecated address handling - worse, they
     759             :                                  * are not exactly the same.  I believe 5.5.4
     760             :                                  * is the best one, so we follow 5.5.4.
     761             :                                  */
     762           0 :                                 if (ip6 && !ip6_use_deprecated) {
     763             :                                         struct in6_ifaddr *ia6;
     764             :                                         struct ifnet *ifp =
     765           0 :                                             if_get(m->m_pkthdr.ph_ifidx);
     766             : 
     767           0 :                                         if (ifp &&
     768           0 :                                             (ia6 = in6ifa_ifpwithaddr(ifp,
     769           0 :                                             &ip6->ip6_dst)) &&
     770           0 :                                             (ia6->ia6_flags &
     771             :                                             IN6_IFF_DEPRECATED)) {
     772             :                                                 tp = NULL;
     773           0 :                                                 if_put(ifp);
     774           0 :                                                 goto dropwithreset;
     775             :                                         }
     776           0 :                                         if_put(ifp);
     777           0 :                                 }
     778             : #endif
     779             : 
     780             :                                 /*
     781             :                                  * LISTEN socket received a SYN
     782             :                                  * from itself?  This can't possibly
     783             :                                  * be valid; drop the packet.
     784             :                                  */
     785           0 :                                 if (th->th_dport == th->th_sport) {
     786           0 :                                         switch (af) {
     787             : #ifdef INET6
     788             :                                         case AF_INET6:
     789           0 :                                                 if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src,
     790             :                                                     &ip6->ip6_dst)) {
     791           0 :                                                         tcpstat_inc(tcps_badsyn);
     792           0 :                                                         goto drop;
     793             :                                                 }
     794             :                                                 break;
     795             : #endif /* INET6 */
     796             :                                         case AF_INET:
     797           0 :                                                 if (ip->ip_dst.s_addr == ip->ip_src.s_addr) {
     798           0 :                                                         tcpstat_inc(tcps_badsyn);
     799           0 :                                                         goto drop;
     800             :                                                 }
     801             :                                                 break;
     802             :                                         }
     803             :                                 }
     804             : 
     805             :                                 /*
     806             :                                  * SYN looks ok; create compressed TCP
     807             :                                  * state for it.
     808             :                                  */
     809           0 :                                 if (so->so_qlen > so->so_qlimit ||
     810           0 :                                     syn_cache_add(&src.sa, &dst.sa, th, iphlen,
     811           0 :                                     so, m, optp, optlen, &opti, reuse) == -1) {
     812           0 :                                         tcpstat_inc(tcps_dropsyn);
     813           0 :                                         goto drop;
     814             :                                 }
     815           0 :                                 return IPPROTO_DONE;
     816             :                         }
     817             :                 }
     818           0 :         }
     819             : 
     820             : #ifdef DIAGNOSTIC
     821             :         /*
     822             :          * Should not happen now that all embryonic connections
     823             :          * are handled with compressed state.
     824             :          */
     825           0 :         if (tp->t_state == TCPS_LISTEN)
     826           0 :                 panic("tcp_input: TCPS_LISTEN");
     827             : #endif
     828             : 
     829             : #if NPF > 0
     830           0 :         pf_inp_link(m, inp);
     831             : #endif
     832             : 
     833             : #ifdef IPSEC
     834             :         /* Find most recent IPsec tag */
     835           0 :         mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
     836           0 :         if (mtag != NULL) {
     837           0 :                 tdbi = (struct tdb_ident *)(mtag + 1);
     838           0 :                 tdb = gettdb(tdbi->rdomain, tdbi->spi,
     839           0 :                     &tdbi->dst, tdbi->proto);
     840           0 :         } else
     841             :                 tdb = NULL;
     842           0 :         ipsp_spd_lookup(m, af, iphlen, &error, IPSP_DIRECTION_IN,
     843             :             tdb, inp, 0);
     844           0 :         if (error) {
     845           0 :                 tcpstat_inc(tcps_rcvnosec);
     846           0 :                 goto drop;
     847             :         }
     848             : #endif /* IPSEC */
     849             : 
     850             :         /*
     851             :          * Segment received on connection.
     852             :          * Reset idle time and keep-alive timer.
     853             :          */
     854           0 :         tp->t_rcvtime = tcp_now;
     855           0 :         if (TCPS_HAVEESTABLISHED(tp->t_state))
     856           0 :                 TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
     857             : 
     858           0 :         if (tp->sack_enable)
     859           0 :                 tcp_del_sackholes(tp, th); /* Delete stale SACK holes */
     860             : 
     861             :         /*
     862             :          * Process options.
     863             :          */
     864             : #ifdef TCP_SIGNATURE
     865           0 :         if (optp || (tp->t_flags & TF_SIGNATURE))
     866             : #else
     867             :         if (optp)
     868             : #endif
     869           0 :                 if (tcp_dooptions(tp, optp, optlen, th, m, iphlen, &opti,
     870           0 :                     m->m_pkthdr.ph_rtableid))
     871             :                         goto drop;
     872             : 
     873           0 :         if (opti.ts_present && opti.ts_ecr) {
     874             :                 int rtt_test;
     875             : 
     876             :                 /* subtract out the tcp timestamp modulator */
     877           0 :                 opti.ts_ecr -= tp->ts_modulate;
     878             : 
     879             :                 /* make sure ts_ecr is sensible */
     880           0 :                 rtt_test = tcp_now - opti.ts_ecr;
     881           0 :                 if (rtt_test < 0 || rtt_test > TCP_RTT_MAX)
     882           0 :                         opti.ts_ecr = 0;
     883           0 :         }
     884             : 
     885             : #ifdef TCP_ECN
     886             :         /* if congestion experienced, set ECE bit in subsequent packets. */
     887           0 :         if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
     888           0 :                 tp->t_flags |= TF_RCVD_CE;
     889           0 :                 tcpstat_inc(tcps_ecn_rcvce);
     890           0 :         }
     891             : #endif
     892             :         /*
     893             :          * Header prediction: check for the two common cases
     894             :          * of a uni-directional data xfer.  If the packet has
     895             :          * no control flags, is in-sequence, the window didn't
     896             :          * change and we're not retransmitting, it's a
     897             :          * candidate.  If the length is zero and the ack moved
     898             :          * forward, we're the sender side of the xfer.  Just
     899             :          * free the data acked & wake any higher level process
     900             :          * that was blocked waiting for space.  If the length
     901             :          * is non-zero and the ack didn't move, we're the
     902             :          * receiver side.  If we're getting packets in-order
     903             :          * (the reassembly queue is empty), add the data to
     904             :          * the socket buffer and note that we need a delayed ack.
     905             :          */
     906           0 :         if (tp->t_state == TCPS_ESTABLISHED &&
     907             : #ifdef TCP_ECN
     908           0 :             (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ECE|TH_CWR|TH_ACK)) == TH_ACK &&
     909             : #else
     910             :             (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
     911             : #endif
     912           0 :             (!opti.ts_present || TSTMP_GEQ(opti.ts_val, tp->ts_recent)) &&
     913           0 :             th->th_seq == tp->rcv_nxt &&
     914           0 :             tiwin && tiwin == tp->snd_wnd &&
     915           0 :             tp->snd_nxt == tp->snd_max) {
     916             : 
     917             :                 /*
     918             :                  * If last ACK falls within this segment's sequence numbers,
     919             :                  *  record the timestamp.
     920             :                  * Fix from Braden, see Stevens p. 870
     921             :                  */
     922           0 :                 if (opti.ts_present && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
     923           0 :                         tp->ts_recent_age = tcp_now;
     924           0 :                         tp->ts_recent = opti.ts_val;
     925           0 :                 }
     926             : 
     927           0 :                 if (tlen == 0) {
     928           0 :                         if (SEQ_GT(th->th_ack, tp->snd_una) &&
     929           0 :                             SEQ_LEQ(th->th_ack, tp->snd_max) &&
     930           0 :                             tp->snd_cwnd >= tp->snd_wnd &&
     931           0 :                             tp->t_dupacks == 0) {
     932             :                                 /*
     933             :                                  * this is a pure ack for outstanding data.
     934             :                                  */
     935           0 :                                 tcpstat_inc(tcps_predack);
     936           0 :                                 if (opti.ts_present && opti.ts_ecr)
     937           0 :                                         tcp_xmit_timer(tp, tcp_now - opti.ts_ecr);
     938           0 :                                 else if (tp->t_rtttime &&
     939           0 :                                     SEQ_GT(th->th_ack, tp->t_rtseq))
     940           0 :                                         tcp_xmit_timer(tp,
     941           0 :                                             tcp_now - tp->t_rtttime);
     942           0 :                                 acked = th->th_ack - tp->snd_una;
     943           0 :                                 tcpstat_pkt(tcps_rcvackpack, tcps_rcvackbyte,
     944           0 :                                     acked);
     945           0 :                                 ND6_HINT(tp);
     946           0 :                                 sbdrop(so, &so->so_snd, acked);
     947             : 
     948             :                                 /*
     949             :                                  * If we had a pending ICMP message that
     950             :                                  * refers to data that have just been
     951             :                                  * acknowledged, disregard the recorded ICMP
     952             :                                  * message.
     953             :                                  */
     954           0 :                                 if ((tp->t_flags & TF_PMTUD_PEND) &&
     955           0 :                                     SEQ_GT(th->th_ack, tp->t_pmtud_th_seq))
     956           0 :                                         tp->t_flags &= ~TF_PMTUD_PEND;
     957             : 
     958             :                                 /*
     959             :                                  * Keep track of the largest chunk of data
     960             :                                  * acknowledged since last PMTU update
     961             :                                  */
     962           0 :                                 if (tp->t_pmtud_mss_acked < acked)
     963           0 :                                         tp->t_pmtud_mss_acked = acked;
     964             : 
     965           0 :                                 tp->snd_una = th->th_ack;
     966             :                                 /*
     967             :                                  * We want snd_last to track snd_una so
     968             :                                  * as to avoid sequence wraparound problems
     969             :                                  * for very large transfers.
     970             :                                  */
     971             : #ifdef TCP_ECN
     972           0 :                                 if (SEQ_GT(tp->snd_una, tp->snd_last))
     973             : #endif
     974           0 :                                 tp->snd_last = tp->snd_una;
     975           0 :                                 m_freem(m);
     976             : 
     977             :                                 /*
     978             :                                  * If all outstanding data are acked, stop
     979             :                                  * retransmit timer, otherwise restart timer
     980             :                                  * using current (possibly backed-off) value.
     981             :                                  * If process is waiting for space,
     982             :                                  * wakeup/selwakeup/signal.  If data
     983             :                                  * are ready to send, let tcp_output
     984             :                                  * decide between more output or persist.
     985             :                                  */
     986           0 :                                 if (tp->snd_una == tp->snd_max)
     987           0 :                                         TCP_TIMER_DISARM(tp, TCPT_REXMT);
     988           0 :                                 else if (TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0)
     989           0 :                                         TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
     990             : 
     991           0 :                                 tcp_update_sndspace(tp);
     992           0 :                                 if (sb_notify(so, &so->so_snd)) {
     993           0 :                                         tp->t_flags |= TF_BLOCKOUTPUT;
     994           0 :                                         sowwakeup(so);
     995           0 :                                         tp->t_flags &= ~TF_BLOCKOUTPUT;
     996           0 :                                 }
     997           0 :                                 if (so->so_snd.sb_cc ||
     998           0 :                                     tp->t_flags & TF_NEEDOUTPUT)
     999           0 :                                         (void) tcp_output(tp);
    1000           0 :                                 return IPPROTO_DONE;
    1001             :                         }
    1002           0 :                 } else if (th->th_ack == tp->snd_una &&
    1003           0 :                     TAILQ_EMPTY(&tp->t_segq) &&
    1004           0 :                     tlen <= sbspace(so, &so->so_rcv)) {
    1005             :                         /*
    1006             :                          * This is a pure, in-sequence data packet
    1007             :                          * with nothing on the reassembly queue and
    1008             :                          * we have enough buffer space to take it.
    1009             :                          */
    1010             :                         /* Clean receiver SACK report if present */
    1011           0 :                         if (tp->sack_enable && tp->rcv_numsacks)
    1012           0 :                                 tcp_clean_sackreport(tp);
    1013           0 :                         tcpstat_inc(tcps_preddat);
    1014           0 :                         tp->rcv_nxt += tlen;
    1015           0 :                         tcpstat_pkt(tcps_rcvpack, tcps_rcvbyte, tlen);
    1016           0 :                         ND6_HINT(tp);
    1017             : 
    1018           0 :                         TCP_SETUP_ACK(tp, tiflags, m);
    1019             :                         /*
    1020             :                          * Drop TCP, IP headers and TCP options then add data
    1021             :                          * to socket buffer.
    1022             :                          */
    1023           0 :                         if (so->so_state & SS_CANTRCVMORE)
    1024           0 :                                 m_freem(m);
    1025             :                         else {
    1026           0 :                                 if (opti.ts_present && opti.ts_ecr) {
    1027           0 :                                         if (tp->rfbuf_ts < opti.ts_ecr &&
    1028           0 :                                             opti.ts_ecr - tp->rfbuf_ts < hz) {
    1029           0 :                                                 tcp_update_rcvspace(tp);
    1030             :                                                 /* Start over with next RTT. */
    1031           0 :                                                 tp->rfbuf_cnt = 0;
    1032           0 :                                                 tp->rfbuf_ts = 0;
    1033           0 :                                         } else
    1034           0 :                                                 tp->rfbuf_cnt += tlen;
    1035             :                                 }
    1036           0 :                                 m_adj(m, iphlen + off);
    1037           0 :                                 sbappendstream(so, &so->so_rcv, m);
    1038             :                         }
    1039           0 :                         tp->t_flags |= TF_BLOCKOUTPUT;
    1040           0 :                         sorwakeup(so);
    1041           0 :                         tp->t_flags &= ~TF_BLOCKOUTPUT;
    1042           0 :                         if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT))
    1043           0 :                                 (void) tcp_output(tp);
    1044           0 :                         return IPPROTO_DONE;
    1045             :                 }
    1046             :         }
    1047             : 
    1048             :         /*
    1049             :          * Compute mbuf offset to TCP data segment.
    1050             :          */
    1051           0 :         hdroptlen = iphlen + off;
    1052             : 
    1053             :         /*
    1054             :          * Calculate amount of space in receive window,
    1055             :          * and then do TCP input processing.
    1056             :          * Receive window is amount of space in rcv queue,
    1057             :          * but not less than advertised window.
    1058             :          */
    1059             :         { int win;
    1060             : 
    1061           0 :         win = sbspace(so, &so->so_rcv);
    1062           0 :         if (win < 0)
    1063             :                 win = 0;
    1064           0 :         tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
    1065             :         }
    1066             : 
    1067             :         /* Reset receive buffer auto scaling when not in bulk receive mode. */
    1068           0 :         tp->rfbuf_cnt = 0;
    1069           0 :         tp->rfbuf_ts = 0;
    1070             : 
    1071           0 :         switch (tp->t_state) {
    1072             : 
    1073             :         /*
    1074             :          * If the state is SYN_RECEIVED:
    1075             :          *      if seg contains SYN/ACK, send an RST.
    1076             :          *      if seg contains an ACK, but not for our SYN/ACK, send an RST
    1077             :          */
    1078             : 
    1079             :         case TCPS_SYN_RECEIVED:
    1080           0 :                 if (tiflags & TH_ACK) {
    1081           0 :                         if (tiflags & TH_SYN) {
    1082           0 :                                 tcpstat_inc(tcps_badsyn);
    1083           0 :                                 goto dropwithreset;
    1084             :                         }
    1085           0 :                         if (SEQ_LEQ(th->th_ack, tp->snd_una) ||
    1086           0 :                             SEQ_GT(th->th_ack, tp->snd_max))
    1087             :                                 goto dropwithreset;
    1088             :                 }
    1089             :                 break;
    1090             : 
    1091             :         /*
    1092             :          * If the state is SYN_SENT:
    1093             :          *      if seg contains an ACK, but not for our SYN, drop the input.
    1094             :          *      if seg contains a RST, then drop the connection.
    1095             :          *      if seg does not contain SYN, then drop it.
    1096             :          * Otherwise this is an acceptable SYN segment
    1097             :          *      initialize tp->rcv_nxt and tp->irs
    1098             :          *      if seg contains ack then advance tp->snd_una
    1099             :          *      if SYN has been acked change to ESTABLISHED else SYN_RCVD state
    1100             :          *      arrange for segment to be acked (eventually)
    1101             :          *      continue processing rest of data/controls, beginning with URG
    1102             :          */
    1103             :         case TCPS_SYN_SENT:
    1104           0 :                 if ((tiflags & TH_ACK) &&
    1105           0 :                     (SEQ_LEQ(th->th_ack, tp->iss) ||
    1106           0 :                      SEQ_GT(th->th_ack, tp->snd_max)))
    1107             :                         goto dropwithreset;
    1108           0 :                 if (tiflags & TH_RST) {
    1109             : #ifdef TCP_ECN
    1110             :                         /* if ECN is enabled, fall back to non-ecn at rexmit */
    1111           0 :                         if (tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
    1112             :                                 goto drop;
    1113             : #endif
    1114           0 :                         if (tiflags & TH_ACK)
    1115           0 :                                 tp = tcp_drop(tp, ECONNREFUSED);
    1116             :                         goto drop;
    1117             :                 }
    1118           0 :                 if ((tiflags & TH_SYN) == 0)
    1119             :                         goto drop;
    1120           0 :                 if (tiflags & TH_ACK) {
    1121           0 :                         tp->snd_una = th->th_ack;
    1122           0 :                         if (SEQ_LT(tp->snd_nxt, tp->snd_una))
    1123           0 :                                 tp->snd_nxt = tp->snd_una;
    1124             :                 }
    1125           0 :                 TCP_TIMER_DISARM(tp, TCPT_REXMT);
    1126           0 :                 tp->irs = th->th_seq;
    1127           0 :                 tcp_mss(tp, opti.maxseg);
    1128             :                 /* Reset initial window to 1 segment for retransmit */
    1129           0 :                 if (tp->t_rxtshift > 0)
    1130           0 :                         tp->snd_cwnd = tp->t_maxseg;
    1131           0 :                 tcp_rcvseqinit(tp);
    1132           0 :                 tp->t_flags |= TF_ACKNOW;
    1133             :                 /*
    1134             :                  * If we've sent a SACK_PERMITTED option, and the peer
    1135             :                  * also replied with one, then TF_SACK_PERMIT should have
    1136             :                  * been set in tcp_dooptions().  If it was not, disable SACKs.
    1137             :                  */
    1138           0 :                 if (tp->sack_enable)
    1139           0 :                         tp->sack_enable = tp->t_flags & TF_SACK_PERMIT;
    1140             : #ifdef TCP_ECN
    1141             :                 /*
    1142             :                  * if ECE is set but CWR is not set for SYN-ACK, or
    1143             :                  * both ECE and CWR are set for simultaneous open,
    1144             :                  * peer is ECN capable.
    1145             :                  */
    1146           0 :                 if (tcp_do_ecn) {
    1147           0 :                         switch (tiflags & (TH_ACK|TH_ECE|TH_CWR)) {
    1148             :                         case TH_ACK|TH_ECE:
    1149             :                         case TH_ECE|TH_CWR:
    1150           0 :                                 tp->t_flags |= TF_ECN_PERMIT;
    1151           0 :                                 tiflags &= ~(TH_ECE|TH_CWR);
    1152           0 :                                 tcpstat_inc(tcps_ecn_accepts);
    1153           0 :                         }
    1154             :                 }
    1155             : #endif
    1156             : 
    1157           0 :                 if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
    1158           0 :                         tcpstat_inc(tcps_connects);
    1159           0 :                         tp->t_flags |= TF_BLOCKOUTPUT;
    1160           0 :                         soisconnected(so);
    1161           0 :                         tp->t_flags &= ~TF_BLOCKOUTPUT;
    1162           0 :                         tp->t_state = TCPS_ESTABLISHED;
    1163           0 :                         TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
    1164             :                         /* Do window scaling on this connection? */
    1165           0 :                         if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
    1166             :                                 (TF_RCVD_SCALE|TF_REQ_SCALE)) {
    1167           0 :                                 tp->snd_scale = tp->requested_s_scale;
    1168           0 :                                 tp->rcv_scale = tp->request_r_scale;
    1169           0 :                         }
    1170           0 :                         tcp_flush_queue(tp);
    1171             : 
    1172             :                         /*
    1173             :                          * if we didn't have to retransmit the SYN,
    1174             :                          * use its rtt as our initial srtt & rtt var.
    1175             :                          */
    1176           0 :                         if (tp->t_rtttime)
    1177           0 :                                 tcp_xmit_timer(tp, tcp_now - tp->t_rtttime);
    1178             :                         /*
    1179             :                          * Since new data was acked (the SYN), open the
    1180             :                          * congestion window by one MSS.  We do this
    1181             :                          * here, because we won't go through the normal
    1182             :                          * ACK processing below.  And since this is the
    1183             :                          * start of the connection, we know we are in
    1184             :                          * the exponential phase of slow-start.
    1185             :                          */
    1186           0 :                         tp->snd_cwnd += tp->t_maxseg;
    1187           0 :                 } else
    1188           0 :                         tp->t_state = TCPS_SYN_RECEIVED;
    1189             : 
    1190             : #if 0
    1191             : trimthenstep6:
    1192             : #endif
    1193             :                 /*
    1194             :                  * Advance th->th_seq to correspond to first data byte.
    1195             :                  * If data, trim to stay within window,
    1196             :                  * dropping FIN if necessary.
    1197             :                  */
    1198           0 :                 th->th_seq++;
    1199           0 :                 if (tlen > tp->rcv_wnd) {
    1200           0 :                         todrop = tlen - tp->rcv_wnd;
    1201           0 :                         m_adj(m, -todrop);
    1202           0 :                         tlen = tp->rcv_wnd;
    1203           0 :                         tiflags &= ~TH_FIN;
    1204           0 :                         tcpstat_pkt(tcps_rcvpackafterwin, tcps_rcvbyteafterwin,
    1205           0 :                             todrop);
    1206           0 :                 }
    1207           0 :                 tp->snd_wl1 = th->th_seq - 1;
    1208           0 :                 tp->rcv_up = th->th_seq;
    1209           0 :                 goto step6;
    1210             :         /*
    1211             :          * If a new connection request is received while in TIME_WAIT,
     1212             :          * drop the old connection and start over if the
    1213             :          * timestamp or the sequence numbers are above the previous
    1214             :          * ones.
    1215             :          */
    1216             :         case TCPS_TIME_WAIT:
    1217           0 :                 if (((tiflags & (TH_SYN|TH_ACK)) == TH_SYN) &&
    1218           0 :                     ((opti.ts_present &&
    1219           0 :                     TSTMP_LT(tp->ts_recent, opti.ts_val)) ||
    1220           0 :                     SEQ_GT(th->th_seq, tp->rcv_nxt))) {
    1221             : #if NPF > 0
    1222             :                         /*
    1223             :                          * The socket will be recreated but the new state
    1224             :                          * has already been linked to the socket.  Remove the
    1225             :                          * link between old socket and new state.
    1226             :                          */
    1227           0 :                         pf_inp_unlink(inp);
    1228             : #endif
    1229             :                         /*
    1230             :                         * Advance the iss by at least 32768, but
    1231             :                         * clear the msb in order to make sure
     1232             :                         * that SEQ_LT(snd_nxt, iss).
    1233             :                         */
    1234           0 :                         iss = tp->snd_nxt +
    1235           0 :                             ((arc4random() & 0x7fffffff) | 0x8000);
    1236             :                         reuse = &iss;
    1237           0 :                         tp = tcp_close(tp);
    1238             :                         inp = NULL;
    1239           0 :                         goto findpcb;
    1240             :                 }
    1241             :         }
    1242             : 
    1243             :         /*
    1244             :          * States other than LISTEN or SYN_SENT.
    1245             :          * First check timestamp, if present.
    1246             :          * Then check that at least some bytes of segment are within
    1247             :          * receive window.  If segment begins before rcv_nxt,
    1248             :          * drop leading data (and SYN); if nothing left, just ack.
    1249             :          *
    1250             :          * RFC 1323 PAWS: If we have a timestamp reply on this segment
     1251             :          * and it's less than tp->ts_recent, drop it.
    1252             :          */
    1253           0 :         if (opti.ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
    1254           0 :             TSTMP_LT(opti.ts_val, tp->ts_recent)) {
    1255             : 
    1256             :                 /* Check to see if ts_recent is over 24 days old.  */
    1257           0 :                 if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
    1258             :                         /*
    1259             :                          * Invalidate ts_recent.  If this segment updates
    1260             :                          * ts_recent, the age will be reset later and ts_recent
    1261             :                          * will get a valid value.  If it does not, setting
    1262             :                          * ts_recent to zero will at least satisfy the
    1263             :                          * requirement that zero be placed in the timestamp
    1264             :                          * echo reply when ts_recent isn't valid.  The
    1265             :                          * age isn't reset until we get a valid ts_recent
    1266             :                          * because we don't want out-of-order segments to be
    1267             :                          * dropped when ts_recent is old.
    1268             :                          */
    1269           0 :                         tp->ts_recent = 0;
    1270             :                 } else {
    1271           0 :                         tcpstat_pkt(tcps_rcvduppack, tcps_rcvdupbyte, tlen);
    1272           0 :                         tcpstat_inc(tcps_pawsdrop);
    1273           0 :                         goto dropafterack;
    1274             :                 }
    1275           0 :         }
    1276             : 
    1277           0 :         todrop = tp->rcv_nxt - th->th_seq;
    1278           0 :         if (todrop > 0) {
    1279           0 :                 if (tiflags & TH_SYN) {
    1280           0 :                         tiflags &= ~TH_SYN;
    1281           0 :                         th->th_seq++;
    1282           0 :                         if (th->th_urp > 1)
    1283           0 :                                 th->th_urp--;
    1284             :                         else
    1285           0 :                                 tiflags &= ~TH_URG;
    1286           0 :                         todrop--;
    1287           0 :                 }
    1288           0 :                 if (todrop > tlen ||
    1289           0 :                     (todrop == tlen && (tiflags & TH_FIN) == 0)) {
    1290             :                         /*
    1291             :                          * Any valid FIN must be to the left of the
    1292             :                          * window.  At this point, FIN must be a
    1293             :                          * duplicate or out-of-sequence, so drop it.
    1294             :                          */
    1295           0 :                         tiflags &= ~TH_FIN;
    1296             :                         /*
    1297             :                          * Send ACK to resynchronize, and drop any data,
    1298             :                          * but keep on processing for RST or ACK.
    1299             :                          */
    1300           0 :                         tp->t_flags |= TF_ACKNOW;
    1301           0 :                         todrop = tlen;
    1302           0 :                         tcpstat_pkt(tcps_rcvduppack, tcps_rcvdupbyte, todrop);
    1303           0 :                 } else {
    1304           0 :                         tcpstat_pkt(tcps_rcvpartduppack, tcps_rcvpartdupbyte,
    1305           0 :                             todrop);
    1306             :                 }
    1307           0 :                 hdroptlen += todrop;    /* drop from head afterwards */
    1308           0 :                 th->th_seq += todrop;
    1309           0 :                 tlen -= todrop;
    1310           0 :                 if (th->th_urp > todrop)
    1311           0 :                         th->th_urp -= todrop;
    1312             :                 else {
    1313           0 :                         tiflags &= ~TH_URG;
    1314           0 :                         th->th_urp = 0;
    1315             :                 }
    1316             :         }
    1317             : 
    1318             :         /*
    1319             :          * If new data are received on a connection after the
    1320             :          * user processes are gone, then RST the other end.
    1321             :          */
    1322           0 :         if ((so->so_state & SS_NOFDREF) &&
    1323           0 :             tp->t_state > TCPS_CLOSE_WAIT && tlen) {
    1324           0 :                 tp = tcp_close(tp);
    1325           0 :                 tcpstat_inc(tcps_rcvafterclose);
    1326           0 :                 goto dropwithreset;
    1327             :         }
    1328             : 
    1329             :         /*
    1330             :          * If segment ends after window, drop trailing data
    1331             :          * (and PUSH and FIN); if nothing left, just ACK.
    1332             :          */
    1333           0 :         todrop = (th->th_seq + tlen) - (tp->rcv_nxt+tp->rcv_wnd);
    1334           0 :         if (todrop > 0) {
    1335           0 :                 tcpstat_inc(tcps_rcvpackafterwin);
    1336           0 :                 if (todrop >= tlen) {
    1337           0 :                         tcpstat_add(tcps_rcvbyteafterwin, tlen);
    1338             :                         /*
    1339             :                          * If window is closed can only take segments at
    1340             :                          * window edge, and have to drop data and PUSH from
    1341             :                          * incoming segments.  Continue processing, but
    1342             :                          * remember to ack.  Otherwise, drop segment
    1343             :                          * and ack.
    1344             :                          */
    1345           0 :                         if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
    1346           0 :                                 tp->t_flags |= TF_ACKNOW;
    1347           0 :                                 tcpstat_inc(tcps_rcvwinprobe);
    1348             :                         } else
    1349             :                                 goto dropafterack;
    1350           0 :                 } else
    1351           0 :                         tcpstat_add(tcps_rcvbyteafterwin, todrop);
    1352           0 :                 m_adj(m, -todrop);
    1353           0 :                 tlen -= todrop;
    1354           0 :                 tiflags &= ~(TH_PUSH|TH_FIN);
    1355           0 :         }
    1356             : 
    1357             :         /*
    1358             :          * If last ACK falls within this segment's sequence numbers,
    1359             :          * record its timestamp if it's more recent.
    1360             :          * NOTE that the test is modified according to the latest
    1361             :          * proposal of the tcplw@cray.com list (Braden 1993/04/26).
    1362             :          */
    1363           0 :         if (opti.ts_present && TSTMP_GEQ(opti.ts_val, tp->ts_recent) &&
    1364           0 :             SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
    1365           0 :                 tp->ts_recent_age = tcp_now;
    1366           0 :                 tp->ts_recent = opti.ts_val;
    1367           0 :         }
    1368             : 
    1369             :         /*
    1370             :          * If the RST bit is set examine the state:
    1371             :          *    SYN_RECEIVED STATE:
    1372             :          *      If passive open, return to LISTEN state.
    1373             :          *      If active open, inform user that connection was refused.
     1374             :          *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSE_WAIT STATES:
    1375             :          *      Inform user that connection was reset, and close tcb.
    1376             :          *    CLOSING, LAST_ACK, TIME_WAIT STATES
    1377             :          *      Close the tcb.
    1378             :          */
    1379           0 :         if (tiflags & TH_RST) {
    1380           0 :                 if (th->th_seq != tp->last_ack_sent &&
    1381           0 :                     th->th_seq != tp->rcv_nxt &&
    1382           0 :                     th->th_seq != (tp->rcv_nxt + 1))
    1383             :                         goto drop;
    1384             : 
    1385           0 :                 switch (tp->t_state) {
    1386             :                 case TCPS_SYN_RECEIVED:
    1387             : #ifdef TCP_ECN
    1388             :                         /* if ECN is enabled, fall back to non-ecn at rexmit */
    1389           0 :                         if (tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
    1390             :                                 goto drop;
    1391             : #endif
    1392           0 :                         so->so_error = ECONNREFUSED;
    1393           0 :                         goto close;
    1394             : 
    1395             :                 case TCPS_ESTABLISHED:
    1396             :                 case TCPS_FIN_WAIT_1:
    1397             :                 case TCPS_FIN_WAIT_2:
    1398             :                 case TCPS_CLOSE_WAIT:
    1399           0 :                         so->so_error = ECONNRESET;
    1400             :                 close:
    1401           0 :                         tp->t_state = TCPS_CLOSED;
    1402           0 :                         tcpstat_inc(tcps_drops);
    1403           0 :                         tp = tcp_close(tp);
    1404           0 :                         goto drop;
    1405             :                 case TCPS_CLOSING:
    1406             :                 case TCPS_LAST_ACK:
    1407             :                 case TCPS_TIME_WAIT:
    1408           0 :                         tp = tcp_close(tp);
    1409           0 :                         goto drop;
    1410             :                 }
    1411             :         }
    1412             : 
    1413             :         /*
    1414             :          * If a SYN is in the window, then this is an
    1415             :          * error and we ACK and drop the packet.
    1416             :          */
    1417           0 :         if (tiflags & TH_SYN)
    1418             :                 goto dropafterack_ratelim;
    1419             : 
    1420             :         /*
    1421             :          * If the ACK bit is off we drop the segment and return.
    1422             :          */
    1423           0 :         if ((tiflags & TH_ACK) == 0) {
    1424           0 :                 if (tp->t_flags & TF_ACKNOW)
    1425             :                         goto dropafterack;
    1426             :                 else
    1427             :                         goto drop;
    1428             :         }
    1429             : 
    1430             :         /*
    1431             :          * Ack processing.
    1432             :          */
    1433           0 :         switch (tp->t_state) {
    1434             : 
    1435             :         /*
    1436             :          * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
    1437             :          * ESTABLISHED state and continue processing.
    1438             :          * The ACK was checked above.
    1439             :          */
    1440             :         case TCPS_SYN_RECEIVED:
    1441           0 :                 tcpstat_inc(tcps_connects);
    1442           0 :                 tp->t_flags |= TF_BLOCKOUTPUT;
    1443           0 :                 soisconnected(so);
    1444           0 :                 tp->t_flags &= ~TF_BLOCKOUTPUT;
    1445           0 :                 tp->t_state = TCPS_ESTABLISHED;
    1446           0 :                 TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
    1447             :                 /* Do window scaling? */
    1448           0 :                 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
    1449             :                         (TF_RCVD_SCALE|TF_REQ_SCALE)) {
    1450           0 :                         tp->snd_scale = tp->requested_s_scale;
    1451           0 :                         tp->rcv_scale = tp->request_r_scale;
    1452           0 :                         tiwin = th->th_win << tp->snd_scale;
    1453           0 :                 }
    1454           0 :                 tcp_flush_queue(tp);
    1455           0 :                 tp->snd_wl1 = th->th_seq - 1;
    1456             :                 /* fall into ... */
    1457             : 
    1458             :         /*
    1459             :          * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
    1460             :          * ACKs.  If the ack is in the range
    1461             :          *      tp->snd_una < th->th_ack <= tp->snd_max
    1462             :          * then advance tp->snd_una to th->th_ack and drop
    1463             :          * data from the retransmission queue.  If this ACK reflects
    1464             :          * more up to date window information we update our window information.
    1465             :          */
    1466             :         case TCPS_ESTABLISHED:
    1467             :         case TCPS_FIN_WAIT_1:
    1468             :         case TCPS_FIN_WAIT_2:
    1469             :         case TCPS_CLOSE_WAIT:
    1470             :         case TCPS_CLOSING:
    1471             :         case TCPS_LAST_ACK:
    1472             :         case TCPS_TIME_WAIT:
    1473             : #ifdef TCP_ECN
    1474             :                 /*
    1475             :                  * if we receive ECE and are not already in recovery phase,
    1476             :                  * reduce cwnd by half but don't slow-start.
    1477             :                  * advance snd_last to snd_max not to reduce cwnd again
    1478             :                  * until all outstanding packets are acked.
    1479             :                  */
    1480           0 :                 if (tcp_do_ecn && (tiflags & TH_ECE)) {
    1481           0 :                         if ((tp->t_flags & TF_ECN_PERMIT) &&
    1482           0 :                             SEQ_GEQ(tp->snd_una, tp->snd_last)) {
    1483             :                                 u_int win;
    1484             : 
    1485           0 :                                 win = min(tp->snd_wnd, tp->snd_cwnd) / tp->t_maxseg;
    1486           0 :                                 if (win > 1) {
    1487           0 :                                         tp->snd_ssthresh = win / 2 * tp->t_maxseg;
    1488           0 :                                         tp->snd_cwnd = tp->snd_ssthresh;
    1489           0 :                                         tp->snd_last = tp->snd_max;
    1490           0 :                                         tp->t_flags |= TF_SEND_CWR;
    1491           0 :                                         tcpstat_inc(tcps_cwr_ecn);
    1492           0 :                                 }
    1493           0 :                         }
    1494           0 :                         tcpstat_inc(tcps_ecn_rcvece);
    1495           0 :                 }
    1496             :                 /*
    1497             :                  * If we receive CWR, we know that the peer has reduced
    1498             :                  * its congestion window.  Stop sending ECN-Echo.
    1499             :                  */
    1500           0 :                 if ((tiflags & TH_CWR)) {
    1501           0 :                         tp->t_flags &= ~TF_RCVD_CE;
    1502           0 :                         tcpstat_inc(tcps_ecn_rcvcwr);
    1503           0 :                 }
    1504             : #endif /* TCP_ECN */
    1505             : 
    1506           0 :                 if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
    1507             :                         /*
    1508             :                          * Duplicate/old ACK processing.
    1509             :                          * Increments t_dupacks:
    1510             :                          *      Pure duplicate (same seq/ack/window, no data)
    1511             :                          * Doesn't affect t_dupacks:
    1512             :                          *      Data packets.
    1513             :                          *      Normal window updates (window opens)
    1514             :                          * Resets t_dupacks:
    1515             :                          *      New data ACKed.
    1516             :                          *      Window shrinks
    1517             :                          *      Old ACK
    1518             :                          */
    1519           0 :                         if (tlen) {
    1520             :                                 /* Drop very old ACKs unless th_seq matches */
    1521           0 :                                 if (th->th_seq != tp->rcv_nxt &&
    1522           0 :                                    SEQ_LT(th->th_ack,
    1523             :                                    tp->snd_una - tp->max_sndwnd)) {
    1524           0 :                                         tcpstat_inc(tcps_rcvacktooold);
    1525           0 :                                         goto drop;
    1526             :                                 }
    1527             :                                 break;
    1528             :                         }
    1529             :                         /*
    1530             :                          * If we get an old ACK, there is probably packet
    1531             :                          * reordering going on.  Be conservative and reset
    1532             :                          * t_dupacks so that we are less aggressive in
    1533             :                          * doing a fast retransmit.
    1534             :                          */
    1535           0 :                         if (th->th_ack != tp->snd_una) {
    1536           0 :                                 tp->t_dupacks = 0;
    1537           0 :                                 break;
    1538             :                         }
    1539           0 :                         if (tiwin == tp->snd_wnd) {
    1540           0 :                                 tcpstat_inc(tcps_rcvdupack);
    1541             :                                 /*
    1542             :                                  * If we have outstanding data (other than
    1543             :                                  * a window probe), this is a completely
    1544             :                                  * duplicate ack (ie, window info didn't
    1545             :                                  * change), the ack is the biggest we've
    1546             :                                  * seen and we've seen exactly our rexmt
    1547             :                                  * threshold of them, assume a packet
    1548             :                                  * has been dropped and retransmit it.
    1549             :                                  * Kludge snd_nxt & the congestion
    1550             :                                  * window so we send only this one
    1551             :                                  * packet.
    1552             :                                  *
    1553             :                                  * We know we're losing at the current
    1554             :                                  * window size so do congestion avoidance
    1555             :                                  * (set ssthresh to half the current window
    1556             :                                  * and pull our congestion window back to
    1557             :                                  * the new ssthresh).
    1558             :                                  *
    1559             :                                  * Dup acks mean that packets have left the
    1560             :                                  * network (they're now cached at the receiver)
    1561             :                                  * so bump cwnd by the amount in the receiver
    1562             :                                  * to keep a constant cwnd packets in the
    1563             :                                  * network.
    1564             :                                  */
    1565           0 :                                 if (TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0)
    1566           0 :                                         tp->t_dupacks = 0;
    1567           0 :                                 else if (++tp->t_dupacks == tcprexmtthresh) {
    1568           0 :                                         tcp_seq onxt = tp->snd_nxt;
    1569             :                                         u_long win =
    1570           0 :                                             ulmin(tp->snd_wnd, tp->snd_cwnd) /
    1571           0 :                                                 2 / tp->t_maxseg;
    1572             : 
    1573           0 :                                         if (SEQ_LT(th->th_ack, tp->snd_last)){
    1574             :                                                 /*
    1575             :                                                  * False fast retx after
    1576             :                                                  * timeout.  Do not cut window.
    1577             :                                                  */
    1578           0 :                                                 tp->t_dupacks = 0;
    1579           0 :                                                 goto drop;
    1580             :                                         }
    1581           0 :                                         if (win < 2)
    1582           0 :                                                 win = 2;
    1583           0 :                                         tp->snd_ssthresh = win * tp->t_maxseg;
    1584           0 :                                         tp->snd_last = tp->snd_max;
    1585           0 :                                         if (tp->sack_enable) {
    1586             :                                                 TCP_TIMER_DISARM(tp, TCPT_REXMT);
    1587             :                                                 tp->t_rtttime = 0;
    1588             : #ifdef TCP_ECN
    1589           0 :                                                 tp->t_flags |= TF_SEND_CWR;
    1590             : #endif
    1591           0 :                                                 tcpstat_inc(tcps_cwr_frecovery);
    1592           0 :                                                 tcpstat_inc(tcps_sack_recovery_episode);
    1593             :                                                 /*
    1594             :                                                  * tcp_output() will send
    1595             :                                                  * oldest SACK-eligible rtx.
    1596             :                                                  */
    1597           0 :                                                 (void) tcp_output(tp);
    1598           0 :                                                 tp->snd_cwnd = tp->snd_ssthresh+
    1599           0 :                                                    tp->t_maxseg * tp->t_dupacks;
    1600           0 :                                                 goto drop;
    1601             :                                         }
    1602             :                                         TCP_TIMER_DISARM(tp, TCPT_REXMT);
    1603             :                                         tp->t_rtttime = 0;
    1604           0 :                                         tp->snd_nxt = th->th_ack;
    1605           0 :                                         tp->snd_cwnd = tp->t_maxseg;
    1606             : #ifdef TCP_ECN
    1607           0 :                                         tp->t_flags |= TF_SEND_CWR;
    1608             : #endif
    1609           0 :                                         tcpstat_inc(tcps_cwr_frecovery);
    1610           0 :                                         tcpstat_inc(tcps_sndrexmitfast);
    1611           0 :                                         (void) tcp_output(tp);
    1612             : 
    1613           0 :                                         tp->snd_cwnd = tp->snd_ssthresh +
    1614           0 :                                             tp->t_maxseg * tp->t_dupacks;
    1615           0 :                                         if (SEQ_GT(onxt, tp->snd_nxt))
    1616           0 :                                                 tp->snd_nxt = onxt;
    1617           0 :                                         goto drop;
    1618           0 :                                 } else if (tp->t_dupacks > tcprexmtthresh) {
    1619           0 :                                         tp->snd_cwnd += tp->t_maxseg;
    1620           0 :                                         (void) tcp_output(tp);
    1621           0 :                                         goto drop;
    1622             :                                 }
    1623           0 :                         } else if (tiwin < tp->snd_wnd) {
    1624             :                                 /*
    1625             :                                  * The window was retracted!  Previous dup
    1626             :                                  * ACKs may have been due to packets arriving
    1627             :                                  * after the shrunken window, not a missing
    1628             :                                  * packet, so play it safe and reset t_dupacks
    1629             :                                  */
    1630           0 :                                 tp->t_dupacks = 0;
    1631           0 :                         }
    1632             :                         break;
    1633             :                 }
    1634             :                 /*
    1635             :                  * If the congestion window was inflated to account
    1636             :                  * for the other side's cached packets, retract it.
    1637             :                  */
    1638           0 :                 if (tp->t_dupacks >= tcprexmtthresh) {
    1639             :                         /* Check for a partial ACK */
    1640           0 :                         if (SEQ_LT(th->th_ack, tp->snd_last)) {
    1641           0 :                                 if (tp->sack_enable)
    1642           0 :                                         tcp_sack_partialack(tp, th);
    1643             :                                 else
    1644           0 :                                         tcp_newreno_partialack(tp, th);
    1645             :                         } else {
    1646             :                                 /* Out of fast recovery */
    1647           0 :                                 tp->snd_cwnd = tp->snd_ssthresh;
    1648           0 :                                 if (tcp_seq_subtract(tp->snd_max, th->th_ack) <
    1649           0 :                                     tp->snd_ssthresh)
    1650           0 :                                         tp->snd_cwnd =
    1651           0 :                                             tcp_seq_subtract(tp->snd_max,
    1652           0 :                                             th->th_ack);
    1653           0 :                                 tp->t_dupacks = 0;
    1654             :                         }
    1655             :                 } else {
    1656             :                         /*
    1657             :                          * Reset the duplicate ACK counter if we
    1658             :                          * were not in fast recovery.
    1659             :                          */
    1660           0 :                         tp->t_dupacks = 0;
    1661             :                 }
    1662           0 :                 if (SEQ_GT(th->th_ack, tp->snd_max)) {
    1663           0 :                         tcpstat_inc(tcps_rcvacktoomuch);
    1664           0 :                         goto dropafterack_ratelim;
    1665             :                 }
    1666           0 :                 acked = th->th_ack - tp->snd_una;
    1667           0 :                 tcpstat_pkt(tcps_rcvackpack, tcps_rcvackbyte, acked);
    1668             : 
    1669             :                 /*
    1670             :                  * If we have a timestamp reply, update smoothed
    1671             :                  * round trip time.  If no timestamp is present but
    1672             :                  * transmit timer is running and timed sequence
    1673             :                  * number was acked, update smoothed round trip time.
    1674             :                  * Since we now have an rtt measurement, cancel the
    1675             :                  * timer backoff (cf., Phil Karn's retransmit alg.).
    1676             :                  * Recompute the initial retransmit timer.
    1677             :                  */
    1678           0 :                 if (opti.ts_present && opti.ts_ecr)
    1679           0 :                         tcp_xmit_timer(tp, tcp_now - opti.ts_ecr);
    1680           0 :                 else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq))
    1681           0 :                         tcp_xmit_timer(tp, tcp_now - tp->t_rtttime);
    1682             : 
    1683             :                 /*
    1684             :                  * If all outstanding data is acked, stop retransmit
    1685             :                  * timer and remember to restart (more output or persist).
    1686             :                  * If there is more data to be acked, restart retransmit
    1687             :                  * timer, using current (possibly backed-off) value.
    1688             :                  */
    1689           0 :                 if (th->th_ack == tp->snd_max) {
    1690           0 :                         TCP_TIMER_DISARM(tp, TCPT_REXMT);
    1691           0 :                         tp->t_flags |= TF_NEEDOUTPUT;
    1692           0 :                 } else if (TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0)
    1693           0 :                         TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
    1694             :                 /*
    1695             :                  * When new data is acked, open the congestion window.
    1696             :                  * If the window gives us less than ssthresh packets
    1697             :                  * in flight, open exponentially (maxseg per packet).
    1698             :                  * Otherwise open linearly: maxseg per window
    1699             :                  * (maxseg^2 / cwnd per packet).
    1700             :                  */
    1701             :                 {
    1702           0 :                 u_int cw = tp->snd_cwnd;
    1703           0 :                 u_int incr = tp->t_maxseg;
    1704             : 
    1705           0 :                 if (cw > tp->snd_ssthresh)
    1706           0 :                         incr = incr * incr / cw;
    1707           0 :                 if (tp->t_dupacks < tcprexmtthresh)
    1708           0 :                         tp->snd_cwnd = ulmin(cw + incr,
    1709           0 :                             TCP_MAXWIN << tp->snd_scale);
    1710             :                 }
    1711           0 :                 ND6_HINT(tp);
    1712           0 :                 if (acked > so->so_snd.sb_cc) {
    1713           0 :                         tp->snd_wnd -= so->so_snd.sb_cc;
    1714           0 :                         sbdrop(so, &so->so_snd, (int)so->so_snd.sb_cc);
    1715             :                         ourfinisacked = 1;
    1716           0 :                 } else {
    1717           0 :                         sbdrop(so, &so->so_snd, acked);
    1718           0 :                         tp->snd_wnd -= acked;
    1719             :                         ourfinisacked = 0;
    1720             :                 }
    1721             : 
    1722           0 :                 tcp_update_sndspace(tp);
    1723           0 :                 if (sb_notify(so, &so->so_snd)) {
    1724           0 :                         tp->t_flags |= TF_BLOCKOUTPUT;
    1725           0 :                         sowwakeup(so);
    1726           0 :                         tp->t_flags &= ~TF_BLOCKOUTPUT;
    1727           0 :                 }
    1728             : 
    1729             :                 /*
    1730             :                  * If we had a pending ICMP message that referred to data
    1731             :                  * that have just been acknowledged, disregard the recorded
    1732             :                  * ICMP message.
    1733             :                  */
    1734           0 :                 if ((tp->t_flags & TF_PMTUD_PEND) &&
    1735           0 :                     SEQ_GT(th->th_ack, tp->t_pmtud_th_seq))
    1736           0 :                         tp->t_flags &= ~TF_PMTUD_PEND;
    1737             : 
    1738             :                 /*
    1739             :                  * Keep track of the largest chunk of data acknowledged
    1740             :                  * since last PMTU update
    1741             :                  */
    1742           0 :                 if (tp->t_pmtud_mss_acked < acked)
    1743           0 :                         tp->t_pmtud_mss_acked = acked;
    1744             : 
    1745           0 :                 tp->snd_una = th->th_ack;
    1746             : #ifdef TCP_ECN
    1747             :                 /* sync snd_last with snd_una */
    1748           0 :                 if (SEQ_GT(tp->snd_una, tp->snd_last))
    1749           0 :                         tp->snd_last = tp->snd_una;
    1750             : #endif
    1751           0 :                 if (SEQ_LT(tp->snd_nxt, tp->snd_una))
    1752           0 :                         tp->snd_nxt = tp->snd_una;
    1753             : 
    1754           0 :                 switch (tp->t_state) {
    1755             : 
    1756             :                 /*
    1757             :                  * In FIN_WAIT_1 STATE in addition to the processing
    1758             :                  * for the ESTABLISHED state if our FIN is now acknowledged
    1759             :                  * then enter FIN_WAIT_2.
    1760             :                  */
    1761             :                 case TCPS_FIN_WAIT_1:
    1762           0 :                         if (ourfinisacked) {
    1763             :                                 /*
    1764             :                                  * If we can't receive any more
    1765             :                                  * data, then closing user can proceed.
    1766             :                                  * Starting the timer is contrary to the
    1767             :                                  * specification, but if we don't get a FIN
    1768             :                                  * we'll hang forever.
    1769             :                                  */
    1770           0 :                                 if (so->so_state & SS_CANTRCVMORE) {
    1771           0 :                                         tp->t_flags |= TF_BLOCKOUTPUT;
    1772           0 :                                         soisdisconnected(so);
    1773           0 :                                         tp->t_flags &= ~TF_BLOCKOUTPUT;
    1774           0 :                                         TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
    1775           0 :                                 }
    1776           0 :                                 tp->t_state = TCPS_FIN_WAIT_2;
    1777           0 :                         }
    1778             :                         break;
    1779             : 
    1780             :                 /*
    1781             :                  * In CLOSING STATE in addition to the processing for
    1782             :                  * the ESTABLISHED state if the ACK acknowledges our FIN
    1783             :                  * then enter the TIME-WAIT state, otherwise ignore
    1784             :                  * the segment.
    1785             :                  */
    1786             :                 case TCPS_CLOSING:
    1787           0 :                         if (ourfinisacked) {
    1788           0 :                                 tp->t_state = TCPS_TIME_WAIT;
    1789           0 :                                 tcp_canceltimers(tp);
    1790           0 :                                 TCP_TIMER_ARM(tp, TCPT_2MSL, 2 * TCPTV_MSL);
    1791           0 :                                 tp->t_flags |= TF_BLOCKOUTPUT;
    1792           0 :                                 soisdisconnected(so);
    1793           0 :                                 tp->t_flags &= ~TF_BLOCKOUTPUT;
    1794           0 :                         }
    1795             :                         break;
    1796             : 
    1797             :                 /*
    1798             :                  * In LAST_ACK, we may still be waiting for data to drain
    1799             :                  * and/or to be acked, as well as for the ack of our FIN.
    1800             :                  * If our FIN is now acknowledged, delete the TCB,
    1801             :                  * enter the closed state and return.
    1802             :                  */
    1803             :                 case TCPS_LAST_ACK:
    1804           0 :                         if (ourfinisacked) {
    1805           0 :                                 tp = tcp_close(tp);
    1806           0 :                                 goto drop;
    1807             :                         }
    1808             :                         break;
    1809             : 
    1810             :                 /*
    1811             :                  * In TIME_WAIT state the only thing that should arrive
    1812             :                  * is a retransmission of the remote FIN.  Acknowledge
    1813             :                  * it and restart the finack timer.
    1814             :                  */
    1815             :                 case TCPS_TIME_WAIT:
    1816           0 :                         TCP_TIMER_ARM(tp, TCPT_2MSL, 2 * TCPTV_MSL);
    1817           0 :                         goto dropafterack;
    1818             :                 }
    1819             :         }
    1820             : 
    1821             : step6:
    1822             :         /*
    1823             :          * Update window information.
    1824             :          * Don't look at window if no ACK: TAC's send garbage on first SYN.
    1825             :          */
    1826           0 :         if ((tiflags & TH_ACK) &&
    1827           0 :             (SEQ_LT(tp->snd_wl1, th->th_seq) || (tp->snd_wl1 == th->th_seq &&
    1828           0 :             (SEQ_LT(tp->snd_wl2, th->th_ack) ||
    1829           0 :             (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
    1830             :                 /* keep track of pure window updates */
    1831           0 :                 if (tlen == 0 &&
    1832           0 :                     tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
    1833           0 :                         tcpstat_inc(tcps_rcvwinupd);
    1834           0 :                 tp->snd_wnd = tiwin;
    1835           0 :                 tp->snd_wl1 = th->th_seq;
    1836           0 :                 tp->snd_wl2 = th->th_ack;
    1837           0 :                 if (tp->snd_wnd > tp->max_sndwnd)
    1838           0 :                         tp->max_sndwnd = tp->snd_wnd;
    1839           0 :                 tp->t_flags |= TF_NEEDOUTPUT;
    1840           0 :         }
    1841             : 
    1842             :         /*
    1843             :          * Process segments with URG.
    1844             :          */
    1845           0 :         if ((tiflags & TH_URG) && th->th_urp &&
    1846           0 :             TCPS_HAVERCVDFIN(tp->t_state) == 0) {
    1847             :                 /*
    1848             :                  * This is a kludge, but if we receive and accept
    1849             :                  * random urgent pointers, we'll crash in
    1850             :                  * soreceive.  It's hard to imagine someone
    1851             :                  * actually wanting to send this much urgent data.
    1852             :                  */
    1853           0 :                 if (th->th_urp + so->so_rcv.sb_cc > sb_max) {
    1854           0 :                         th->th_urp = 0;                      /* XXX */
    1855           0 :                         tiflags &= ~TH_URG;         /* XXX */
    1856           0 :                         goto dodata;                    /* XXX */
    1857             :                 }
    1858             :                 /*
    1859             :                  * If this segment advances the known urgent pointer,
    1860             :                  * then mark the data stream.  This should not happen
    1861             :                  * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
    1862             :                  * a FIN has been received from the remote side.
    1863             :                  * In these states we ignore the URG.
    1864             :                  *
    1865             :                  * According to RFC961 (Assigned Protocols),
    1866             :                  * the urgent pointer points to the last octet
    1867             :                  * of urgent data.  We continue, however,
    1868             :                  * to consider it to indicate the first octet
    1869             :                  * of data past the urgent section as the original
    1870             :                  * spec states (in one of two places).
    1871             :                  */
    1872           0 :                 if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
    1873           0 :                         tp->rcv_up = th->th_seq + th->th_urp;
    1874           0 :                         so->so_oobmark = so->so_rcv.sb_cc +
    1875           0 :                             (tp->rcv_up - tp->rcv_nxt) - 1;
    1876           0 :                         if (so->so_oobmark == 0)
    1877           0 :                                 so->so_state |= SS_RCVATMARK;
    1878           0 :                         sohasoutofband(so);
    1879           0 :                         tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
    1880           0 :                 }
    1881             :                 /*
    1882             :                  * Remove out of band data so it doesn't get presented to the user.
    1883             :                  * This can happen independent of advancing the URG pointer,
    1884             :                  * but if two URG's are pending at once, some out-of-band
    1885             :                  * data may creep in... ick.
    1886             :                  */
    1887           0 :                 if (th->th_urp <= (u_int16_t) tlen &&
    1888           0 :                     (so->so_options & SO_OOBINLINE) == 0)
    1889           0 :                         tcp_pulloutofband(so, th->th_urp, m, hdroptlen);
    1890             :         } else
    1891             :                 /*
    1892             :                  * If no out of band data is expected,
    1893             :                  * pull receive urgent pointer along
    1894             :                  * with the receive window.
    1895             :                  */
    1896           0 :                 if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
    1897           0 :                         tp->rcv_up = tp->rcv_nxt;
    1898             : dodata:                                                 /* XXX */
    1899             : 
    1900             :         /*
    1901             :          * Process the segment text, merging it into the TCP sequencing queue,
    1902             :          * and arranging for acknowledgment of receipt if necessary.
    1903             :          * This process logically involves adjusting tp->rcv_wnd as data
    1904             :          * is presented to the user (this happens in tcp_usrreq.c,
    1905             :          * case PRU_RCVD).  If a FIN has already been received on this
    1906             :          * connection then we just ignore the text.
    1907             :          */
    1908           0 :         if ((tlen || (tiflags & TH_FIN)) &&
    1909           0 :             TCPS_HAVERCVDFIN(tp->t_state) == 0) {
    1910           0 :                 tcp_seq laststart = th->th_seq;
    1911           0 :                 tcp_seq lastend = th->th_seq + tlen;
    1912             : 
    1913           0 :                 if (th->th_seq == tp->rcv_nxt && TAILQ_EMPTY(&tp->t_segq) &&
    1914           0 :                     tp->t_state == TCPS_ESTABLISHED) {
    1915           0 :                         TCP_SETUP_ACK(tp, tiflags, m);
    1916           0 :                         tp->rcv_nxt += tlen;
    1917           0 :                         tiflags = th->th_flags & TH_FIN;
    1918           0 :                         tcpstat_pkt(tcps_rcvpack, tcps_rcvbyte, tlen);
    1919           0 :                         ND6_HINT(tp);
    1920           0 :                         if (so->so_state & SS_CANTRCVMORE)
    1921           0 :                                 m_freem(m);
    1922             :                         else {
    1923           0 :                                 m_adj(m, hdroptlen);
    1924           0 :                                 sbappendstream(so, &so->so_rcv, m);
    1925             :                         }
    1926           0 :                         tp->t_flags |= TF_BLOCKOUTPUT;
    1927           0 :                         sorwakeup(so);
    1928           0 :                         tp->t_flags &= ~TF_BLOCKOUTPUT;
    1929           0 :                 } else {
    1930           0 :                         m_adj(m, hdroptlen);
    1931           0 :                         tiflags = tcp_reass(tp, th, m, &tlen);
    1932           0 :                         tp->t_flags |= TF_ACKNOW;
    1933             :                 }
    1934           0 :                 if (tp->sack_enable)
    1935           0 :                         tcp_update_sack_list(tp, laststart, lastend);
    1936             : 
    1937             :                 /*
    1938             :                  * variable len never referenced again in modern BSD,
    1939             :                  * so why bother computing it ??
    1940             :                  */
    1941             : #if 0
    1942             :                 /*
    1943             :                  * Note the amount of data that peer has sent into
    1944             :                  * our window, in order to estimate the sender's
    1945             :                  * buffer size.
    1946             :                  */
    1947             :                 len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
    1948             : #endif /* 0 */
    1949           0 :         } else {
    1950           0 :                 m_freem(m);
    1951           0 :                 tiflags &= ~TH_FIN;
    1952             :         }
    1953             : 
    1954             :         /*
    1955             :          * If FIN is received ACK the FIN and let the user know
    1956             :          * that the connection is closing.  Ignore a FIN received before
    1957             :          * the connection is fully established.
    1958             :          */
    1959           0 :         if ((tiflags & TH_FIN) && TCPS_HAVEESTABLISHED(tp->t_state)) {
    1960           0 :                 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
    1961           0 :                         tp->t_flags |= TF_BLOCKOUTPUT;
    1962           0 :                         socantrcvmore(so);
    1963           0 :                         tp->t_flags &= ~TF_BLOCKOUTPUT;
    1964           0 :                         tp->t_flags |= TF_ACKNOW;
    1965           0 :                         tp->rcv_nxt++;
    1966           0 :                 }
    1967           0 :                 switch (tp->t_state) {
    1968             : 
    1969             :                 /*
    1970             :                  * In ESTABLISHED STATE enter the CLOSE_WAIT state.
    1971             :                  */
    1972             :                 case TCPS_ESTABLISHED:
    1973           0 :                         tp->t_state = TCPS_CLOSE_WAIT;
    1974           0 :                         break;
    1975             : 
    1976             :                 /*
    1977             :                  * If still in FIN_WAIT_1 STATE FIN has not been acked so
    1978             :                  * enter the CLOSING state.
    1979             :                  */
    1980             :                 case TCPS_FIN_WAIT_1:
    1981           0 :                         tp->t_state = TCPS_CLOSING;
    1982           0 :                         break;
    1983             : 
    1984             :                 /*
    1985             :                  * In FIN_WAIT_2 state enter the TIME_WAIT state,
    1986             :                  * starting the time-wait timer, turning off the other
    1987             :                  * standard timers.
    1988             :                  */
    1989             :                 case TCPS_FIN_WAIT_2:
    1990           0 :                         tp->t_state = TCPS_TIME_WAIT;
    1991           0 :                         tcp_canceltimers(tp);
    1992           0 :                         TCP_TIMER_ARM(tp, TCPT_2MSL, 2 * TCPTV_MSL);
    1993           0 :                         tp->t_flags |= TF_BLOCKOUTPUT;
    1994           0 :                         soisdisconnected(so);
    1995           0 :                         tp->t_flags &= ~TF_BLOCKOUTPUT;
    1996           0 :                         break;
    1997             : 
    1998             :                 /*
    1999             :                  * In TIME_WAIT state restart the 2 MSL time_wait timer.
    2000             :                  */
    2001             :                 case TCPS_TIME_WAIT:
    2002           0 :                         TCP_TIMER_ARM(tp, TCPT_2MSL, 2 * TCPTV_MSL);
    2003           0 :                         break;
    2004             :                 }
    2005             :         }
    2006           0 :         if (otp)
    2007           0 :                 tcp_trace(TA_INPUT, ostate, tp, otp, saveti, 0, tlen);
    2008             : 
    2009             :         /*
    2010             :          * Return any desired output.
    2011             :          */
    2012           0 :         if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT))
    2013           0 :                 (void) tcp_output(tp);
    2014           0 :         return IPPROTO_DONE;
    2015             : 
    2016             : badsyn:
    2017             :         /*
    2018             :          * Received a bad SYN.  Increment counters and dropwithreset.
    2019             :          */
    2020           0 :         tcpstat_inc(tcps_badsyn);
    2021             :         tp = NULL;
    2022           0 :         goto dropwithreset;
    2023             : 
    2024             : dropafterack_ratelim:
    2025           0 :         if (ppsratecheck(&tcp_ackdrop_ppslim_last, &tcp_ackdrop_ppslim_count,
    2026           0 :             tcp_ackdrop_ppslim) == 0) {
    2027             :                 /* XXX stat */
    2028             :                 goto drop;
    2029             :         }
    2030             :         /* ...fall into dropafterack... */
    2031             : 
    2032             : dropafterack:
    2033             :         /*
    2034             :          * Generate an ACK dropping incoming segment if it occupies
    2035             :          * sequence space, where the ACK reflects our state.
    2036             :          */
    2037           0 :         if (tiflags & TH_RST)
    2038             :                 goto drop;
    2039           0 :         m_freem(m);
    2040           0 :         tp->t_flags |= TF_ACKNOW;
    2041           0 :         (void) tcp_output(tp);
    2042           0 :         return IPPROTO_DONE;
    2043             : 
    2044             : dropwithreset_ratelim:
    2045             :         /*
    2046             :          * We may want to rate-limit RSTs in certain situations,
    2047             :          * particularly if we are sending an RST in response to
    2048             :          * an attempt to connect to or otherwise communicate with
    2049             :          * a port for which we have no socket.
    2050             :          */
    2051           0 :         if (ppsratecheck(&tcp_rst_ppslim_last, &tcp_rst_ppslim_count,
    2052           0 :             tcp_rst_ppslim) == 0) {
    2053             :                 /* XXX stat */
    2054             :                 goto drop;
    2055             :         }
    2056             :         /* ...fall into dropwithreset... */
    2057             : 
    2058             : dropwithreset:
    2059             :         /*
    2060             :          * Generate a RST, dropping incoming segment.
    2061             :          * Make ACK acceptable to originator of segment.
    2062             :          * Don't bother to respond to RST.
    2063             :          */
    2064           0 :         if (tiflags & TH_RST)
    2065             :                 goto drop;
    2066           0 :         if (tiflags & TH_ACK) {
    2067           0 :                 tcp_respond(tp, mtod(m, caddr_t), th, (tcp_seq)0, th->th_ack,
    2068           0 :                     TH_RST, m->m_pkthdr.ph_rtableid);
    2069           0 :         } else {
    2070           0 :                 if (tiflags & TH_SYN)
    2071           0 :                         tlen++;
    2072           0 :                 tcp_respond(tp, mtod(m, caddr_t), th, th->th_seq + tlen,
    2073           0 :                     (tcp_seq)0, TH_RST|TH_ACK, m->m_pkthdr.ph_rtableid);
    2074             :         }
    2075           0 :         m_freem(m);
    2076           0 :         return IPPROTO_DONE;
    2077             : 
    2078             : drop:
    2079             :         /*
    2080             :          * Drop space held by incoming segment and return.
    2081             :          */
    2082           0 :         if (otp)
    2083           0 :                 tcp_trace(TA_DROP, ostate, tp, otp, saveti, 0, tlen);
    2084             : 
    2085           0 :         m_freem(m);
    2086           0 :         return IPPROTO_DONE;
    2087           0 : }
    2088             : 
    2089             : int
    2090           0 : tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcphdr *th,
    2091             :     struct mbuf *m, int iphlen, struct tcp_opt_info *oi,
    2092             :     u_int rtableid)
    2093             : {
    2094             :         u_int16_t mss = 0;
    2095             :         int opt, optlen;
    2096             : #ifdef TCP_SIGNATURE
    2097             :         caddr_t sigp = NULL;
    2098             :         struct tdb *tdb = NULL;
    2099             : #endif /* TCP_SIGNATURE */
    2100             : 
    2101           0 :         for (; cp && cnt > 0; cnt -= optlen, cp += optlen) {
    2102           0 :                 opt = cp[0];
    2103           0 :                 if (opt == TCPOPT_EOL)
    2104             :                         break;
    2105           0 :                 if (opt == TCPOPT_NOP)
    2106           0 :                         optlen = 1;
    2107             :                 else {
    2108           0 :                         if (cnt < 2)
    2109             :                                 break;
    2110           0 :                         optlen = cp[1];
    2111           0 :                         if (optlen < 2 || optlen > cnt)
    2112             :                                 break;
    2113             :                 }
    2114           0 :                 switch (opt) {
    2115             : 
    2116             :                 default:
    2117             :                         continue;
    2118             : 
    2119             :                 case TCPOPT_MAXSEG:
    2120           0 :                         if (optlen != TCPOLEN_MAXSEG)
    2121             :                                 continue;
    2122           0 :                         if (!(th->th_flags & TH_SYN))
    2123             :                                 continue;
    2124           0 :                         if (TCPS_HAVERCVDSYN(tp->t_state))
    2125             :                                 continue;
    2126           0 :                         memcpy(&mss, cp + 2, sizeof(mss));
    2127           0 :                         mss = ntohs(mss);
    2128           0 :                         oi->maxseg = mss;
    2129           0 :                         break;
    2130             : 
    2131             :                 case TCPOPT_WINDOW:
    2132           0 :                         if (optlen != TCPOLEN_WINDOW)
    2133             :                                 continue;
    2134           0 :                         if (!(th->th_flags & TH_SYN))
    2135             :                                 continue;
    2136           0 :                         if (TCPS_HAVERCVDSYN(tp->t_state))
    2137             :                                 continue;
    2138           0 :                         tp->t_flags |= TF_RCVD_SCALE;
    2139           0 :                         tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
    2140           0 :                         break;
    2141             : 
    2142             :                 case TCPOPT_TIMESTAMP:
    2143           0 :                         if (optlen != TCPOLEN_TIMESTAMP)
    2144             :                                 continue;
    2145           0 :                         oi->ts_present = 1;
    2146           0 :                         memcpy(&oi->ts_val, cp + 2, sizeof(oi->ts_val));
    2147           0 :                         oi->ts_val = ntohl(oi->ts_val);
    2148           0 :                         memcpy(&oi->ts_ecr, cp + 6, sizeof(oi->ts_ecr));
    2149           0 :                         oi->ts_ecr = ntohl(oi->ts_ecr);
    2150             : 
    2151           0 :                         if (!(th->th_flags & TH_SYN))
    2152             :                                 continue;
    2153           0 :                         if (TCPS_HAVERCVDSYN(tp->t_state))
    2154             :                                 continue;
    2155             :                         /*
    2156             :                          * A timestamp received in a SYN makes
    2157             :                          * it ok to send timestamp requests and replies.
    2158             :                          */
    2159           0 :                         tp->t_flags |= TF_RCVD_TSTMP;
    2160           0 :                         tp->ts_recent = oi->ts_val;
    2161           0 :                         tp->ts_recent_age = tcp_now;
    2162           0 :                         break;
    2163             : 
    2164             :                 case TCPOPT_SACK_PERMITTED:
    2165           0 :                         if (!tp->sack_enable || optlen!=TCPOLEN_SACK_PERMITTED)
    2166             :                                 continue;
    2167           0 :                         if (!(th->th_flags & TH_SYN))
    2168             :                                 continue;
    2169           0 :                         if (TCPS_HAVERCVDSYN(tp->t_state))
    2170             :                                 continue;
    2171             :                         /* MUST only be set on SYN */
    2172           0 :                         tp->t_flags |= TF_SACK_PERMIT;
    2173           0 :                         break;
    2174             :                 case TCPOPT_SACK:
    2175           0 :                         tcp_sack_option(tp, th, cp, optlen);
    2176           0 :                         break;
    2177             : #ifdef TCP_SIGNATURE
    2178             :                 case TCPOPT_SIGNATURE:
    2179           0 :                         if (optlen != TCPOLEN_SIGNATURE)
    2180             :                                 continue;
    2181             : 
    2182           0 :                         if (sigp && timingsafe_bcmp(sigp, cp + 2, 16))
    2183           0 :                                 return (-1);
    2184             : 
    2185           0 :                         sigp = cp + 2;
    2186           0 :                         break;
    2187             : #endif /* TCP_SIGNATURE */
    2188             :                 }
    2189             :         }
    2190             : 
    2191             : #ifdef TCP_SIGNATURE
    2192           0 :         if (tp->t_flags & TF_SIGNATURE) {
    2193           0 :                 union sockaddr_union src, dst;
    2194             : 
    2195           0 :                 memset(&src, 0, sizeof(union sockaddr_union));
    2196           0 :                 memset(&dst, 0, sizeof(union sockaddr_union));
    2197             : 
    2198           0 :                 switch (tp->pf) {
    2199             :                 case 0:
    2200             :                 case AF_INET:
    2201           0 :                         src.sa.sa_len = sizeof(struct sockaddr_in);
    2202           0 :                         src.sa.sa_family = AF_INET;
    2203           0 :                         src.sin.sin_addr = mtod(m, struct ip *)->ip_src;
    2204           0 :                         dst.sa.sa_len = sizeof(struct sockaddr_in);
    2205           0 :                         dst.sa.sa_family = AF_INET;
    2206           0 :                         dst.sin.sin_addr = mtod(m, struct ip *)->ip_dst;
    2207           0 :                         break;
    2208             : #ifdef INET6
    2209             :                 case AF_INET6:
    2210           0 :                         src.sa.sa_len = sizeof(struct sockaddr_in6);
    2211           0 :                         src.sa.sa_family = AF_INET6;
    2212           0 :                         src.sin6.sin6_addr = mtod(m, struct ip6_hdr *)->ip6_src;
    2213           0 :                         dst.sa.sa_len = sizeof(struct sockaddr_in6);
    2214           0 :                         dst.sa.sa_family = AF_INET6;
    2215           0 :                         dst.sin6.sin6_addr = mtod(m, struct ip6_hdr *)->ip6_dst;
    2216           0 :                         break;
    2217             : #endif /* INET6 */
    2218             :                 }
    2219             : 
    2220           0 :                 tdb = gettdbbysrcdst(rtable_l2(rtableid),
    2221             :                     0, &src, &dst, IPPROTO_TCP);
    2222             : 
    2223             :                 /*
    2224             :                  * We don't have an SA for this peer, so we turn off
    2225             :                  * TF_SIGNATURE on the listen socket
    2226             :                  */
    2227           0 :                 if (tdb == NULL && tp->t_state == TCPS_LISTEN)
    2228           0 :                         tp->t_flags &= ~TF_SIGNATURE;
    2229             : 
    2230           0 :         }
    2231             : 
    2232           0 :         if ((sigp ? TF_SIGNATURE : 0) ^ (tp->t_flags & TF_SIGNATURE)) {
    2233           0 :                 tcpstat_inc(tcps_rcvbadsig);
    2234           0 :                 return (-1);
    2235             :         }
    2236             : 
    2237           0 :         if (sigp) {
    2238           0 :                 char sig[16];
    2239             : 
    2240           0 :                 if (tdb == NULL) {
    2241           0 :                         tcpstat_inc(tcps_rcvbadsig);
    2242           0 :                         return (-1);
    2243             :                 }
    2244             : 
    2245           0 :                 if (tcp_signature(tdb, tp->pf, m, th, iphlen, 1, sig) < 0)
    2246           0 :                         return (-1);
    2247             : 
    2248           0 :                 if (timingsafe_bcmp(sig, sigp, 16)) {
    2249           0 :                         tcpstat_inc(tcps_rcvbadsig);
    2250           0 :                         return (-1);
    2251             :                 }
    2252             : 
    2253           0 :                 tcpstat_inc(tcps_rcvgoodsig);
    2254           0 :         }
    2255             : #endif /* TCP_SIGNATURE */
    2256             : 
    2257           0 :         return (0);
    2258           0 : }
    2259             : 
    /*
     * Return a - b.  The subtraction is done on the u_long operands; the
     * result is passed through a signed long and converted back to u_long
     * on return.
     */
    u_long
    tcp_seq_subtract(u_long a, u_long b)
    {
            long delta = (long)(a - b);

            return ((u_long)delta);
    }
    2265             : 
    2266             : /*
    2267             :  * This function is called upon receipt of new valid data (while not in header
    2268             :  * prediction mode), and it updates the ordered list of sacks.
    2269             :  */
    2270             : void
    2271           0 : tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart,
    2272             :     tcp_seq rcv_lastend)
    2273             : {
    2274             :         /*
    2275             :          * First reported block MUST be the most recent one.  Subsequent
    2276             :          * blocks SHOULD be in the order in which they arrived at the
    2277             :          * receiver.  These two conditions make the implementation fully
    2278             :          * compliant with RFC 2018.
    2279             :          */
    2280             :         int i, j = 0, count = 0, lastpos = -1;
    2281           0 :         struct sackblk sack, firstsack, temp[MAX_SACK_BLKS];
    2282             : 
    2283             :         /* First clean up current list of sacks */
    2284           0 :         for (i = 0; i < tp->rcv_numsacks; i++) {
    2285           0 :                 sack = tp->sackblks[i];
    2286           0 :                 if (sack.start == 0 && sack.end == 0) {
    2287           0 :                         count++; /* count = number of blocks to be discarded */
    2288           0 :                         continue;
    2289             :                 }
    2290           0 :                 if (SEQ_LEQ(sack.end, tp->rcv_nxt)) {
    2291           0 :                         tp->sackblks[i].start = tp->sackblks[i].end = 0;
    2292           0 :                         count++;
    2293           0 :                 } else {
    2294           0 :                         temp[j].start = tp->sackblks[i].start;
    2295           0 :                         temp[j++].end = tp->sackblks[i].end;
    2296             :                 }
    2297             :         }
    2298           0 :         tp->rcv_numsacks -= count;
    2299           0 :         if (tp->rcv_numsacks == 0) { /* no sack blocks currently (fast path) */
    2300           0 :                 tcp_clean_sackreport(tp);
    2301           0 :                 if (SEQ_LT(tp->rcv_nxt, rcv_laststart)) {
    2302             :                         /* ==> need first sack block */
    2303           0 :                         tp->sackblks[0].start = rcv_laststart;
    2304           0 :                         tp->sackblks[0].end = rcv_lastend;
    2305           0 :                         tp->rcv_numsacks = 1;
    2306           0 :                 }
    2307           0 :                 return;
    2308             :         }
    2309             :         /* Otherwise, sack blocks are already present. */
    2310           0 :         for (i = 0; i < tp->rcv_numsacks; i++)
    2311           0 :                 tp->sackblks[i] = temp[i]; /* first copy back sack list */
    2312           0 :         if (SEQ_GEQ(tp->rcv_nxt, rcv_lastend))
    2313           0 :                 return;     /* sack list remains unchanged */
    2314             :         /*
    2315             :          * From here, segment just received should be (part of) the 1st sack.
    2316             :          * Go through list, possibly coalescing sack block entries.
    2317             :          */
    2318             :         firstsack.start = rcv_laststart;
    2319             :         firstsack.end = rcv_lastend;
    2320           0 :         for (i = 0; i < tp->rcv_numsacks; i++) {
    2321           0 :                 sack = tp->sackblks[i];
    2322           0 :                 if (SEQ_LT(sack.end, firstsack.start) ||
    2323           0 :                     SEQ_GT(sack.start, firstsack.end))
    2324             :                         continue; /* no overlap */
    2325           0 :                 if (sack.start == firstsack.start && sack.end == firstsack.end){
    2326             :                         /*
    2327             :                          * identical block; delete it here since we will
    2328             :                          * move it to the front of the list.
    2329             :                          */
    2330           0 :                         tp->sackblks[i].start = tp->sackblks[i].end = 0;
    2331             :                         lastpos = i;    /* last posn with a zero entry */
    2332           0 :                         continue;
    2333             :                 }
    2334           0 :                 if (SEQ_LEQ(sack.start, firstsack.start))
    2335           0 :                         firstsack.start = sack.start; /* merge blocks */
    2336           0 :                 if (SEQ_GEQ(sack.end, firstsack.end))
    2337           0 :                         firstsack.end = sack.end;     /* merge blocks */
    2338           0 :                 tp->sackblks[i].start = tp->sackblks[i].end = 0;
    2339             :                 lastpos = i;    /* last posn with a zero entry */
    2340           0 :         }
    2341           0 :         if (lastpos != -1) {    /* at least one merge */
    2342           0 :                 for (i = 0, j = 1; i < tp->rcv_numsacks; i++) {
    2343           0 :                         sack = tp->sackblks[i];
    2344           0 :                         if (sack.start == 0 && sack.end == 0)
    2345             :                                 continue;
    2346           0 :                         temp[j++] = sack;
    2347           0 :                 }
    2348           0 :                 tp->rcv_numsacks = j; /* including first blk (added later) */
    2349           0 :                 for (i = 1; i < tp->rcv_numsacks; i++) /* now copy back */
    2350           0 :                         tp->sackblks[i] = temp[i];
    2351             :         } else {        /* no merges -- shift sacks by 1 */
    2352           0 :                 if (tp->rcv_numsacks < MAX_SACK_BLKS)
    2353           0 :                         tp->rcv_numsacks++;
    2354           0 :                 for (i = tp->rcv_numsacks-1; i > 0; i--)
    2355           0 :                         tp->sackblks[i] = tp->sackblks[i-1];
    2356             :         }
    2357           0 :         tp->sackblks[0] = firstsack;
    2358           0 :         return;
    2359           0 : }
    2360             : 
    2361             : /*
    2362             :  * Process the TCP SACK option.  tp->snd_holes is an ordered list
    2363             :  * of holes (oldest to newest, in terms of the sequence space).
    2364             :  */
    2365             : void
    2366           0 : tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
    2367             : {
    2368             :         int tmp_olen;
    2369             :         u_char *tmp_cp;
    2370             :         struct sackhole *cur, *p, *temp;
    2371             : 
    2372           0 :         if (!tp->sack_enable)
    2373           0 :                 return;
    2374             :         /* SACK without ACK doesn't make sense. */
    2375           0 :         if ((th->th_flags & TH_ACK) == 0)
    2376           0 :                return;
    2377             :         /* Make sure the ACK on this segment is in [snd_una, snd_max]. */
    2378           0 :         if (SEQ_LT(th->th_ack, tp->snd_una) ||
    2379           0 :             SEQ_GT(th->th_ack, tp->snd_max))
    2380           0 :                 return;
    2381             :         /* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */
    2382           0 :         if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
    2383           0 :                 return;
    2384             :         /* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */
    2385           0 :         tmp_cp = cp + 2;
    2386             :         tmp_olen = optlen - 2;
    2387           0 :         tcpstat_inc(tcps_sack_rcv_opts);
    2388           0 :         if (tp->snd_numholes < 0)
    2389           0 :                 tp->snd_numholes = 0;
    2390           0 :         if (tp->t_maxseg == 0)
    2391           0 :                 panic("tcp_sack_option"); /* Should never happen */
    2392           0 :         while (tmp_olen > 0) {
    2393             :                 struct sackblk sack;
    2394             : 
    2395           0 :                 memcpy(&sack.start, tmp_cp, sizeof(tcp_seq));
    2396           0 :                 sack.start = ntohl(sack.start);
    2397           0 :                 memcpy(&sack.end, tmp_cp + sizeof(tcp_seq), sizeof(tcp_seq));
    2398           0 :                 sack.end = ntohl(sack.end);
    2399           0 :                 tmp_olen -= TCPOLEN_SACK;
    2400           0 :                 tmp_cp += TCPOLEN_SACK;
    2401           0 :                 if (SEQ_LEQ(sack.end, sack.start))
    2402           0 :                         continue; /* bad SACK fields */
    2403           0 :                 if (SEQ_LEQ(sack.end, tp->snd_una))
    2404           0 :                         continue; /* old block */
    2405           0 :                 if (SEQ_GT(th->th_ack, tp->snd_una)) {
    2406           0 :                         if (SEQ_LT(sack.start, th->th_ack))
    2407           0 :                                 continue;
    2408             :                 }
    2409           0 :                 if (SEQ_GT(sack.end, tp->snd_max))
    2410           0 :                         continue;
    2411           0 :                 if (tp->snd_holes == NULL) { /* first hole */
    2412           0 :                         tp->snd_holes = (struct sackhole *)
    2413           0 :                             pool_get(&sackhl_pool, PR_NOWAIT);
    2414           0 :                         if (tp->snd_holes == NULL) {
    2415             :                                 /* ENOBUFS, so ignore SACKed block for now*/
    2416           0 :                                 goto done;
    2417             :                         }
    2418             :                         cur = tp->snd_holes;
    2419           0 :                         cur->start = th->th_ack;
    2420           0 :                         cur->end = sack.start;
    2421           0 :                         cur->rxmit = cur->start;
    2422           0 :                         cur->next = NULL;
    2423           0 :                         tp->snd_numholes = 1;
    2424           0 :                         tp->rcv_lastsack = sack.end;
    2425             :                         /*
    2426             :                          * dups is at least one.  If more data has been
    2427             :                          * SACKed, it can be greater than one.
    2428             :                          */
    2429           0 :                         cur->dups = min(tcprexmtthresh,
    2430           0 :                             ((sack.end - cur->end)/tp->t_maxseg));
    2431           0 :                         if (cur->dups < 1)
    2432           0 :                                 cur->dups = 1;
    2433           0 :                         continue; /* with next sack block */
    2434             :                 }
    2435             :                 /* Go thru list of holes:  p = previous,  cur = current */
    2436             :                 p = cur = tp->snd_holes;
    2437           0 :                 while (cur) {
    2438           0 :                         if (SEQ_LEQ(sack.end, cur->start))
    2439             :                                 /* SACKs data before the current hole */
    2440             :                                 break; /* no use going through more holes */
    2441           0 :                         if (SEQ_GEQ(sack.start, cur->end)) {
    2442             :                                 /* SACKs data beyond the current hole */
    2443           0 :                                 cur->dups++;
    2444           0 :                                 if (((sack.end - cur->end)/tp->t_maxseg) >=
    2445           0 :                                     tcprexmtthresh)
    2446           0 :                                         cur->dups = tcprexmtthresh;
    2447             :                                 p = cur;
    2448           0 :                                 cur = cur->next;
    2449           0 :                                 continue;
    2450             :                         }
    2451           0 :                         if (SEQ_LEQ(sack.start, cur->start)) {
    2452             :                                 /* Data acks at least the beginning of hole */
    2453           0 :                                 if (SEQ_GEQ(sack.end, cur->end)) {
    2454             :                                         /* Acks entire hole, so delete hole */
    2455           0 :                                         if (p != cur) {
    2456           0 :                                                 p->next = cur->next;
    2457           0 :                                                 pool_put(&sackhl_pool, cur);
    2458           0 :                                                 cur = p->next;
    2459           0 :                                         } else {
    2460             :                                                 cur = cur->next;
    2461           0 :                                                 pool_put(&sackhl_pool, p);
    2462             :                                                 p = cur;
    2463           0 :                                                 tp->snd_holes = p;
    2464             :                                         }
    2465           0 :                                         tp->snd_numholes--;
    2466           0 :                                         continue;
    2467             :                                 }
    2468             :                                 /* otherwise, move start of hole forward */
    2469           0 :                                 cur->start = sack.end;
    2470           0 :                                 cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
    2471             :                                 p = cur;
    2472           0 :                                 cur = cur->next;
    2473           0 :                                 continue;
    2474             :                         }
    2475             :                         /* move end of hole backward */
    2476           0 :                         if (SEQ_GEQ(sack.end, cur->end)) {
    2477           0 :                                 cur->end = sack.start;
    2478           0 :                                 cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
    2479           0 :                                 cur->dups++;
    2480           0 :                                 if (((sack.end - cur->end)/tp->t_maxseg) >=
    2481           0 :                                     tcprexmtthresh)
    2482           0 :                                         cur->dups = tcprexmtthresh;
    2483             :                                 p = cur;
    2484           0 :                                 cur = cur->next;
    2485           0 :                                 continue;
    2486             :                         }
    2487           0 :                         if (SEQ_LT(cur->start, sack.start) &&
    2488           0 :                             SEQ_GT(cur->end, sack.end)) {
    2489             :                                 /*
    2490             :                                  * ACKs some data in middle of a hole; need to
    2491             :                                  * split current hole
    2492             :                                  */
    2493           0 :                                 temp = (struct sackhole *)
    2494           0 :                                     pool_get(&sackhl_pool, PR_NOWAIT);
    2495           0 :                                 if (temp == NULL)
    2496           0 :                                         goto done; /* ENOBUFS */
    2497           0 :                                 temp->next = cur->next;
    2498           0 :                                 temp->start = sack.end;
    2499           0 :                                 temp->end = cur->end;
    2500           0 :                                 temp->dups = cur->dups;
    2501           0 :                                 temp->rxmit = SEQ_MAX(cur->rxmit, temp->start);
    2502           0 :                                 cur->end = sack.start;
    2503           0 :                                 cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
    2504           0 :                                 cur->dups++;
    2505           0 :                                 if (((sack.end - cur->end)/tp->t_maxseg) >=
    2506           0 :                                         tcprexmtthresh)
    2507           0 :                                         cur->dups = tcprexmtthresh;
    2508           0 :                                 cur->next = temp;
    2509             :                                 p = temp;
    2510           0 :                                 cur = p->next;
    2511           0 :                                 tp->snd_numholes++;
    2512           0 :                         }
    2513             :                 }
    2514             :                 /* At this point, p points to the last hole on the list */
    2515           0 :                 if (SEQ_LT(tp->rcv_lastsack, sack.start)) {
    2516             :                         /*
    2517             :                          * Need to append new hole at end.
    2518             :                          * Last hole is p (and it's not NULL).
    2519             :                          */
    2520           0 :                         temp = (struct sackhole *)
    2521           0 :                             pool_get(&sackhl_pool, PR_NOWAIT);
    2522           0 :                         if (temp == NULL)
    2523           0 :                                 goto done; /* ENOBUFS */
    2524           0 :                         temp->start = tp->rcv_lastsack;
    2525           0 :                         temp->end = sack.start;
    2526           0 :                         temp->dups = min(tcprexmtthresh,
    2527           0 :                             ((sack.end - sack.start)/tp->t_maxseg));
    2528           0 :                         if (temp->dups < 1)
    2529           0 :                                 temp->dups = 1;
    2530           0 :                         temp->rxmit = temp->start;
    2531           0 :                         temp->next = 0;
    2532           0 :                         p->next = temp;
    2533           0 :                         tp->rcv_lastsack = sack.end;
    2534           0 :                         tp->snd_numholes++;
    2535           0 :                 }
    2536           0 :         }
    2537             : done:
    2538           0 :         return;
    2539           0 : }
    2540             : 
    2541             : /*
    2542             :  * Delete stale (i.e, cumulatively ack'd) holes.  Hole is deleted only if
    2543             :  * it is completely acked; otherwise, tcp_sack_option(), called from
    2544             :  * tcp_dooptions(), will fix up the hole.
                     :  *
                     :  * Does nothing unless tp->sack_enable is set and the connection is
                     :  * not in TCPS_LISTEN.  The hole list is walked from its head: every
                     :  * hole whose end is at or before the ack point is returned to
                     :  * sackhl_pool and tp->snd_numholes is decremented.  The walk stops at
                     :  * the first hole that is not fully covered; if the ack point lies
                     :  * inside that hole, its start (and rxmit, when it would fall behind)
                     :  * is moved up to the ack point.  That hole, or NULL, becomes the new
                     :  * head of tp->snd_holes.
    2545             :  */
    2546             : void
    2547           0 : tcp_del_sackholes(struct tcpcb *tp, struct tcphdr *th)
    2548             : {
    2549           0 :         if (tp->sack_enable && tp->t_state != TCPS_LISTEN) {
    2550             :                 /* max because this could be an older ack just arrived */
    2551           0 :                 tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
    2552             :                         th->th_ack : tp->snd_una;
    2553           0 :                 struct sackhole *cur = tp->snd_holes;
    2554             :                 struct sackhole *prev;
    2555           0 :                 while (cur)
    2556           0 :                         if (SEQ_LEQ(cur->end, lastack)) {
    2557             :                                 prev = cur;     /* hole fully acked: unlink and free */
    2558           0 :                                 cur = cur->next;
    2559           0 :                                 pool_put(&sackhl_pool, prev);
    2560           0 :                                 tp->snd_numholes--;
    2561           0 :                         } else if (SEQ_LT(cur->start, lastack)) {
    2562           0 :                                 cur->start = lastack;   /* partly acked: trim the front */
    2563           0 :                                 if (SEQ_LT(cur->rxmit, cur->start))
    2564           0 :                                         cur->rxmit = cur->start;
    2565             :                                 break;
    2566             :                         } else
    2567             :                                 break;  /* hole lies entirely beyond the ack point */
    2568           0 :                 tp->snd_holes = cur;    /* first surviving hole, or NULL */
    2569           0 :         }
    2570           0 : }
    2571             : 
    2572             : /*
    2573             :  * Delete all receiver-side SACK information.
                     :  * Resets tp->rcv_numsacks to zero and clears the start and end of
                     :  * every one of the MAX_SACK_BLKS entries in tp->sackblks.
    2574             :  */
    2575             : void
    2576           0 : tcp_clean_sackreport(struct tcpcb *tp)
    2577             : {
    2578             :         int i;
    2579             : 
    2580           0 :         tp->rcv_numsacks = 0;
    2581           0 :         for (i = 0; i < MAX_SACK_BLKS; i++)
    2582           0 :                 tp->sackblks[i].start = tp->sackblks[i].end=0;
    2583             : 
    2584           0 : }
    2585             : 
    2586             : /*
    2587             :  * Partial ack handling within a sack recovery episode.  When a partial ack
    2588             :  * arrives, turn off retransmission timer, deflate the window, do not clear
    2589             :  * tp->t_dupacks.
                     :  *
                     :  * With acked = th->th_ack - tp->snd_una, the congestion window ends up
                     :  * as (snd_cwnd - acked) + 2 * t_maxseg when snd_cwnd is larger than
                     :  * acked, and as 2 * t_maxseg otherwise.  tp->t_rtttime is cleared and
                     :  * TF_NEEDOUTPUT is set in tp->t_flags (presumably so the caller runs
                     :  * tcp_output() afterwards; confirm against the caller).
    2590             :  */
    2591             : void
    2592           0 : tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
    2593             : {
    2594             :         /* Turn off retx. timer (will start again next segment) */
    2595           0 :         TCP_TIMER_DISARM(tp, TCPT_REXMT);
    2596           0 :         tp->t_rtttime = 0;
    2597             :         /*
    2598             :          * Partial window deflation.  This statement relies on the
    2599             :          * fact that tp->snd_una has not been updated yet.
    2600             :          */
    2601           0 :         if (tp->snd_cwnd > (th->th_ack - tp->snd_una)) {
    2602           0 :                 tp->snd_cwnd -= th->th_ack - tp->snd_una;
    2603           0 :                 tp->snd_cwnd += tp->t_maxseg;
    2604           0 :         } else
    2605           0 :                 tp->snd_cwnd = tp->t_maxseg;    /* never deflate below one segment */
    2606           0 :         tp->snd_cwnd += tp->t_maxseg;   /* applied on both branches above */
    2607           0 :         tp->t_flags |= TF_NEEDOUTPUT;
    2608           0 : }
    2609             : 
    2610             : /*
    2611             :  * Pull out of band byte out of a segment so
    2612             :  * it doesn't appear in the user's data queue.
    2613             :  * It is still reflected in the segment length for
    2614             :  * sequencing purposes.
                     :  *
                     :  * The byte is looked up at offset off + urgent - 1 from the start of
                     :  * the mbuf chain m.  It is saved in tp->t_iobc, TCPOOB_HAVEDATA is set
                     :  * in tp->t_oobflags, the remainder of that mbuf is shifted down by one
                     :  * byte and its m_len is decremented.  Panics if that offset is
                     :  * negative or the chain ends before the offset is reached.
    2615             :  */
    2616             : void
    2617           0 : tcp_pulloutofband(struct socket *so, u_int urgent, struct mbuf *m, int off)
    2618             : {
    2619           0 :         int cnt = off + urgent - 1;     /* offset of the byte within the chain */
    2620             : 
    2621           0 :         while (cnt >= 0) {
    2622           0 :                 if (m->m_len > cnt) {
    2623           0 :                         char *cp = mtod(m, caddr_t) + cnt;
    2624           0 :                         struct tcpcb *tp = sototcpcb(so);
    2625             : 
    2626           0 :                         tp->t_iobc = *cp;
    2627           0 :                         tp->t_oobflags |= TCPOOB_HAVEDATA;
    2628           0 :                         memmove(cp, cp + 1, m->m_len - cnt - 1);        /* close the gap */
    2629           0 :                         m->m_len--;
    2630             :                         return;
    2631             :                 }
    2632           0 :                 cnt -= m->m_len;        /* not in this mbuf: make cnt relative to the next */
    2633           0 :                 m = m->m_next;
    2634           0 :                 if (m == NULL)
    2635             :                         break;
    2636             :         }
    2637           0 :         panic("tcp_pulloutofband");
    2638           0 : }
    2639             : 
    2640             : /*
    2641             :  * Collect new round-trip time estimate
    2642             :  * and update averages and current timeout.
                     :  *
                     :  * rtt is clamped to the range [0, TCP_RTT_MAX] before use.  Updates
                     :  * tp->t_srtt and tp->t_rttvar, recomputes tp->t_rxtcur, clears
                     :  * tp->t_rtttime, tp->t_rxtshift and tp->t_softerror, and increments
                     :  * the tcps_rttupdated counter.
    2643             :  */
    2644             : void
    2645           0 : tcp_xmit_timer(struct tcpcb *tp, int rtt)
    2646             : {
    2647             :         short delta;    /* NOTE(review): short; presumably kept in range by the TCP_RTT_MAX clamp - confirm */
    2648             :         short rttmin;
    2649             : 
    2650           0 :         if (rtt < 0)
    2651           0 :                 rtt = 0;
    2652           0 :         else if (rtt > TCP_RTT_MAX)
    2653           0 :                 rtt = TCP_RTT_MAX;
    2654             : 
    2655           0 :         tcpstat_inc(tcps_rttupdated);
    2656           0 :         if (tp->t_srtt != 0) {
    2657             :                 /*
    2658             :                  * delta is fixed point with 2 (TCP_RTT_BASE_SHIFT) bits
    2659             :                  * after the binary point (scaled by 4), whereas
    2660             :                  * srtt is stored as fixed point with 5 bits after the
    2661             :                  * binary point (i.e., scaled by 32).  The following magic
    2662             :                  * is equivalent to the smoothing algorithm in rfc793 with
    2663             :                  * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
    2664             :                  * point).
    2665             :                  */
    2666           0 :                 delta = (rtt << TCP_RTT_BASE_SHIFT) -
    2667           0 :                     (tp->t_srtt >> TCP_RTT_SHIFT);
    2668           0 :                 if ((tp->t_srtt += delta) <= 0)
    2669           0 :                         tp->t_srtt = 1 << TCP_RTT_BASE_SHIFT;   /* keep srtt strictly positive */
    2670             :                 /*
    2671             :                  * We accumulate a smoothed rtt variance (actually, a
    2672             :                  * smoothed mean difference), then set the retransmit
    2673             :                  * timer to smoothed rtt + 4 times the smoothed variance.
    2674             :                  * rttvar is stored as fixed point with 4 bits after the
    2675             :                  * binary point (scaled by 16).  The following is
    2676             :                  * equivalent to rfc793 smoothing with an alpha of .75
    2677             :                  * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
    2678             :                  * rfc793's wired-in beta.
    2679             :                  */
    2680           0 :                 if (delta < 0)
    2681           0 :                         delta = -delta;
    2682           0 :                 delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
    2683           0 :                 if ((tp->t_rttvar += delta) <= 0)
    2684           0 :                         tp->t_rttvar = 1 << TCP_RTT_BASE_SHIFT; /* keep rttvar strictly positive */
    2685             :         } else {
    2686             :                 /*
    2687             :                  * No rtt measurement yet - use the unsmoothed rtt.
    2688             :                  * Set the variance to half the rtt (so our first
    2689             :                  * retransmit happens at 3*rtt).
    2690             :                  */
    2691           0 :                 tp->t_srtt = (rtt + 1) << (TCP_RTT_SHIFT + TCP_RTT_BASE_SHIFT);
    2692           0 :                 tp->t_rttvar = (rtt + 1) <<
    2693             :                     (TCP_RTTVAR_SHIFT + TCP_RTT_BASE_SHIFT - 1);
    2694             :         }
    2695           0 :         tp->t_rtttime = 0;
    2696           0 :         tp->t_rxtshift = 0;
    2697             : 
    2698             :         /*
    2699             :          * the retransmit should happen at rtt + 4 * rttvar.
    2700             :          * Because of the way we do the smoothing, srtt and rttvar
    2701             :          * will each average +1/2 tick of bias.  When we compute
    2702             :          * the retransmit timer, we want 1/2 tick of rounding and
    2703             :          * 1 extra tick because of +-1/2 tick uncertainty in the
    2704             :          * firing of the timer.  The bias will give us exactly the
    2705             :          * 1.5 tick we need.  But, because the bias is
    2706             :          * statistical, we have to test that we don't drop below
    2707             :          * the minimum feasible timer (which is 2 ticks).
    2708             :          */
    2709           0 :         rttmin = min(max(rtt + 2, tp->t_rttmin), TCPTV_REXMTMAX);       /* lower bound for t_rxtcur */
    2710           0 :         TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), rttmin, TCPTV_REXMTMAX);
    2711             : 
    2712             :         /*
    2713             :          * We received an ack for a packet that wasn't retransmitted;
    2714             :          * it is probably safe to discard any error indications we've
    2715             :          * received recently.  This isn't quite right, but close enough
    2716             :          * for now (a route might have failed after we sent a segment,
    2717             :          * and the return path might not be symmetrical).
    2718             :          */
    2719           0 :         tp->t_softerror = 0;
    2720           0 : }
    2721             : 
    2722             : /*
    2723             :  * Determine a reasonable value for maxseg size.
    2724             :  * If the route is known, check route for mtu.
    2725             :  * If none, use an mss that can be handled on the outgoing
    2726             :  * interface without forcing IP to fragment; if bigger than
    2727             :  * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
    2728             :  * to utilize large mbufs.  If no route is found, route has no mtu,
    2729             :  * or the destination isn't local, use a default, hopefully conservative
    2730             :  * size (usually 512 or the default IP max size, but no more than the mtu
    2731             :  * of the interface), as we can't discover anything about intervening
    2732             :  * gateways or networks.  We also initialize the congestion/slow start
    2733             :  * window to be a single segment if the destination isn't local.
    2734             :  * While looking at the routing entry, we also initialize other path-dependent
    2735             :  * parameters from pre-set or cached values in the routing entry.
    2736             :  *
    2737             :  * Also take into account the space needed for options that we
    2738             :  * send regularly.  Make maxseg shorter by that amount to assure
    2739             :  * that we can send maxseg amount of data even when the options
    2740             :  * are present.  Store the upper limit of the length of options plus
    2741             :  * data in maxopd.
    2742             :  *
    2743             :  * NOTE: offer == -1 indicates that the maxseg size changed due to
    2744             :  * Path MTU discovery.
                     :  *
                     :  * Returns the value to advertise in the MSS option (mssopt) when
                     :  * offer != -1, otherwise the value just stored in tp->t_maxseg.
                     :  * Besides t_maxseg this sets tp->t_maxopd and tp->snd_cwnd, records
                     :  * a positive offer in tp->t_peermss, and for offer == -1 clears
                     :  * TF_PMTUD_PEND, t_pmtud_mtu_sent and t_pmtud_mss_acked.
    2745             :  */
    2746             : int
    2747           0 : tcp_mss(struct tcpcb *tp, int offer)
    2748             : {
    2749             :         struct rtentry *rt;
    2750             :         struct ifnet *ifp = NULL;
    2751             :         int mss, mssopt;
    2752             :         int iphlen;
    2753             :         struct inpcb *inp;
    2754             : 
    2755           0 :         inp = tp->t_inpcb;
    2756             : 
    2757           0 :         mssopt = mss = tcp_mssdflt;     /* kept if we bail out to the out label early */
    2758             : 
    2759           0 :         rt = in_pcbrtentry(inp);
    2760             : 
    2761           0 :         if (rt == NULL)
    2762             :                 goto out;
    2763             : 
    2764           0 :         ifp = if_get(rt->rt_ifidx);
    2765           0 :         if (ifp == NULL)
    2766             :                 goto out;
    2767             : 
    2768           0 :         switch (tp->pf) {
    2769             : #ifdef INET6
    2770             :         case AF_INET6:
    2771             :                 iphlen = sizeof(struct ip6_hdr);
    2772           0 :                 break;
    2773             : #endif
    2774             :         case AF_INET:
    2775             :                 iphlen = sizeof(struct ip);
    2776           0 :                 break;
    2777             :         default:
    2778             :                 /* the family does not support path MTU discovery */
    2779             :                 goto out;
    2780             :         }
    2781             : 
    2782             :         /*
    2783             :          * if there's an mtu associated with the route and we support
    2784             :          * path MTU discovery for the underlying protocol family, use it.
    2785             :          */
    2786           0 :         if (rt->rt_mtu) {
    2787             :                 /*
    2788             :                  * One may wish to lower MSS to take into account options,
    2789             :                  * especially security-related options.
    2790             :                  */
    2791           0 :                 if (tp->pf == AF_INET6 && rt->rt_mtu < IPV6_MMTU) {
    2792             :                         /*
    2793             :                          * RFC2460 section 5, last paragraph: if path MTU is
    2794             :                          * smaller than 1280, use 1280 as packet size and
    2795             :                          * attach fragment header.
    2796             :                          */
    2797           0 :                         mss = IPV6_MMTU - iphlen - sizeof(struct ip6_frag) -
    2798             :                             sizeof(struct tcphdr);
    2799           0 :                 } else {
    2800           0 :                         mss = rt->rt_mtu - iphlen -
    2801             :                             sizeof(struct tcphdr);
    2802             :                 }
    2803           0 :         } else if (ifp->if_flags & IFF_LOOPBACK) {
    2804           0 :                 mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
    2805           0 :         } else if (tp->pf == AF_INET) {
    2806           0 :                 if (ip_mtudisc) /* otherwise mss stays at tcp_mssdflt */
    2807           0 :                         mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
    2808             :         }
    2809             : #ifdef INET6
    2810           0 :         else if (tp->pf == AF_INET6) {
    2811             :                 /*
    2812             :                  * for IPv6, path MTU discovery is always turned on,
    2813             :                  * or the node must use packet size <= 1280.
    2814             :                  */
    2815           0 :                 mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
    2816           0 :         }
    2817             : #endif /* INET6 */
    2818             : 
    2819             :         /* Calculate the value that we offer in TCPOPT_MAXSEG */
    2820           0 :         if (offer != -1) {
    2821           0 :                 mssopt = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
    2822           0 :                 mssopt = max(tcp_mssdflt, mssopt);      /* never advertise less than the default */
    2823           0 :         }
    2824             :  out:
    2825           0 :         if_put(ifp);    /* ifp is still NULL on the early-exit paths before or at if_get() */
    2826             :         /*
    2827             :          * The current mss, t_maxseg, is initialized to the default value.
    2828             :          * If we compute a smaller value, reduce the current mss.
    2829             :          * If we compute a larger value, return it for use in sending
    2830             :          * a max seg size option, but don't store it for use
    2831             :          * unless we received an offer at least that large from peer.
    2832             :          *
    2833             :          * However, do not accept offers lower than the minimum of
    2834             :          * the interface MTU and 216.
    2835             :          */
    2836           0 :         if (offer > 0)
    2837           0 :                 tp->t_peermss = offer;
    2838           0 :         if (tp->t_peermss)
    2839           0 :                 mss = min(mss, max(tp->t_peermss, 216));        /* a peer mss below 216 counts as 216 */
    2840             : 
    2841             :         /* sanity - at least max opt. space */
    2842           0 :         mss = max(mss, 64);
    2843             : 
    2844             :         /*
    2845             :          * maxopd stores the maximum length of data AND options
    2846             :          * in a segment; maxseg is the amount of data in a normal
    2847             :          * segment.  We need to store this value (maxopd) apart
    2848             :          * from maxseg, because now every segment carries options
    2849             :          * and thus we normally have somewhat less data in segments.
    2850             :          */
    2851           0 :         tp->t_maxopd = mss;
    2852             : 
    2853           0 :         if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
    2854           0 :             (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
    2855           0 :                 mss -= TCPOLEN_TSTAMP_APPA;     /* TF_REQ_TSTMP and TF_RCVD_TSTMP set, TF_NOOPT clear */
    2856             : #ifdef TCP_SIGNATURE
    2857           0 :         if (tp->t_flags & TF_SIGNATURE)
    2858           0 :                 mss -= TCPOLEN_SIGLEN;
    2859             : #endif
    2860             : 
    2861           0 :         if (offer == -1) {
    2862             :                 /* mss changed due to Path MTU discovery */
    2863           0 :                 tp->t_flags &= ~TF_PMTUD_PEND;
    2864           0 :                 tp->t_pmtud_mtu_sent = 0;
    2865           0 :                 tp->t_pmtud_mss_acked = 0;
    2866           0 :                 if (mss < tp->t_maxseg) {
    2867             :                         /*
    2868             :                          * Follow suggestion in RFC 2414 to reduce the
    2869             :                          * congestion window by the ratio of the old
    2870             :                          * segment size to the new segment size.
    2871             :                          */
    2872           0 :                         tp->snd_cwnd = ulmax((tp->snd_cwnd / tp->t_maxseg) *
    2873           0 :                                              mss, mss);
    2874           0 :                 }
    2875           0 :         } else if (tcp_do_rfc3390 == 2) {
    2876             :                 /* increase initial window  */
    2877           0 :                 tp->snd_cwnd = ulmin(10 * mss, ulmax(2 * mss, 14600));
    2878           0 :         } else if (tcp_do_rfc3390) {
    2879             :                 /* increase initial window  */
    2880           0 :                 tp->snd_cwnd = ulmin(4 * mss, ulmax(2 * mss, 4380));
    2881           0 :         } else
    2882           0 :                 tp->snd_cwnd = mss;
    2883             : 
    2884           0 :         tp->t_maxseg = mss;     /* data-only size; t_maxopd above holds data plus options */
    2885             : 
    2886           0 :         return (offer != -1 ? mssopt : mss);
    2887             : }
    2888             : 
    /*
     * Return the per-segment header size for tp: the size of the IP header
     * matching tp->pf (0 for any other family) plus sizeof(struct tcphdr),
     * plus TCPOLEN_TSTAMP_APPA when TF_REQ_TSTMP and TF_RCVD_TSTMP are set
     * and TF_NOOPT is clear, plus TCPOLEN_SIGLEN when TF_SIGNATURE is set
     * (only when built with TCP_SIGNATURE).
     */
    2889             : u_int
    2890           0 : tcp_hdrsz(struct tcpcb *tp)
    2891             : {
    2892             :         u_int hlen;
    2893             : 
    2894           0 :         switch (tp->pf) {
    2895             : #ifdef INET6
    2896             :         case AF_INET6:
    2897             :                 hlen = sizeof(struct ip6_hdr);
    2898           0 :                 break;
    2899             : #endif
    2900             :         case AF_INET:
    2901             :                 hlen = sizeof(struct ip);
    2902           0 :                 break;
    2903             :         default:
    2904             :                 hlen = 0;       /* unknown family: count no network-layer header */
    2905           0 :                 break;
    2906             :         }
    2907           0 :         hlen += sizeof(struct tcphdr);
    2908             : 
    2909           0 :         if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
    2910           0 :             (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
    2911           0 :                 hlen += TCPOLEN_TSTAMP_APPA;    /* same flag test as the mss reduction in tcp_mss() */
    2912             : #ifdef TCP_SIGNATURE
    2913           0 :         if (tp->t_flags & TF_SIGNATURE)
    2914           0 :                 hlen += TCPOLEN_SIGLEN;
    2915             : #endif
    2916           0 :         return (hlen);
    2917             : }
    2918             : 
    2919             : /*
    2920             :  * Set connection variables based on the effective MSS.
    2921             :  * We are passed the TCPCB for the actual connection.  If we
    2922             :  * are the server, we are called by the compressed state engine
    2923             :  * when the 3-way handshake is complete.  If we are the client,
    2924             :  * we are called when we receive the SYN,ACK from the server.
    2925             :  *
    2926             :  * NOTE: The t_maxseg value must be initialized in the TCPCB
    2927             :  * before this routine is called!
                     :  *
                     :  * Returns without changing anything when the connection has no
                     :  * route.  Otherwise: if the send buffer high-water mark is smaller
                     :  * than t_maxseg, tcp_mss() is re-run with that size; else the send
                     :  * buffer is rounded up to a multiple of the mss, capped at sb_max.
                     :  * The receive buffer is rounded up the same way when it is larger
                     :  * than the mss.  The result of sbreserve() is deliberately ignored.
    2928             :  */
    2929             : void
    2930           0 : tcp_mss_update(struct tcpcb *tp)
    2931             : {
    2932             :         int mss;
    2933             :         u_long bufsize;
    2934             :         struct rtentry *rt;
    2935             :         struct socket *so;
    2936             : 
    2937           0 :         so = tp->t_inpcb->inp_socket;
    2938           0 :         mss = tp->t_maxseg;
    2939             : 
    2940           0 :         rt = in_pcbrtentry(tp->t_inpcb);
    2941             : 
    2942           0 :         if (rt == NULL) /* rt is used only for this check */
    2943           0 :                 return;
    2944             : 
    2945           0 :         bufsize = so->so_snd.sb_hiwat;
    2946           0 :         if (bufsize < mss) {
    2947           0 :                 mss = bufsize;
    2948             :                 /* Update t_maxseg and t_maxopd */
    2949           0 :                 tcp_mss(tp, mss);       /* tcp_mss() records a positive second argument as t_peermss */
    2950           0 :         } else {
    2951           0 :                 bufsize = roundup(bufsize, mss);
    2952           0 :                 if (bufsize > sb_max)
    2953           0 :                         bufsize = sb_max;
    2954           0 :                 (void)sbreserve(so, &so->so_snd, bufsize);
    2955             :         }
    2956             : 
    2957           0 :         bufsize = so->so_rcv.sb_hiwat;
    2958           0 :         if (bufsize > mss) {
    2959           0 :                 bufsize = roundup(bufsize, mss);
    2960           0 :                 if (bufsize > sb_max)
    2961           0 :                         bufsize = sb_max;
    2962           0 :                 (void)sbreserve(so, &so->so_rcv, bufsize);
    2963           0 :         }
    2964             : 
    2965           0 : }
    2966             : 
    2967             : /*
    2968             :  * When a partial ack arrives, force the retransmission of the
    2969             :  * next unacknowledged segment.  Do not clear tp->t_dupacks.
    2970             :  * Setting snd_nxt to th_ack forces the retransmission timer
    2971             :  * to be started again.
    2972             :  */
    2973             : void
    2974           0 : tcp_newreno_partialack(struct tcpcb *tp, struct tcphdr *th)
    2975             : {
    2976             :         /*
    2977             :          * snd_una has not been updated and the socket send buffer
    2978             :          * not yet drained of the acked data, so we have to leave
    2979             :          * snd_una as it was to get the correct data offset in
    2980             :          * tcp_output().
    2981             :          */
    2982           0 :         tcp_seq onxt = tp->snd_nxt;
    2983           0 :         u_long  ocwnd = tp->snd_cwnd;
    2984             : 
    2985           0 :         TCP_TIMER_DISARM(tp, TCPT_REXMT);
    2986           0 :         tp->t_rtttime = 0;
    2987           0 :         tp->snd_nxt = th->th_ack;
    2988             :         /*
    2989             :          * Set snd_cwnd to one segment beyond acknowledged offset
    2990             :          * (tp->snd_una not yet updated when this function is called)
    2991             :          */
    2992           0 :         tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
    2993           0 :         (void)tcp_output(tp);
    2994           0 :         tp->snd_cwnd = ocwnd;
    2995           0 :         if (SEQ_GT(onxt, tp->snd_nxt))
    2996           0 :                 tp->snd_nxt = onxt;
    2997             :         /*
    2998             :          * Partial window deflation.  Relies on fact that tp->snd_una
    2999             :          * not updated yet.
    3000             :          */
    3001           0 :         if (tp->snd_cwnd > th->th_ack - tp->snd_una)
    3002           0 :                 tp->snd_cwnd -= th->th_ack - tp->snd_una;
    3003             :         else
    3004           0 :                 tp->snd_cwnd = 0;
    3005           0 :         tp->snd_cwnd += tp->t_maxseg;
    3006           0 : }
    3007             : 
    3008             : int
    3009           0 : tcp_mss_adv(struct mbuf *m, int af)
    3010             : {
    3011             :         int mss = 0;
    3012             :         int iphlen;
    3013             :         struct ifnet *ifp = NULL;
    3014             : 
    3015           0 :         if (m && (m->m_flags & M_PKTHDR))
    3016           0 :                 ifp = if_get(m->m_pkthdr.ph_ifidx);
    3017             : 
    3018           0 :         switch (af) {
    3019             :         case AF_INET:
    3020           0 :                 if (ifp != NULL)
    3021           0 :                         mss = ifp->if_mtu;
    3022             :                 iphlen = sizeof(struct ip);
    3023           0 :                 break;
    3024             : #ifdef INET6
    3025             :         case AF_INET6:
    3026           0 :                 if (ifp != NULL)
    3027           0 :                         mss = ifp->if_mtu;
    3028             :                 iphlen = sizeof(struct ip6_hdr);
    3029           0 :                 break;
    3030             : #endif  
    3031             :         default:
    3032           0 :                 unhandled_af(af);
    3033             :         }
    3034           0 :         if_put(ifp);
    3035           0 :         mss = mss - iphlen - sizeof(struct tcphdr);
    3036           0 :         return (max(mss, tcp_mssdflt));
    3037             : }
    3038             : 
    3039             : /*
    3040             :  * TCP compressed state engine.  Currently used to hold compressed
    3041             :  * state for SYN_RECEIVED.
    3042             :  */
    3043             : 
    3044             : /* syn hash parameters */
    3045             : int     tcp_syn_hash_size = TCP_SYN_HASH_SIZE;
    3046             : int     tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE;
    3047             : int     tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE;
    3048             : int     tcp_syn_use_limit = 100000;
    3049             : 
    3050             : struct syn_cache_set tcp_syn_cache[2];
    3051             : int tcp_syn_cache_active;
    3052             : 
    3053             : #define SYN_HASH(sa, sp, dp, rand) \
    3054             :         (((sa)->s_addr ^ (rand)[0]) *                                \
    3055             :         (((((u_int32_t)(dp))<<16) + ((u_int32_t)(sp))) ^ (rand)[4]))
    3056             : #ifndef INET6
    3057             : #define SYN_HASHALL(hash, src, dst, rand) \
    3058             : do {                                                                    \
    3059             :         hash = SYN_HASH(&satosin(src)->sin_addr,                 \
    3060             :                 satosin(src)->sin_port,                                      \
    3061             :                 satosin(dst)->sin_port, (rand));                     \
    3062             : } while (/*CONSTCOND*/ 0)
    3063             : #else
    3064             : #define SYN_HASH6(sa, sp, dp, rand) \
    3065             :         (((sa)->s6_addr32[0] ^ (rand)[0]) *                  \
    3066             :         ((sa)->s6_addr32[1] ^ (rand)[1]) *                   \
    3067             :         ((sa)->s6_addr32[2] ^ (rand)[2]) *                   \
    3068             :         ((sa)->s6_addr32[3] ^ (rand)[3]) *                   \
    3069             :         (((((u_int32_t)(dp))<<16) + ((u_int32_t)(sp))) ^ (rand)[4]))
    3070             : 
    3071             : #define SYN_HASHALL(hash, src, dst, rand) \
    3072             : do {                                                                    \
    3073             :         switch ((src)->sa_family) {                                  \
    3074             :         case AF_INET:                                                   \
    3075             :                 hash = SYN_HASH(&satosin(src)->sin_addr,         \
    3076             :                         satosin(src)->sin_port,                              \
    3077             :                         satosin(dst)->sin_port, (rand));             \
    3078             :                 break;                                                  \
    3079             :         case AF_INET6:                                                  \
    3080             :                 hash = SYN_HASH6(&satosin6(src)->sin6_addr,              \
    3081             :                         satosin6(src)->sin6_port,                    \
    3082             :                         satosin6(dst)->sin6_port, (rand));           \
    3083             :                 break;                                                  \
    3084             :         default:                                                        \
    3085             :                 hash = 0;                                               \
    3086             :         }                                                               \
    3087             : } while (/*CONSTCOND*/0)
    3088             : #endif /* INET6 */
    3089             : 
    3090             : void
    3091           0 : syn_cache_rm(struct syn_cache *sc)
    3092             : {
    3093           0 :         sc->sc_flags |= SCF_DEAD;
    3094           0 :         TAILQ_REMOVE(&sc->sc_buckethead->sch_bucket, sc, sc_bucketq);
    3095           0 :         sc->sc_tp = NULL;
    3096           0 :         LIST_REMOVE(sc, sc_tpq);
    3097           0 :         sc->sc_buckethead->sch_length--;
    3098           0 :         timeout_del(&sc->sc_timer);
    3099           0 :         sc->sc_set->scs_count--;
    3100           0 : }
    3101             : 
    3102             : void
    3103           0 : syn_cache_put(struct syn_cache *sc)
    3104             : {
    3105           0 :         m_free(sc->sc_ipopts);
    3106           0 :         if (sc->sc_route4.ro_rt != NULL) {
    3107           0 :                 rtfree(sc->sc_route4.ro_rt);
    3108           0 :                 sc->sc_route4.ro_rt = NULL;
    3109           0 :         }
    3110           0 :         timeout_set(&sc->sc_timer, syn_cache_reaper, sc);
    3111           0 :         timeout_add(&sc->sc_timer, 0);
    3112           0 : }
    3113             : 
    3114             : struct pool syn_cache_pool;
    3115             : 
    3116             : /*
    3117             :  * We don't estimate RTT with SYNs, so each packet starts with the default
    3118             :  * RTT and each timer step has a fixed timeout value.
    3119             :  */
    3120             : #define SYN_CACHE_TIMER_ARM(sc)                                         \
    3121             : do {                                                                    \
    3122             :         TCPT_RANGESET((sc)->sc_rxtcur,                                       \
    3123             :             TCPTV_SRTTDFLT * tcp_backoff[(sc)->sc_rxtshift], TCPTV_MIN,      \
    3124             :             TCPTV_REXMTMAX);                                            \
    3125             :         if (!timeout_initialized(&(sc)->sc_timer))                       \
    3126             :                 timeout_set_proc(&(sc)->sc_timer, syn_cache_timer, (sc)); \
    3127             :         timeout_add(&(sc)->sc_timer, (sc)->sc_rxtcur * (hz / PR_SLOWHZ)); \
    3128             : } while (/*CONSTCOND*/0)
    3129             : 
    3130             : #define SYN_CACHE_TIMESTAMP(sc) tcp_now + (sc)->sc_modulate
    3131             : 
    3132             : void
    3133           0 : syn_cache_init(void)
    3134             : {
    3135             :         int i;
    3136             : 
    3137             :         /* Initialize the hash buckets. */
    3138           0 :         tcp_syn_cache[0].scs_buckethead = mallocarray(tcp_syn_hash_size,
    3139             :             sizeof(struct syn_cache_head), M_SYNCACHE, M_WAITOK|M_ZERO);
    3140           0 :         tcp_syn_cache[1].scs_buckethead = mallocarray(tcp_syn_hash_size,
    3141             :             sizeof(struct syn_cache_head), M_SYNCACHE, M_WAITOK|M_ZERO);
    3142           0 :         tcp_syn_cache[0].scs_size = tcp_syn_hash_size;
    3143           0 :         tcp_syn_cache[1].scs_size = tcp_syn_hash_size;
    3144           0 :         for (i = 0; i < tcp_syn_hash_size; i++) {
    3145           0 :                 TAILQ_INIT(&tcp_syn_cache[0].scs_buckethead[i].sch_bucket);
    3146           0 :                 TAILQ_INIT(&tcp_syn_cache[1].scs_buckethead[i].sch_bucket);
    3147             :         }
    3148             : 
    3149             :         /* Initialize the syn cache pool. */
    3150           0 :         pool_init(&syn_cache_pool, sizeof(struct syn_cache), 0, IPL_SOFTNET,
    3151             :             0, "syncache", NULL);
    3152           0 : }
    3153             : 
    3154             : void
    3155           0 : syn_cache_insert(struct syn_cache *sc, struct tcpcb *tp)
    3156             : {
    3157           0 :         struct syn_cache_set *set = &tcp_syn_cache[tcp_syn_cache_active];
    3158             :         struct syn_cache_head *scp;
    3159             :         struct syn_cache *sc2;
    3160             :         int i;
    3161             : 
    3162           0 :         NET_ASSERT_LOCKED();
    3163             : 
    3164             :         /*
    3165             :          * If there are no entries in the hash table, reinitialize
    3166             :          * the hash secrets.  To avoid useless cache swaps and
    3167             :          * reinitialization, use it until the limit is reached.
    3168             :          * An empty cache is also the opportunity to resize the hash.
    3169             :          */
    3170           0 :         if (set->scs_count == 0 && set->scs_use <= 0) {
    3171           0 :                 set->scs_use = tcp_syn_use_limit;
    3172           0 :                 if (set->scs_size != tcp_syn_hash_size) {
    3173           0 :                         scp = mallocarray(tcp_syn_hash_size, sizeof(struct
    3174             :                             syn_cache_head), M_SYNCACHE, M_NOWAIT|M_ZERO);
    3175           0 :                         if (scp == NULL) {
    3176             :                                 /* Try again next time. */
    3177           0 :                                 set->scs_use = 0;
    3178           0 :                         } else {
    3179           0 :                                 free(set->scs_buckethead, M_SYNCACHE,
    3180           0 :                                     set->scs_size *
    3181             :                                     sizeof(struct syn_cache_head));
    3182           0 :                                 set->scs_buckethead = scp;
    3183           0 :                                 set->scs_size = tcp_syn_hash_size;
    3184           0 :                                 for (i = 0; i < tcp_syn_hash_size; i++)
    3185           0 :                                         TAILQ_INIT(&scp[i].sch_bucket);
    3186             :                         }
    3187             :                 }
    3188           0 :                 arc4random_buf(set->scs_random, sizeof(set->scs_random));
    3189           0 :                 tcpstat_inc(tcps_sc_seedrandom);
    3190           0 :         }
    3191             : 
    3192           0 :         SYN_HASHALL(sc->sc_hash, &sc->sc_src.sa, &sc->sc_dst.sa,
    3193             :             set->scs_random);
    3194           0 :         scp = &set->scs_buckethead[sc->sc_hash % set->scs_size];
    3195           0 :         sc->sc_buckethead = scp;
    3196             : 
    3197             :         /*
    3198             :          * Make sure that we don't overflow the per-bucket
    3199             :          * limit or the total cache size limit.
    3200             :          */
    3201           0 :         if (scp->sch_length >= tcp_syn_bucket_limit) {
    3202           0 :                 tcpstat_inc(tcps_sc_bucketoverflow);
    3203             :                 /*
    3204             :                  * Someone might attack our bucket hash function.  Reseed
    3205             :                  * with random as soon as the passive syn cache gets empty.
    3206             :                  */
    3207           0 :                 set->scs_use = 0;
    3208             :                 /*
    3209             :                  * The bucket is full.  Toss the oldest element in the
    3210             :                  * bucket.  This will be the first entry in the bucket.
    3211             :                  */
    3212           0 :                 sc2 = TAILQ_FIRST(&scp->sch_bucket);
    3213             : #ifdef DIAGNOSTIC
    3214             :                 /*
    3215             :                  * This should never happen; we should always find an
    3216             :                  * entry in our bucket.
    3217             :                  */
    3218           0 :                 if (sc2 == NULL)
    3219           0 :                         panic("%s: bucketoverflow: impossible", __func__);
    3220             : #endif
    3221           0 :                 syn_cache_rm(sc2);
    3222           0 :                 syn_cache_put(sc2);
    3223           0 :         } else if (set->scs_count >= tcp_syn_cache_limit) {
    3224             :                 struct syn_cache_head *scp2, *sce;
    3225             : 
    3226           0 :                 tcpstat_inc(tcps_sc_overflowed);
    3227             :                 /*
    3228             :                  * The cache is full.  Toss the oldest entry in the
    3229             :                  * first non-empty bucket we can find.
    3230             :                  *
    3231             :                  * XXX We would really like to toss the oldest
    3232             :                  * entry in the cache, but we hope that this
    3233             :                  * condition doesn't happen very often.
    3234             :                  */
    3235             :                 scp2 = scp;
    3236           0 :                 if (TAILQ_EMPTY(&scp2->sch_bucket)) {
    3237           0 :                         sce = &set->scs_buckethead[set->scs_size];
    3238           0 :                         for (++scp2; scp2 != scp; scp2++) {
    3239           0 :                                 if (scp2 >= sce)
    3240           0 :                                         scp2 = &set->scs_buckethead[0];
    3241           0 :                                 if (! TAILQ_EMPTY(&scp2->sch_bucket))
    3242             :                                         break;
    3243             :                         }
    3244             : #ifdef DIAGNOSTIC
    3245             :                         /*
    3246             :                          * This should never happen; we should always find a
    3247             :                          * non-empty bucket.
    3248             :                          */
    3249           0 :                         if (scp2 == scp)
    3250           0 :                                 panic("%s: cacheoverflow: impossible",
    3251             :                                     __func__);
    3252             : #endif
    3253             :                 }
    3254           0 :                 sc2 = TAILQ_FIRST(&scp2->sch_bucket);
    3255           0 :                 syn_cache_rm(sc2);
    3256           0 :                 syn_cache_put(sc2);
    3257           0 :         }
    3258             : 
    3259             :         /*
    3260             :          * Initialize the entry's timer.
    3261             :          */
    3262           0 :         sc->sc_rxttot = 0;
    3263           0 :         sc->sc_rxtshift = 0;
    3264           0 :         SYN_CACHE_TIMER_ARM(sc);
    3265             : 
    3266             :         /* Link it from tcpcb entry */
    3267           0 :         LIST_INSERT_HEAD(&tp->t_sc, sc, sc_tpq);
    3268             : 
    3269             :         /* Put it into the bucket. */
    3270           0 :         TAILQ_INSERT_TAIL(&scp->sch_bucket, sc, sc_bucketq);
    3271           0 :         scp->sch_length++;
    3272           0 :         sc->sc_set = set;
    3273           0 :         set->scs_count++;
    3274           0 :         set->scs_use--;
    3275             : 
    3276           0 :         tcpstat_inc(tcps_sc_added);
    3277             : 
    3278             :         /*
    3279             :          * If the active cache has exceeded its use limit and
    3280             :          * the passive syn cache is empty, exchange their roles.
    3281             :          */
    3282           0 :         if (set->scs_use <= 0 &&
    3283           0 :             tcp_syn_cache[!tcp_syn_cache_active].scs_count == 0)
    3284           0 :                 tcp_syn_cache_active = !tcp_syn_cache_active;
    3285           0 : }
    3286             : 
    3287             : /*
    3288             :  * Per-entry timer callback: retransmit the SYN,ACK for this entry.
    3289             :  * If we have retransmitted an entry the maximum number of times, expire
    3290             :  * that entry.
    3291             :  */
    3292             : void
    3293           0 : syn_cache_timer(void *arg)
    3294             : {
    3295           0 :         struct syn_cache *sc = arg;
    3296             : 
    3297           0 :         NET_LOCK();
    3298           0 :         if (sc->sc_flags & SCF_DEAD)
    3299             :                 goto out;
    3300             : 
    3301           0 :         if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) {
    3302             :                 /* Drop it -- too many retransmissions. */
    3303             :                 goto dropit;
    3304             :         }
    3305             : 
    3306             :         /*
    3307             :          * Compute the total amount of time this entry has
    3308             :          * been on a queue.  If this entry has been on longer
    3309             :          * than the keep alive timer would allow, expire it.
    3310             :          */
    3311           0 :         sc->sc_rxttot += sc->sc_rxtcur;
    3312           0 :         if (sc->sc_rxttot >= tcptv_keep_init)
    3313             :                 goto dropit;
    3314             : 
    3315           0 :         tcpstat_inc(tcps_sc_retransmitted);
    3316           0 :         (void) syn_cache_respond(sc, NULL);
    3317             : 
    3318             :         /* Advance the timer back-off. */
    3319           0 :         sc->sc_rxtshift++;
    3320           0 :         SYN_CACHE_TIMER_ARM(sc);
    3321             : 
    3322             :  out:
    3323           0 :         NET_UNLOCK();
    3324           0 :         return;
    3325             : 
    3326             :  dropit:
    3327           0 :         tcpstat_inc(tcps_sc_timed_out);
    3328           0 :         syn_cache_rm(sc);
    3329           0 :         syn_cache_put(sc);
    3330           0 :         NET_UNLOCK();
    3331           0 : }
    3332             : 
    3333             : void
    3334           0 : syn_cache_reaper(void *arg)
    3335             : {
    3336           0 :         struct syn_cache *sc = arg;
    3337             : 
    3338           0 :         pool_put(&syn_cache_pool, (sc));
    3339             :         return;
    3340           0 : }
    3341             : 
    3342             : /*
    3343             :  * Remove the syn cache entries created by the specified tcb entry,
    3344             :  * because it does not make sense to keep them
    3345             :  * (if there's no tcb entry, the syn cache entries will never be used).
    3346             :  */
    3347             : void
    3348           0 : syn_cache_cleanup(struct tcpcb *tp)
    3349             : {
    3350             :         struct syn_cache *sc, *nsc;
    3351             : 
    3352           0 :         NET_ASSERT_LOCKED();
    3353             : 
    3354           0 :         LIST_FOREACH_SAFE(sc, &tp->t_sc, sc_tpq, nsc) {
    3355             : #ifdef DIAGNOSTIC
    3356           0 :                 if (sc->sc_tp != tp)
    3357           0 :                         panic("invalid sc_tp in syn_cache_cleanup");
    3358             : #endif
    3359           0 :                 syn_cache_rm(sc);
    3360           0 :                 syn_cache_put(sc);
    3361             :         }
    3362             :         /* just for safety */
    3363           0 :         LIST_INIT(&tp->t_sc);
    3364           0 : }
    3365             : 
    3366             : /*
    3367             :  * Find an entry in the syn cache.
    3368             :  */
    3369             : struct syn_cache *
    3370           0 : syn_cache_lookup(struct sockaddr *src, struct sockaddr *dst,
    3371             :     struct syn_cache_head **headp, u_int rtableid)
    3372             : {
    3373           0 :         struct syn_cache_set *sets[2];
    3374             :         struct syn_cache *sc;
    3375             :         struct syn_cache_head *scp;
    3376             :         u_int32_t hash;
    3377             :         int i;
    3378             : 
    3379           0 :         NET_ASSERT_LOCKED();
    3380             : 
    3381             :         /* Check the active cache first; the passive cache is likely empty. */
    3382           0 :         sets[0] = &tcp_syn_cache[tcp_syn_cache_active];
    3383           0 :         sets[1] = &tcp_syn_cache[!tcp_syn_cache_active];
    3384           0 :         for (i = 0; i < 2; i++) {
    3385           0 :                 if (sets[i]->scs_count == 0)
    3386             :                         continue;
    3387           0 :                 SYN_HASHALL(hash, src, dst, sets[i]->scs_random);
    3388           0 :                 scp = &sets[i]->scs_buckethead[hash % sets[i]->scs_size];
    3389           0 :                 *headp = scp;
    3390           0 :                 TAILQ_FOREACH(sc, &scp->sch_bucket, sc_bucketq) {
    3391           0 :                         if (sc->sc_hash != hash)
    3392             :                                 continue;
    3393           0 :                         if (!bcmp(&sc->sc_src, src, src->sa_len) &&
    3394           0 :                             !bcmp(&sc->sc_dst, dst, dst->sa_len) &&
    3395           0 :                             rtable_l2(rtableid) == rtable_l2(sc->sc_rtableid))
    3396           0 :                                 return (sc);
    3397             :                 }
    3398             :         }
    3399           0 :         return (NULL);
    3400           0 : }
    3401             : 
    3402             : /*
    3403             :  * This function gets called when we receive an ACK for a
    3404             :  * socket in the LISTEN state.  We look up the connection
    3405             :  * in the syn cache, and if it's there, we pull it out of
    3406             :  * the cache and turn it into a full-blown connection in
    3407             :  * the SYN-RECEIVED state.
    3408             :  *
    3409             :  * The return values may not be immediately obvious, and their effects
    3410             :  * can be subtle, so here they are:
    3411             :  *
    3412             :  *      NULL    SYN was not found in cache; caller should drop the
    3413             :  *              packet and send an RST.
    3414             :  *
    3415             :  *      -1      We were unable to create the new connection, and are
    3416             :  *              aborting it.  An ACK,RST is being sent to the peer
    3417             :  *              (unless we got screwy sequence numbers; see below),
    3418             :  *              because the 3-way handshake has been completed.  Caller
    3419             :  *              should not free the mbuf, since we may be using it.  If
    3420             :  *              we are not, we will free it.
    3421             :  *
    3422             :  *      Otherwise, the return value is a pointer to the new socket
    3423             :  *      associated with the connection.
    3424             :  */
    3425             : struct socket *
    3426           0 : syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
    3427             :     u_int hlen, u_int tlen, struct socket *so, struct mbuf *m)
    3428             : {
    3429             :         struct syn_cache *sc;
    3430           0 :         struct syn_cache_head *scp;
    3431             :         struct inpcb *inp, *oldinp;
    3432             :         struct tcpcb *tp = NULL;
    3433             :         struct mbuf *am;
    3434             :         struct socket *oso;
    3435             : 
    3436           0 :         NET_ASSERT_LOCKED();
    3437             : 
    3438           0 :         sc = syn_cache_lookup(src, dst, &scp, sotoinpcb(so)->inp_rtableid);
    3439           0 :         if (sc == NULL)
    3440           0 :                 return (NULL);
    3441             : 
    3442             :         /*
    3443             :          * Verify the sequence and ack numbers.  Try getting the correct
    3444             :          * response again.
    3445             :          */
    3446           0 :         if ((th->th_ack != sc->sc_iss + 1) ||
    3447           0 :             SEQ_LEQ(th->th_seq, sc->sc_irs) ||
    3448           0 :             SEQ_GT(th->th_seq, sc->sc_irs + 1 + sc->sc_win)) {
    3449           0 :                 (void) syn_cache_respond(sc, m);
    3450           0 :                 return ((struct socket *)(-1));
    3451             :         }
    3452             : 
    3453             :         /* Remove this cache entry */
    3454           0 :         syn_cache_rm(sc);
    3455             : 
    3456             :         /*
    3457             :          * Ok, create the full blown connection, and set things up
    3458             :          * as they would have been set up if we had created the
    3459             :          * connection when the SYN arrived.  If we can't create
    3460             :          * the connection, abort it.
    3461             :          */
    3462             :         oso = so;
    3463           0 :         so = sonewconn(so, SS_ISCONNECTED);
    3464           0 :         if (so == NULL)
    3465             :                 goto resetandabort;
    3466             : 
    3467           0 :         oldinp = sotoinpcb(oso);
    3468           0 :         inp = sotoinpcb(so);
    3469             : 
    3470             : #ifdef IPSEC
    3471             :         /*
    3472             :          * We need to copy the required security levels
    3473             :          * from the old pcb. Ditto for any other
    3474             :          * IPsec-related information.
    3475             :          */
    3476           0 :         memcpy(inp->inp_seclevel, oldinp->inp_seclevel,
    3477             :             sizeof(oldinp->inp_seclevel));
    3478             : #endif /* IPSEC */
    3479             : #ifdef INET6
    3480             :         /*
    3481             :          * inp still has the OLD in_pcb stuff, set the
    3482             :          * v6-related flags on the new guy, too.
    3483             :          */
    3484           0 :         inp->inp_flags |= (oldinp->inp_flags & INP_IPV6);
    3485           0 :         if (inp->inp_flags & INP_IPV6) {
    3486           0 :                 inp->inp_ipv6.ip6_hlim = oldinp->inp_ipv6.ip6_hlim;
    3487           0 :                 inp->inp_hops = oldinp->inp_hops;
    3488           0 :         } else
    3489             : #endif /* INET6 */
    3490             :         {
    3491           0 :                 inp->inp_ip.ip_ttl = oldinp->inp_ip.ip_ttl;
    3492             :         }
    3493             : 
    3494             : #if NPF > 0
    3495           0 :         if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
    3496             :                 struct pf_divert *divert;
    3497             : 
    3498           0 :                 divert = pf_find_divert(m);
    3499           0 :                 KASSERT(divert != NULL);
    3500           0 :                 inp->inp_rtableid = divert->rdomain;
    3501           0 :         } else
    3502             : #endif
    3503             :         /* inherit rtable from listening socket */
    3504           0 :         inp->inp_rtableid = sc->sc_rtableid;
    3505             : 
    3506           0 :         inp->inp_lport = th->th_dport;
    3507           0 :         switch (src->sa_family) {
    3508             : #ifdef INET6
    3509             :         case AF_INET6:
    3510           0 :                 inp->inp_laddr6 = satosin6(dst)->sin6_addr;
    3511           0 :                 break;
    3512             : #endif /* INET6 */
    3513             :         case AF_INET:
    3514           0 :                 inp->inp_laddr = satosin(dst)->sin_addr;
    3515           0 :                 inp->inp_options = ip_srcroute(m);
    3516           0 :                 if (inp->inp_options == NULL) {
    3517           0 :                         inp->inp_options = sc->sc_ipopts;
    3518           0 :                         sc->sc_ipopts = NULL;
    3519           0 :                 }
    3520             :                 break;
    3521             :         }
    3522           0 :         in_pcbrehash(inp);
    3523             : 
    3524             :         /*
    3525             :          * Give the new socket our cached route reference.
    3526             :          */
    3527           0 :         if (src->sa_family == AF_INET)
    3528           0 :                 inp->inp_route = sc->sc_route4;         /* struct assignment */
    3529             : #ifdef INET6
    3530             :         else
    3531           0 :                 inp->inp_route6 = sc->sc_route6;
    3532             : #endif
    3533           0 :         sc->sc_route4.ro_rt = NULL;
    3534             : 
    3535           0 :         am = m_get(M_DONTWAIT, MT_SONAME);      /* XXX */
    3536           0 :         if (am == NULL)
    3537             :                 goto resetandabort;
    3538           0 :         am->m_len = src->sa_len;
    3539           0 :         memcpy(mtod(am, caddr_t), src, src->sa_len);
    3540           0 :         if (in_pcbconnect(inp, am)) {
    3541             :                 (void) m_free(am);
    3542             :                 goto resetandabort;
    3543             :         }
    3544             :         (void) m_free(am);
    3545             : 
    3546           0 :         tp = intotcpcb(inp);
    3547           0 :         tp->t_flags = sototcpcb(oso)->t_flags & (TF_NOPUSH|TF_NODELAY);
    3548           0 :         if (sc->sc_request_r_scale != 15) {
    3549           0 :                 tp->requested_s_scale = sc->sc_requested_s_scale;
    3550           0 :                 tp->request_r_scale = sc->sc_request_r_scale;
    3551           0 :                 tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
    3552           0 :         }
    3553           0 :         if (sc->sc_flags & SCF_TIMESTAMP)
    3554           0 :                 tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
    3555             : 
    3556           0 :         tp->t_template = tcp_template(tp);
    3557           0 :         if (tp->t_template == 0) {
    3558           0 :                 tp = tcp_drop(tp, ENOBUFS);     /* destroys socket */
    3559             :                 so = NULL;
    3560           0 :                 goto abort;
    3561             :         }
    3562           0 :         tp->sack_enable = sc->sc_flags & SCF_SACK_PERMIT;
    3563           0 :         tp->ts_modulate = sc->sc_modulate;
    3564           0 :         tp->ts_recent = sc->sc_timestamp;
    3565           0 :         tp->iss = sc->sc_iss;
    3566           0 :         tp->irs = sc->sc_irs;
    3567           0 :         tcp_sendseqinit(tp);
    3568           0 :         tp->snd_last = tp->snd_una;
    3569             : #ifdef TCP_ECN
    3570           0 :         if (sc->sc_flags & SCF_ECN_PERMIT) {
    3571           0 :                 tp->t_flags |= TF_ECN_PERMIT;
    3572           0 :                 tcpstat_inc(tcps_ecn_accepts);
    3573           0 :         }
    3574             : #endif
    3575           0 :         if (sc->sc_flags & SCF_SACK_PERMIT)
    3576           0 :                 tp->t_flags |= TF_SACK_PERMIT;
    3577             : #ifdef TCP_SIGNATURE
    3578           0 :         if (sc->sc_flags & SCF_SIGNATURE)
    3579           0 :                 tp->t_flags |= TF_SIGNATURE;
    3580             : #endif
    3581           0 :         tcp_rcvseqinit(tp);
    3582           0 :         tp->t_state = TCPS_SYN_RECEIVED;
    3583           0 :         tp->t_rcvtime = tcp_now;
    3584           0 :         TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
    3585           0 :         tcpstat_inc(tcps_accepts);
    3586             : 
    3587           0 :         tcp_mss(tp, sc->sc_peermaxseg);       /* sets t_maxseg */
    3588           0 :         if (sc->sc_peermaxseg)
    3589           0 :                 tcp_mss_update(tp);
    3590             :         /* Reset initial window to 1 segment for retransmit */
    3591           0 :         if (sc->sc_rxtshift > 0)
    3592           0 :                 tp->snd_cwnd = tp->t_maxseg;
    3593           0 :         tp->snd_wl1 = sc->sc_irs;
    3594           0 :         tp->rcv_up = sc->sc_irs + 1;
    3595             : 
    3596             :         /*
    3597             :          * This is what would have happened in tcp_output() when
    3598             :          * the SYN,ACK was sent.
    3599             :          */
    3600           0 :         tp->snd_up = tp->snd_una;
    3601           0 :         tp->snd_max = tp->snd_nxt = tp->iss+1;
    3602           0 :         TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
    3603           0 :         if (sc->sc_win > 0 && SEQ_GT(tp->rcv_nxt + sc->sc_win, tp->rcv_adv))
    3604           0 :                 tp->rcv_adv = tp->rcv_nxt + sc->sc_win;
    3605           0 :         tp->last_ack_sent = tp->rcv_nxt;
    3606             : 
    3607           0 :         tcpstat_inc(tcps_sc_completed);
    3608           0 :         syn_cache_put(sc);
    3609           0 :         return (so);
    3610             : 
    3611             : resetandabort:
    3612           0 :         tcp_respond(NULL, mtod(m, caddr_t), th, (tcp_seq)0, th->th_ack, TH_RST,
    3613           0 :             m->m_pkthdr.ph_rtableid);
    3614             : abort:
    3615           0 :         m_freem(m);
    3616           0 :         if (so != NULL)
    3617           0 :                 (void) soabort(so);
    3618           0 :         syn_cache_put(sc);
    3619           0 :         tcpstat_inc(tcps_sc_aborted);
    3620           0 :         return ((struct socket *)(-1));
    3621           0 : }
    3622             : 
    3623             : /*
    3624             :  * This function is called when we get a RST for a
    3625             :  * non-existent connection, so that we can see if the
    3626             :  * connection is in the syn cache.  If it is, zap it.
                     :  *
                     :  * The entry is looked up by (src, dst, rtableid).  The RST is only
                     :  * honoured when th->th_seq is sc_irs or sc_irs + 1; any other
                     :  * sequence number leaves the entry untouched.  On a match the
                     :  * entry is removed from the cache with syn_cache_rm(),
                     :  * tcps_sc_reset is incremented and the entry is released with
                     :  * syn_cache_put().
                     :  *
                     :  * NET_ASSERT_LOCKED() is asserted on entry.
    3627             :  */
    3628             : 
    3629             : void
    3630           0 : syn_cache_reset(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
    3631             :     u_int rtableid)
    3632             : {
    3633             :         struct syn_cache *sc;
    3634           0 :         struct syn_cache_head *scp;
    3635             : 
    3636           0 :         NET_ASSERT_LOCKED();
    3637             : 
    3638           0 :         if ((sc = syn_cache_lookup(src, dst, &scp, rtableid)) == NULL)
    3639           0 :                 return;
    3640           0 :         if (SEQ_LT(th->th_seq, sc->sc_irs) ||
    3641           0 :             SEQ_GT(th->th_seq, sc->sc_irs + 1))
    3642           0 :                 return;
    3643           0 :         syn_cache_rm(sc);
    3644           0 :         tcpstat_inc(tcps_sc_reset);
    3645           0 :         syn_cache_put(sc);
    3646           0 : }
    3647             : /*
                     :  * Handle an unreachable indication (an ICMP message, per the
                     :  * sequence check below) for a connection that may still be in
                     :  * the syn cache.
                     :  *
                     :  * The entry is looked up by (src, dst, rtableid).  th->th_seq is
                     :  * converted with ntohl() and must equal sc_iss, otherwise the
                     :  * message is ignored.  The first accepted error only sets
                     :  * SCF_UNREACH; the entry is removed (and tcps_sc_unreach is
                     :  * incremented) only when SCF_UNREACH is already set and
                     :  * sc_rxtshift is at least 3.
                     :  *
                     :  * NET_ASSERT_LOCKED() is asserted on entry.
                     :  */
    3648             : void
    3649           0 : syn_cache_unreach(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
    3650             :     u_int rtableid)
    3651             : {
    3652             :         struct syn_cache *sc;
    3653           0 :         struct syn_cache_head *scp;
    3654             : 
    3655           0 :         NET_ASSERT_LOCKED();
    3656             : 
    3657           0 :         if ((sc = syn_cache_lookup(src, dst, &scp, rtableid)) == NULL)
    3658           0 :                 return;
    3659             :         /* If the sequence number != sc_iss, then it's a bogus ICMP msg */
    3660           0 :         if (ntohl (th->th_seq) != sc->sc_iss) {
    3661           0 :                 return;
    3662             :         }
    3663             : 
    3664             :         /*
    3665             :          * If we've retransmitted 3 times and this is our second error,
    3666             :          * we remove the entry.  Otherwise, we allow it to continue on.
    3667             :          * This prevents us from incorrectly nuking an entry during a
    3668             :          * spurious network outage.
    3669             :          *
    3670             :          * See tcp_notify().
    3671             :          */
    3672           0 :         if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxtshift < 3) {
    3673           0 :                 sc->sc_flags |= SCF_UNREACH;
    3674           0 :                 return;
    3675             :         }
    3676             : 
    3677           0 :         syn_cache_rm(sc);
    3678           0 :         tcpstat_inc(tcps_sc_unreach);
    3679           0 :         syn_cache_put(sc);
    3680           0 : }
    3681             : 
    3682             : /*
    3683             :  * Given a LISTEN socket and an inbound SYN request, add
    3684             :  * this to the syn cache, and send back a segment:
    3685             :  *      <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
    3686             :  * to the source.
    3687             :  *
    3688             :  * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN.
    3689             :  * Doing so would require that we hold onto the data and deliver it
    3690             :  * to the application.  However, if we are the target of a SYN-flood
    3691             :  * DoS attack, an attacker could send data which would eventually
    3692             :  * consume all available buffer space if it were ACKed.  By not ACKing
    3693             :  * the data, we avoid this DoS scenario.
                     :  *
                     :  * If issp is non-NULL, *issp is used as the initial send sequence
                     :  * number; otherwise one is drawn from arc4random().  The TCP
                     :  * options (optp/optlen) are parsed with tcp_dooptions() into a
                     :  * zeroed scratch tcpcb when optp is non-NULL (or, with
                     :  * TCP_SIGNATURE, when the listener has TF_SIGNATURE set).  The
                     :  * peer's MSS is read from oi->maxseg.
                     :  *
                     :  * Returns -1 if tcp_dooptions() fails or no cache entry can be
                     :  * allocated; this function does not free m on those paths.
                     :  * Otherwise m is handed to syn_cache_respond() and 0 is returned,
                     :  * even if sending the SYN,ACK failed (a new entry is then released
                     :  * again and tcps_sc_dropped is incremented).
    3694             :  */
    3695             : 
    3696             : int
    3697           0 : syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
    3698             :     u_int iphlen, struct socket *so, struct mbuf *m, u_char *optp, int optlen,
    3699             :     struct tcp_opt_info *oi, tcp_seq *issp)
    3700             : {
    3701           0 :         struct tcpcb tb, *tp;
    3702             :         long win;
    3703             :         struct syn_cache *sc;
    3704           0 :         struct syn_cache_head *scp;
    3705             :         struct mbuf *ipopts;
    3706             : 
    3707           0 :         tp = sototcpcb(so);
    3708             : 
    3709             :         /*
    3710             :          * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
    3711             :          *
    3712             :          * Note this check is performed in tcp_input() very early on.
    3713             :          */
    3714             : 
    3715             :         /*
    3716             :          * Initialize some local state.
    3717             :          */
    3718           0 :         win = sbspace(so, &so->so_rcv);
    3719           0 :         if (win > TCP_MAXWIN)
    3720             :                 win = TCP_MAXWIN;
    3721             : 
    3722           0 :         bzero(&tb, sizeof(tb));
    3723             : #ifdef TCP_SIGNATURE
    3724           0 :         if (optp || (tp->t_flags & TF_SIGNATURE)) {
    3725             : #else
    3726             :         if (optp) {
    3727             : #endif
    3728           0 :                 tb.pf = tp->pf;
    3729           0 :                 tb.sack_enable = tp->sack_enable;
    3730           0 :                 tb.t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
    3731             : #ifdef TCP_SIGNATURE
    3732           0 :                 if (tp->t_flags & TF_SIGNATURE)
    3733           0 :                         tb.t_flags |= TF_SIGNATURE;
    3734             : #endif
    3735           0 :                 tb.t_state = TCPS_LISTEN;
    3736           0 :                 if (tcp_dooptions(&tb, optp, optlen, th, m, iphlen, oi,
    3737           0 :                     sotoinpcb(so)->inp_rtableid))
    3738           0 :                         return (-1);
    3739             :         }
    3740             : 
    3741           0 :         switch (src->sa_family) {
    3742             :         case AF_INET:
    3743             :                 /*
    3744             :                  * Remember the IP options, if any.
    3745             :                  */
    3746           0 :                 ipopts = ip_srcroute(m);
    3747           0 :                 break;
    3748             :         default:
    3749             :                 ipopts = NULL;
    3750           0 :         }
    3751             : 
    3752             :         /*
    3753             :          * See if we already have an entry for this connection.
    3754             :          * If we do, resend the SYN,ACK.  We do not count this
    3755             :          * as a retransmission (XXX though maybe we should).
    3756             :          */
    3757           0 :         sc = syn_cache_lookup(src, dst, &scp, sotoinpcb(so)->inp_rtableid);
    3758           0 :         if (sc != NULL) {
    3759           0 :                 tcpstat_inc(tcps_sc_dupesyn);
    3760           0 :                 if (ipopts) {
    3761             :                         /*
    3762             :                          * If we were remembering a previous source route,
    3763             :                          * forget it and use the new one we've been given.
    3764             :                          */
    3765           0 :                         m_free(sc->sc_ipopts);
    3766           0 :                         sc->sc_ipopts = ipopts;
    3767           0 :                 }
    3768           0 :                 sc->sc_timestamp = tb.ts_recent;
    3769           0 :                 if (syn_cache_respond(sc, m) == 0) {
    3770           0 :                         tcpstat_inc(tcps_sndacks);
    3771           0 :                         tcpstat_inc(tcps_sndtotal);
    3772           0 :                 }
    3773           0 :                 return (0);
    3774             :         }
    3775             : 
    3776           0 :         sc = pool_get(&syn_cache_pool, PR_NOWAIT|PR_ZERO);
    3777           0 :         if (sc == NULL) {
    3778           0 :                 m_free(ipopts);
    3779           0 :                 return (-1);
    3780             :         }
    3781             : 
    3782             :         /*
    3783             :          * Fill in the cache, and put the necessary IP and TCP
    3784             :          * options into the reply.
    3785             :          */
    3786           0 :         memcpy(&sc->sc_src, src, src->sa_len);
    3787           0 :         memcpy(&sc->sc_dst, dst, dst->sa_len);
    3788           0 :         sc->sc_rtableid = sotoinpcb(so)->inp_rtableid;
    3789           0 :         sc->sc_flags = 0;
    3790           0 :         sc->sc_ipopts = ipopts;
    3791           0 :         sc->sc_irs = th->th_seq;
    3792             : 
    3793           0 :         sc->sc_iss = issp ? *issp : arc4random();
    3794           0 :         sc->sc_peermaxseg = oi->maxseg;
    3795           0 :         sc->sc_ourmaxseg = tcp_mss_adv(m, sc->sc_src.sa.sa_family);
    3796           0 :         sc->sc_win = win;
    3797           0 :         sc->sc_timestamp = tb.ts_recent;
    3798           0 :         if ((tb.t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP)) ==
    3799             :             (TF_REQ_TSTMP|TF_RCVD_TSTMP)) {
    3800           0 :                 sc->sc_flags |= SCF_TIMESTAMP;
    3801           0 :                 sc->sc_modulate = arc4random();
    3802           0 :         }
    3803           0 :         if ((tb.t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
    3804             :             (TF_RCVD_SCALE|TF_REQ_SCALE)) {
    3805           0 :                 sc->sc_requested_s_scale = tb.requested_s_scale;
    3806           0 :                 sc->sc_request_r_scale = 0;
    3807             :                 /*
    3808             :                  * Pick the smallest possible scaling factor that
    3809             :                  * will still allow us to scale up to sb_max.
    3810             :                  *
    3811             :                  * We do this because there are broken firewalls that
    3812             :                  * will corrupt the window scale option, leading to
    3813             :                  * the other endpoint believing that our advertised
    3814             :                  * window is unscaled.  At scale factors larger than
    3815             :                  * 5 the unscaled window will drop below 1500 bytes,
    3816             :                  * leading to serious problems when traversing these
    3817             :                  * broken firewalls.
    3818             :                  *
    3819             :                  * With the default sbmax of 256K, a scale factor
    3820             :                  * of 3 will be chosen by this algorithm.  Those who
    3821             :                  * choose a larger sbmax should watch out
    3822             :                  * for the compatibility problems mentioned above.
    3823             :                  *
    3824             :                  * RFC1323: The Window field in a SYN (i.e., a <SYN>
    3825             :                  * or <SYN,ACK>) segment itself is never scaled.
    3826             :                  */
    3827           0 :                 while (sc->sc_request_r_scale < TCP_MAX_WINSHIFT &&
    3828           0 :                     (TCP_MAXWIN << sc->sc_request_r_scale) < sb_max)
    3829           0 :                         sc->sc_request_r_scale++;
    3830             :         } else {
    3831           0 :                 sc->sc_requested_s_scale = 15;  /* 15: no window scaling */
    3832           0 :                 sc->sc_request_r_scale = 15;
    3833             :         }
    3834             : #ifdef TCP_ECN
    3835             :         /*
    3836             :          * if both ECE and CWR flag bits are set, peer is ECN capable.
    3837             :          */
    3838           0 :         if (tcp_do_ecn &&
    3839           0 :             (th->th_flags & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR))
    3840           0 :                 sc->sc_flags |= SCF_ECN_PERMIT;
    3841             : #endif
    3842             :         /*
    3843             :          * Set SCF_SACK_PERMIT if peer did send a SACK_PERMITTED option
    3844             :          * (i.e., if tcp_dooptions() did set TF_SACK_PERMIT).
    3845             :          */
    3846           0 :         if (tb.sack_enable && (tb.t_flags & TF_SACK_PERMIT))
    3847           0 :                 sc->sc_flags |= SCF_SACK_PERMIT;
    3848             : #ifdef TCP_SIGNATURE
    3849           0 :         if (tb.t_flags & TF_SIGNATURE)
    3850           0 :                 sc->sc_flags |= SCF_SIGNATURE;
    3851             : #endif
    3852           0 :         sc->sc_tp = tp;
    3853           0 :         if (syn_cache_respond(sc, m) == 0) {
    3854           0 :                 syn_cache_insert(sc, tp);
    3855           0 :                 tcpstat_inc(tcps_sndacks);
    3856           0 :                 tcpstat_inc(tcps_sndtotal);
    3857           0 :         } else {
    3858           0 :                 syn_cache_put(sc);
    3859           0 :                 tcpstat_inc(tcps_sc_dropped);
    3860             :         }
    3861             : 
    3862           0 :         return (0);
    3863           0 : }
    3864             : /*
                     :  * Build a <SYN,ACK> segment from the syn cache entry sc and hand it
                     :  * to ip_output() or ip6_output(), depending on the address family
                     :  * stored in sc->sc_src.
                     :  *
                     :  * The mbuf m passed in is always freed with m_freem(); the reply is
                     :  * built from scratch in a newly allocated mbuf.
                     :  *
                     :  * Returns the result of ip_output()/ip6_output(), or:
                     :  *      EAFNOSUPPORT    the address family of sc->sc_src is not handled
                     :  *      ENOBUFS         no mbuf (or cluster) could be allocated, or,
                     :  *                      under DIAGNOSTIC, the reply would exceed
                     :  *                      MCLBYTES
                     :  *      EPERM           TCP_SIGNATURE: gettdbbysrcdst() found no tdb
                     :  *      EINVAL          TCP_SIGNATURE: tcp_signature() failed
                     :  */
    3865             : int
    3866           0 : syn_cache_respond(struct syn_cache *sc, struct mbuf *m)
    3867             : {
    3868             :         u_int8_t *optp;
    3869             :         int optlen, error;
    3870             :         u_int16_t tlen;
    3871             :         struct ip *ip = NULL;
    3872             : #ifdef INET6
    3873             :         struct ip6_hdr *ip6 = NULL;
    3874             : #endif
    3875             :         struct tcphdr *th;
    3876             :         u_int hlen;
    3877             :         struct inpcb *inp;
    3878             : 
    3879           0 :         switch (sc->sc_src.sa.sa_family) {
    3880             :         case AF_INET:
    3881             :                 hlen = sizeof(struct ip);
    3882           0 :                 break;
    3883             : #ifdef INET6
    3884             :         case AF_INET6:
    3885             :                 hlen = sizeof(struct ip6_hdr);
    3886           0 :                 break;
    3887             : #endif
    3888             :         default:
    3889           0 :                 m_freem(m);
    3890           0 :                 return (EAFNOSUPPORT);
    3891             :         }
    3892             : 
    3893             :         /* Compute the size of the TCP options; the 4-byte MSS option is always sent. */
    3894           0 :         optlen = 4 + (sc->sc_request_r_scale != 15 ? 4 : 0) +
    3895           0 :             ((sc->sc_flags & SCF_SACK_PERMIT) ? 4 : 0) +
    3896             : #ifdef TCP_SIGNATURE
    3897           0 :             ((sc->sc_flags & SCF_SIGNATURE) ? TCPOLEN_SIGLEN : 0) +
    3898             : #endif
    3899           0 :             ((sc->sc_flags & SCF_TIMESTAMP) ? TCPOLEN_TSTAMP_APPA : 0);
    3900             : 
    3901           0 :         tlen = hlen + sizeof(struct tcphdr) + optlen;
    3902             : 
    3903             :         /*
    3904             :          * Create the IP+TCP header from scratch.
    3905             :          */
    3906           0 :         m_freem(m);
    3907             : #ifdef DIAGNOSTIC
    3908           0 :         if (max_linkhdr + tlen > MCLBYTES)
    3909           0 :                 return (ENOBUFS);
    3910             : #endif
    3911           0 :         MGETHDR(m, M_DONTWAIT, MT_DATA);
    3912           0 :         if (m && max_linkhdr + tlen > MHLEN) {
    3913           0 :                 MCLGET(m, M_DONTWAIT);
    3914           0 :                 if ((m->m_flags & M_EXT) == 0) {
    3915           0 :                         m_freem(m);
    3916             :                         m = NULL;
    3917           0 :                 }
    3918             :         }
    3919           0 :         if (m == NULL)
    3920           0 :                 return (ENOBUFS);
    3921             : 
    3922             :         /* Fixup the mbuf. */
    3923           0 :         m->m_data += max_linkhdr;
    3924           0 :         m->m_len = m->m_pkthdr.len = tlen;
    3925           0 :         m->m_pkthdr.ph_ifidx = 0;
    3926           0 :         m->m_pkthdr.ph_rtableid = sc->sc_rtableid;
    3927           0 :         memset(mtod(m, u_char *), 0, tlen);
    3928             : 
    3929           0 :         switch (sc->sc_src.sa.sa_family) {
    3930             :         case AF_INET:
    3931           0 :                 ip = mtod(m, struct ip *);
    3932           0 :                 ip->ip_dst = sc->sc_src.sin.sin_addr;
    3933           0 :                 ip->ip_src = sc->sc_dst.sin.sin_addr;
    3934           0 :                 ip->ip_p = IPPROTO_TCP;
    3935           0 :                 th = (struct tcphdr *)(ip + 1);
    3936           0 :                 th->th_dport = sc->sc_src.sin.sin_port;
    3937           0 :                 th->th_sport = sc->sc_dst.sin.sin_port;
    3938           0 :                 break;
    3939             : #ifdef INET6
    3940             :         case AF_INET6:
    3941           0 :                 ip6 = mtod(m, struct ip6_hdr *);
    3942           0 :                 ip6->ip6_dst = sc->sc_src.sin6.sin6_addr;
    3943           0 :                 ip6->ip6_src = sc->sc_dst.sin6.sin6_addr;
    3944           0 :                 ip6->ip6_nxt = IPPROTO_TCP;
    3945             :                 /* ip6_plen will be updated in ip6_output() */
    3946           0 :                 th = (struct tcphdr *)(ip6 + 1);
    3947           0 :                 th->th_dport = sc->sc_src.sin6.sin6_port;
    3948           0 :                 th->th_sport = sc->sc_dst.sin6.sin6_port;
    3949           0 :                 break;
    3950             : #endif
    3951             :         default:
    3952           0 :                 unhandled_af(sc->sc_src.sa.sa_family);
    3953             :         }
    3954             : 
    3955           0 :         th->th_seq = htonl(sc->sc_iss);
    3956           0 :         th->th_ack = htonl(sc->sc_irs + 1);
    3957           0 :         th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
    3958           0 :         th->th_flags = TH_SYN|TH_ACK;
    3959             : #ifdef TCP_ECN
    3960             :         /* Set ECE for SYN-ACK if peer supports ECN. */
    3961           0 :         if (tcp_do_ecn && (sc->sc_flags & SCF_ECN_PERMIT))
    3962           0 :                 th->th_flags |= TH_ECE;
    3963             : #endif
    3964           0 :         th->th_win = htons(sc->sc_win);
    3965             :         /* th_sum already 0 */
    3966             :         /* th_urp already 0 */
    3967             : 
    3968             :         /* Tack on the TCP options, starting with the MSS option. */
    3969           0 :         optp = (u_int8_t *)(th + 1);
    3970           0 :         *optp++ = TCPOPT_MAXSEG;
    3971           0 :         *optp++ = 4;
    3972           0 :         *optp++ = (sc->sc_ourmaxseg >> 8) & 0xff;
    3973           0 :         *optp++ = sc->sc_ourmaxseg & 0xff;
    3974             : 
    3975             :         /* Include SACK_PERMIT_HDR option if peer has already done so. */
    3976           0 :         if (sc->sc_flags & SCF_SACK_PERMIT) {
    3977           0 :                 *((u_int32_t *)optp) = htonl(TCPOPT_SACK_PERMIT_HDR);
    3978           0 :                 optp += 4;
    3979           0 :         }
    3980             : 
    3981           0 :         if (sc->sc_request_r_scale != 15) {
    3982           0 :                 *((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 |
    3983             :                     TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 |
    3984             :                     sc->sc_request_r_scale);
    3985           0 :                 optp += 4;
    3986           0 :         }
    3987             : 
    3988           0 :         if (sc->sc_flags & SCF_TIMESTAMP) {
    3989           0 :                 u_int32_t *lp = (u_int32_t *)(optp);
    3990             :                 /* Form timestamp option as shown in appendix A of RFC 1323. */
    3991           0 :                 *lp++ = htonl(TCPOPT_TSTAMP_HDR);
    3992           0 :                 *lp++ = htonl(SYN_CACHE_TIMESTAMP(sc));
    3993           0 :                 *lp   = htonl(sc->sc_timestamp);
    3994           0 :                 optp += TCPOLEN_TSTAMP_APPA;
    3995           0 :         }
    3996             : 
    3997             : #ifdef TCP_SIGNATURE
    3998           0 :         if (sc->sc_flags & SCF_SIGNATURE) {
    3999           0 :                 union sockaddr_union src, dst;
    4000             :                 struct tdb *tdb;
    4001             : 
    4002           0 :                 bzero(&src, sizeof(union sockaddr_union));
    4003           0 :                 bzero(&dst, sizeof(union sockaddr_union));
    4004           0 :                 src.sa.sa_len = sc->sc_src.sa.sa_len;
    4005           0 :                 src.sa.sa_family = sc->sc_src.sa.sa_family;
    4006           0 :                 dst.sa.sa_len = sc->sc_dst.sa.sa_len;
    4007           0 :                 dst.sa.sa_family = sc->sc_dst.sa.sa_family;
    4008             : 
    4009           0 :                 switch (sc->sc_src.sa.sa_family) {
    4010             :                 case 0: /*default to PF_INET*/
    4011             :                 case AF_INET:
    4012           0 :                         src.sin.sin_addr = mtod(m, struct ip *)->ip_src;
    4013           0 :                         dst.sin.sin_addr = mtod(m, struct ip *)->ip_dst;
    4014           0 :                         break;
    4015             : #ifdef INET6
    4016             :                 case AF_INET6:
    4017           0 :                         src.sin6.sin6_addr = mtod(m, struct ip6_hdr *)->ip6_src;
    4018           0 :                         dst.sin6.sin6_addr = mtod(m, struct ip6_hdr *)->ip6_dst;
    4019           0 :                         break;
    4020             : #endif /* INET6 */
    4021             :                 }
    4022             : 
    4023           0 :                 tdb = gettdbbysrcdst(rtable_l2(sc->sc_rtableid),
    4024             :                     0, &src, &dst, IPPROTO_TCP);
    4025           0 :                 if (tdb == NULL) {
    4026           0 :                         m_freem(m);
    4027           0 :                         return (EPERM);
    4028             :                 }
    4029             : 
    4030             :                 /* Send signature option */
    4031           0 :                 *(optp++) = TCPOPT_SIGNATURE;
    4032           0 :                 *(optp++) = TCPOLEN_SIGNATURE;
    4033             : 
    4034           0 :                 if (tcp_signature(tdb, sc->sc_src.sa.sa_family, m, th,
    4035           0 :                     hlen, 0, optp) < 0) {
    4036           0 :                         m_freem(m);
    4037           0 :                         return (EINVAL);
    4038             :                 }
    4039           0 :                 optp += 16;
    4040             : 
    4041             :                 /* Pad options list to the next 32 bit boundary and
    4042             :                  * terminate it.
    4043             :                  */
    4044           0 :                 *optp++ = TCPOPT_NOP;
    4045           0 :                 *optp++ = TCPOPT_EOL;
    4046           0 :         }
    4047             : #endif /* TCP_SIGNATURE */
    4048             : 
    4049             :         /*
                     :          * Compute the packet's checksum.  For IPv4, ip_len is first
                     :          * set to the TCP length (tlen - hlen) while the checksum is
                     :          * taken; it is overwritten with the full length further down.
                     :          */
    4050           0 :         switch (sc->sc_src.sa.sa_family) {
    4051             :         case AF_INET:
    4052           0 :                 ip->ip_len = htons(tlen - hlen);
    4053           0 :                 th->th_sum = 0;
    4054           0 :                 th->th_sum = in_cksum(m, tlen);
    4055           0 :                 break;
    4056             : #ifdef INET6
    4057             :         case AF_INET6:
    4058           0 :                 ip6->ip6_plen = htons(tlen - hlen);
    4059           0 :                 th->th_sum = 0;
    4060           0 :                 th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, tlen - hlen);
    4061           0 :                 break;
    4062             : #endif
    4063             :         }
    4064             : 
    4065             :         /* use IPsec policy and ttl from listening socket, on SYN ACK */
    4066           0 :         inp = sc->sc_tp ? sc->sc_tp->t_inpcb : NULL;
    4067             : 
    4068             :         /*
    4069             :          * Fill in some straggling IP bits.  ip_len is stored in
    4070             :          * network byte order here, via htons().
    4071             :          */
    4072           0 :         switch (sc->sc_src.sa.sa_family) {
    4073             :         case AF_INET:
    4074           0 :                 ip->ip_len = htons(tlen);
    4075           0 :                 ip->ip_ttl = inp ? inp->inp_ip.ip_ttl : ip_defttl;
    4076           0 :                 if (inp != NULL)
    4077           0 :                         ip->ip_tos = inp->inp_ip.ip_tos;
    4078             :                 break;
    4079             : #ifdef INET6
    4080             :         case AF_INET6:
    4081           0 :                 ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
    4082           0 :                 ip6->ip6_vfc |= IPV6_VERSION;
    4083           0 :                 ip6->ip6_plen = htons(tlen - hlen);
    4084             :                 /* ip6_hlim will be initialized afterwards */
    4085             :                 /* leave flowlabel = 0, it is legal and requires no state mgmt */
    4086           0 :                 break;
    4087             : #endif
    4088             :         }
    4089             : 
    4090           0 :         switch (sc->sc_src.sa.sa_family) {
    4091             :         case AF_INET:
    4092           0 :                 error = ip_output(m, sc->sc_ipopts, &sc->sc_route4,
    4093           0 :                     (ip_mtudisc ? IP_MTUDISC : 0),  NULL, inp, 0);
    4094           0 :                 break;
    4095             : #ifdef INET6
    4096             :         case AF_INET6:
    4097           0 :                 ip6->ip6_hlim = in6_selecthlim(inp);
    4098             : 
    4099           0 :                 error = ip6_output(m, NULL /*XXX*/, &sc->sc_route6, 0,
    4100             :                     NULL, NULL);
    4101           0 :                 break;
    4102             : #endif
    4103             :         default:
    4104             :                 error = EAFNOSUPPORT;
    4105           0 :                 break;
    4106             :         }
    4107           0 :         return (error);
    4108           0 : }

Generated by: LCOV version 1.13