Line data Source code
1 : /* $OpenBSD: tcp_usrreq.c,v 1.169 2018/06/11 07:40:26 bluhm Exp $ */
2 : /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */
3 :
4 : /*
5 : * Copyright (c) 1982, 1986, 1988, 1993
6 : * The Regents of the University of California. All rights reserved.
7 : *
8 : * Redistribution and use in source and binary forms, with or without
9 : * modification, are permitted provided that the following conditions
10 : * are met:
11 : * 1. Redistributions of source code must retain the above copyright
12 : * notice, this list of conditions and the following disclaimer.
13 : * 2. Redistributions in binary form must reproduce the above copyright
14 : * notice, this list of conditions and the following disclaimer in the
15 : * documentation and/or other materials provided with the distribution.
16 : * 3. Neither the name of the University nor the names of its contributors
17 : * may be used to endorse or promote products derived from this software
18 : * without specific prior written permission.
19 : *
20 : * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 : * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 : * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 : * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 : * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 : * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 : * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 : * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 : * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 : * SUCH DAMAGE.
31 : *
32 : * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
33 : *
34 : * NRL grants permission for redistribution and use in source and binary
35 : * forms, with or without modification, of the software and documentation
36 : * created at NRL provided that the following conditions are met:
37 : *
38 : * 1. Redistributions of source code must retain the above copyright
39 : * notice, this list of conditions and the following disclaimer.
40 : * 2. Redistributions in binary form must reproduce the above copyright
41 : * notice, this list of conditions and the following disclaimer in the
42 : * documentation and/or other materials provided with the distribution.
43 : * 3. All advertising materials mentioning features or use of this software
44 : * must display the following acknowledgements:
45 : * This product includes software developed by the University of
46 : * California, Berkeley and its contributors.
47 : * This product includes software developed at the Information
48 : * Technology Division, US Naval Research Laboratory.
49 : * 4. Neither the name of the NRL nor the names of its contributors
50 : * may be used to endorse or promote products derived from this software
51 : * without specific prior written permission.
52 : *
53 : * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54 : * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 : * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56 : * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
57 : * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58 : * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59 : * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60 : * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61 : * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62 : * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63 : * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64 : *
65 : * The views and conclusions contained in the software and documentation
66 : * are those of the authors and should not be interpreted as representing
67 : * official policies, either expressed or implied, of the US Naval
68 : * Research Laboratory (NRL).
69 : */
70 :
71 : #include <sys/param.h>
72 : #include <sys/systm.h>
73 : #include <sys/mbuf.h>
74 : #include <sys/socket.h>
75 : #include <sys/socketvar.h>
76 : #include <sys/protosw.h>
77 : #include <sys/stat.h>
78 : #include <sys/sysctl.h>
79 : #include <sys/domain.h>
80 : #include <sys/kernel.h>
81 : #include <sys/pool.h>
82 :
83 : #include <net/if.h>
84 : #include <net/if_var.h>
85 : #include <net/route.h>
86 :
87 : #include <netinet/in.h>
88 : #include <netinet/in_var.h>
89 : #include <netinet/ip.h>
90 : #include <netinet/in_pcb.h>
91 : #include <netinet/ip_var.h>
92 : #include <netinet/tcp.h>
93 : #include <netinet/tcp_fsm.h>
94 : #include <netinet/tcp_seq.h>
95 : #include <netinet/tcp_timer.h>
96 : #include <netinet/tcp_var.h>
97 : #include <netinet/tcp_debug.h>
98 :
99 : #ifdef INET6
100 : #include <netinet6/in6_var.h>
101 : #endif
102 :
103 : #ifndef TCP_SENDSPACE
104 : #define TCP_SENDSPACE 1024*16
105 : #endif
106 : u_int tcp_sendspace = TCP_SENDSPACE;
107 : #ifndef TCP_RECVSPACE
108 : #define TCP_RECVSPACE 1024*16
109 : #endif
110 : u_int tcp_recvspace = TCP_RECVSPACE;
111 : u_int tcp_autorcvbuf_inc = 16 * 1024;
112 :
113 : int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS;
114 :
115 : struct inpcbtable tcbtable;
116 :
117 : int tcp_ident(void *, size_t *, void *, size_t, int);
118 :
119 : /*
120 : * Process a TCP user request for TCP tb. If this is a send request
121 : * then m is the mbuf chain of send data. If this is a timer expiration
122 : * (called from the software clock routine), then timertype tells which timer.
123 : */
124 : /*ARGSUSED*/
125 : int
126 0 : tcp_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
127 : struct mbuf *control, struct proc *p)
128 : {
129 : struct inpcb *inp;
130 : struct tcpcb *otp = NULL, *tp = NULL;
131 : int error = 0;
132 : short ostate;
133 :
134 0 : if (req == PRU_CONTROL) {
135 : #ifdef INET6
136 0 : if (sotopf(so) == PF_INET6)
137 0 : return in6_control(so, (u_long)m, (caddr_t)nam,
138 : (struct ifnet *)control);
139 : else
140 : #endif /* INET6 */
141 0 : return (in_control(so, (u_long)m, (caddr_t)nam,
142 : (struct ifnet *)control));
143 : }
144 :
145 0 : soassertlocked(so);
146 :
147 0 : if (control && control->m_len) {
148 0 : m_freem(control);
149 0 : m_freem(m);
150 0 : return (EINVAL);
151 : }
152 :
153 0 : inp = sotoinpcb(so);
154 : /*
155 : * When a TCP is attached to a socket, then there will be
156 : * a (struct inpcb) pointed at by the socket, and this
157 : * structure will point at a subsidiary (struct tcpcb).
158 : */
159 0 : if (inp == NULL) {
160 0 : error = so->so_error;
161 0 : if (error == 0)
162 : error = EINVAL;
163 : /*
164 : * The following corrects an mbuf leak under rare
165 : * circumstances
166 : */
167 0 : if (req == PRU_SEND || req == PRU_SENDOOB)
168 0 : m_freem(m);
169 0 : return (error);
170 : }
171 0 : tp = intotcpcb(inp);
172 : /* tp might get 0 when using socket splicing */
173 0 : if (tp == NULL)
174 0 : return (0);
175 0 : if (so->so_options & SO_DEBUG) {
176 : otp = tp;
177 0 : ostate = tp->t_state;
178 0 : }
179 :
180 0 : switch (req) {
181 :
182 : /*
183 : * Give the socket an address.
184 : */
185 : case PRU_BIND:
186 0 : error = in_pcbbind(inp, nam, p);
187 0 : break;
188 :
189 : /*
190 : * Prepare to accept connections.
191 : */
192 : case PRU_LISTEN:
193 0 : if (inp->inp_lport == 0)
194 0 : error = in_pcbbind(inp, NULL, p);
195 : /* If the in_pcbbind() above is called, the tp->pf
196 : should still be whatever it was before. */
197 0 : if (error == 0)
198 0 : tp->t_state = TCPS_LISTEN;
199 : break;
200 :
201 : /*
202 : * Initiate connection to peer.
203 : * Create a template for use in transmissions on this connection.
204 : * Enter SYN_SENT state, and mark socket as connecting.
205 : * Start keep-alive timer, and seed output sequence space.
206 : * Send initial segment on connection.
207 : */
208 : case PRU_CONNECT:
209 : #ifdef INET6
210 0 : if (inp->inp_flags & INP_IPV6) {
211 0 : struct sockaddr_in6 *sin6;
212 :
213 0 : if ((error = in6_nam2sin6(nam, &sin6)))
214 0 : break;
215 0 : if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
216 0 : IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
217 : error = EINVAL;
218 0 : break;
219 : }
220 0 : error = in6_pcbconnect(inp, nam);
221 0 : } else
222 : #endif /* INET6 */
223 : {
224 0 : struct sockaddr_in *sin;
225 :
226 0 : if ((error = in_nam2sin(nam, &sin)))
227 0 : break;
228 0 : if ((sin->sin_addr.s_addr == INADDR_ANY) ||
229 0 : (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
230 0 : IN_MULTICAST(sin->sin_addr.s_addr) ||
231 0 : in_broadcast(sin->sin_addr, inp->inp_rtableid)) {
232 : error = EINVAL;
233 0 : break;
234 : }
235 0 : error = in_pcbconnect(inp, nam);
236 0 : }
237 0 : if (error)
238 : break;
239 :
240 0 : tp->t_template = tcp_template(tp);
241 0 : if (tp->t_template == 0) {
242 0 : in_pcbdisconnect(inp);
243 : error = ENOBUFS;
244 0 : break;
245 : }
246 :
247 0 : so->so_state |= SS_CONNECTOUT;
248 :
249 : /* Compute window scaling to request. */
250 0 : tcp_rscale(tp, sb_max);
251 :
252 0 : soisconnecting(so);
253 0 : tcpstat_inc(tcps_connattempt);
254 0 : tp->t_state = TCPS_SYN_SENT;
255 0 : TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
256 0 : tcp_set_iss_tsm(tp);
257 0 : tcp_sendseqinit(tp);
258 0 : tp->snd_last = tp->snd_una;
259 0 : error = tcp_output(tp);
260 0 : break;
261 :
262 : /*
263 : * Create a TCP connection between two sockets.
264 : */
265 : case PRU_CONNECT2:
266 : error = EOPNOTSUPP;
267 0 : break;
268 :
269 : /*
270 : * Initiate disconnect from peer.
271 : * If connection never passed embryonic stage, just drop;
272 : * else if don't need to let data drain, then can just drop anyways,
273 : * else have to begin TCP shutdown process: mark socket disconnecting,
274 : * drain unread data, state switch to reflect user close, and
275 : * send segment (e.g. FIN) to peer. Socket will be really disconnected
276 : * when peer sends FIN and acks ours.
277 : *
278 : * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
279 : */
280 : case PRU_DISCONNECT:
281 0 : tp = tcp_disconnect(tp);
282 0 : break;
283 :
284 : /*
285 : * Accept a connection. Essentially all the work is
286 : * done at higher levels; just return the address
287 : * of the peer, storing through addr.
288 : */
289 : case PRU_ACCEPT:
290 : #ifdef INET6
291 0 : if (inp->inp_flags & INP_IPV6)
292 0 : in6_setpeeraddr(inp, nam);
293 : else
294 : #endif
295 0 : in_setpeeraddr(inp, nam);
296 : break;
297 :
298 : /*
299 : * Mark the connection as being incapable of further output.
300 : */
301 : case PRU_SHUTDOWN:
302 0 : if (so->so_state & SS_CANTSENDMORE)
303 : break;
304 0 : socantsendmore(so);
305 0 : tp = tcp_usrclosed(tp);
306 0 : if (tp)
307 0 : error = tcp_output(tp);
308 : break;
309 :
310 : /*
311 : * After a receive, possibly send window update to peer.
312 : */
313 : case PRU_RCVD:
314 : /*
315 : * soreceive() calls this function when a user receives
316 : * ancillary data on a listening socket. We don't call
317 : * tcp_output in such a case, since there is no header
318 : * template for a listening socket and hence the kernel
319 : * will panic.
320 : */
321 0 : if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
322 0 : (void) tcp_output(tp);
323 : break;
324 :
325 : /*
326 : * Do a send by putting data in output queue and updating urgent
327 : * marker if URG set. Possibly send more data.
328 : */
329 : case PRU_SEND:
330 0 : sbappendstream(so, &so->so_snd, m);
331 0 : error = tcp_output(tp);
332 0 : break;
333 :
334 : /*
335 : * Abort the TCP.
336 : */
337 : case PRU_ABORT:
338 0 : tp = tcp_drop(tp, ECONNABORTED);
339 0 : break;
340 :
341 : case PRU_SENSE:
342 0 : ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
343 0 : return (0);
344 :
345 : case PRU_RCVOOB:
346 0 : if ((so->so_oobmark == 0 &&
347 0 : (so->so_state & SS_RCVATMARK) == 0) ||
348 0 : so->so_options & SO_OOBINLINE ||
349 0 : tp->t_oobflags & TCPOOB_HADDATA) {
350 : error = EINVAL;
351 0 : break;
352 : }
353 0 : if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
354 : error = EWOULDBLOCK;
355 0 : break;
356 : }
357 0 : m->m_len = 1;
358 0 : *mtod(m, caddr_t) = tp->t_iobc;
359 0 : if (((long)nam & MSG_PEEK) == 0)
360 0 : tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
361 : break;
362 :
363 : case PRU_SENDOOB:
364 0 : if (sbspace(so, &so->so_snd) < -512) {
365 0 : m_freem(m);
366 : error = ENOBUFS;
367 0 : break;
368 : }
369 : /*
370 : * According to RFC961 (Assigned Protocols),
371 : * the urgent pointer points to the last octet
372 : * of urgent data. We continue, however,
373 : * to consider it to indicate the first octet
374 : * of data past the urgent section.
375 : * Otherwise, snd_up should be one lower.
376 : */
377 0 : sbappendstream(so, &so->so_snd, m);
378 0 : tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
379 0 : tp->t_force = 1;
380 0 : error = tcp_output(tp);
381 0 : tp->t_force = 0;
382 0 : break;
383 :
384 : case PRU_SOCKADDR:
385 : #ifdef INET6
386 0 : if (inp->inp_flags & INP_IPV6)
387 0 : in6_setsockaddr(inp, nam);
388 : else
389 : #endif
390 0 : in_setsockaddr(inp, nam);
391 : break;
392 :
393 : case PRU_PEERADDR:
394 : #ifdef INET6
395 0 : if (inp->inp_flags & INP_IPV6)
396 0 : in6_setpeeraddr(inp, nam);
397 : else
398 : #endif
399 0 : in_setpeeraddr(inp, nam);
400 : break;
401 :
402 : default:
403 0 : panic("tcp_usrreq");
404 : }
405 0 : if (otp)
406 0 : tcp_trace(TA_USER, ostate, tp, otp, NULL, req, 0);
407 0 : return (error);
408 0 : }
409 :
410 : int
411 0 : tcp_ctloutput(int op, struct socket *so, int level, int optname,
412 : struct mbuf *m)
413 : {
414 : int error = 0;
415 : struct inpcb *inp;
416 : struct tcpcb *tp;
417 : int i;
418 :
419 0 : inp = sotoinpcb(so);
420 0 : if (inp == NULL)
421 0 : return (ECONNRESET);
422 0 : if (level != IPPROTO_TCP) {
423 0 : switch (so->so_proto->pr_domain->dom_family) {
424 : #ifdef INET6
425 : case PF_INET6:
426 0 : error = ip6_ctloutput(op, so, level, optname, m);
427 0 : break;
428 : #endif /* INET6 */
429 : case PF_INET:
430 0 : error = ip_ctloutput(op, so, level, optname, m);
431 0 : break;
432 : default:
433 : error = EAFNOSUPPORT; /*?*/
434 0 : break;
435 : }
436 0 : return (error);
437 : }
438 0 : tp = intotcpcb(inp);
439 :
440 0 : switch (op) {
441 :
442 : case PRCO_SETOPT:
443 0 : switch (optname) {
444 :
445 : case TCP_NODELAY:
446 0 : if (m == NULL || m->m_len < sizeof (int))
447 0 : error = EINVAL;
448 0 : else if (*mtod(m, int *))
449 0 : tp->t_flags |= TF_NODELAY;
450 : else
451 0 : tp->t_flags &= ~TF_NODELAY;
452 : break;
453 :
454 : case TCP_NOPUSH:
455 0 : if (m == NULL || m->m_len < sizeof (int))
456 0 : error = EINVAL;
457 0 : else if (*mtod(m, int *))
458 0 : tp->t_flags |= TF_NOPUSH;
459 0 : else if (tp->t_flags & TF_NOPUSH) {
460 0 : tp->t_flags &= ~TF_NOPUSH;
461 0 : if (TCPS_HAVEESTABLISHED(tp->t_state))
462 0 : error = tcp_output(tp);
463 : }
464 : break;
465 :
466 : case TCP_MAXSEG:
467 0 : if (m == NULL || m->m_len < sizeof (int)) {
468 : error = EINVAL;
469 0 : break;
470 : }
471 :
472 0 : i = *mtod(m, int *);
473 0 : if (i > 0 && i <= tp->t_maxseg)
474 0 : tp->t_maxseg = i;
475 : else
476 : error = EINVAL;
477 : break;
478 :
479 : case TCP_SACK_ENABLE:
480 0 : if (m == NULL || m->m_len < sizeof (int)) {
481 : error = EINVAL;
482 0 : break;
483 : }
484 :
485 0 : if (TCPS_HAVEESTABLISHED(tp->t_state)) {
486 : error = EPERM;
487 0 : break;
488 : }
489 :
490 0 : if (tp->t_flags & TF_SIGNATURE) {
491 : error = EPERM;
492 0 : break;
493 : }
494 :
495 0 : if (*mtod(m, int *))
496 0 : tp->sack_enable = 1;
497 : else
498 0 : tp->sack_enable = 0;
499 : break;
500 : #ifdef TCP_SIGNATURE
501 : case TCP_MD5SIG:
502 0 : if (m == NULL || m->m_len < sizeof (int)) {
503 : error = EINVAL;
504 0 : break;
505 : }
506 :
507 0 : if (TCPS_HAVEESTABLISHED(tp->t_state)) {
508 : error = EPERM;
509 0 : break;
510 : }
511 :
512 0 : if (*mtod(m, int *)) {
513 0 : tp->t_flags |= TF_SIGNATURE;
514 0 : tp->sack_enable = 0;
515 0 : } else
516 0 : tp->t_flags &= ~TF_SIGNATURE;
517 : break;
518 : #endif /* TCP_SIGNATURE */
519 : default:
520 : error = ENOPROTOOPT;
521 0 : break;
522 : }
523 : break;
524 :
525 : case PRCO_GETOPT:
526 0 : m->m_len = sizeof(int);
527 :
528 0 : switch (optname) {
529 : case TCP_NODELAY:
530 0 : *mtod(m, int *) = tp->t_flags & TF_NODELAY;
531 0 : break;
532 : case TCP_NOPUSH:
533 0 : *mtod(m, int *) = tp->t_flags & TF_NOPUSH;
534 0 : break;
535 : case TCP_MAXSEG:
536 0 : *mtod(m, int *) = tp->t_maxseg;
537 0 : break;
538 : case TCP_SACK_ENABLE:
539 0 : *mtod(m, int *) = tp->sack_enable;
540 0 : break;
541 : #ifdef TCP_SIGNATURE
542 : case TCP_MD5SIG:
543 0 : *mtod(m, int *) = tp->t_flags & TF_SIGNATURE;
544 0 : break;
545 : #endif
546 : default:
547 : error = ENOPROTOOPT;
548 0 : break;
549 : }
550 : break;
551 : }
552 0 : return (error);
553 0 : }
554 :
555 : /*
556 : * Attach TCP protocol to socket, allocating
557 : * internet protocol control block, tcp control block,
558 : * buffer space, and entering LISTEN state to accept connections.
559 : */
560 : int
561 0 : tcp_attach(struct socket *so, int proto)
562 : {
563 : struct tcpcb *tp;
564 : struct inpcb *inp;
565 : int error;
566 :
567 0 : if (so->so_pcb)
568 0 : return EISCONN;
569 0 : if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0 ||
570 0 : sbcheckreserve(so->so_snd.sb_wat, tcp_sendspace) ||
571 0 : sbcheckreserve(so->so_rcv.sb_wat, tcp_recvspace)) {
572 0 : error = soreserve(so, tcp_sendspace, tcp_recvspace);
573 0 : if (error)
574 0 : return (error);
575 : }
576 :
577 0 : NET_ASSERT_LOCKED();
578 0 : error = in_pcballoc(so, &tcbtable);
579 0 : if (error)
580 0 : return (error);
581 0 : inp = sotoinpcb(so);
582 0 : tp = tcp_newtcpcb(inp);
583 0 : if (tp == NULL) {
584 0 : unsigned int nofd = so->so_state & SS_NOFDREF; /* XXX */
585 :
586 0 : so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
587 0 : in_pcbdetach(inp);
588 0 : so->so_state |= nofd;
589 : return (ENOBUFS);
590 : }
591 0 : tp->t_state = TCPS_CLOSED;
592 : #ifdef INET6
593 : /* we disallow IPv4 mapped address completely. */
594 0 : if (inp->inp_flags & INP_IPV6)
595 0 : tp->pf = PF_INET6;
596 : else
597 0 : tp->pf = PF_INET;
598 : #else
599 : tp->pf = PF_INET;
600 : #endif
601 0 : if ((so->so_options & SO_LINGER) && so->so_linger == 0)
602 0 : so->so_linger = TCP_LINGERTIME;
603 :
604 0 : if (so->so_options & SO_DEBUG)
605 0 : tcp_trace(TA_USER, TCPS_CLOSED, tp, tp, NULL, PRU_ATTACH, 0);
606 0 : return (0);
607 0 : }
608 :
609 : int
610 0 : tcp_detach(struct socket *so)
611 : {
612 : struct inpcb *inp;
613 : struct tcpcb *otp = NULL, *tp = NULL;
614 : int error = 0;
615 : short ostate;
616 :
617 0 : soassertlocked(so);
618 :
619 0 : inp = sotoinpcb(so);
620 : /*
621 : * When a TCP is attached to a socket, then there will be
622 : * a (struct inpcb) pointed at by the socket, and this
623 : * structure will point at a subsidiary (struct tcpcb).
624 : */
625 0 : if (inp == NULL) {
626 0 : error = so->so_error;
627 0 : if (error == 0)
628 : error = EINVAL;
629 0 : return (error);
630 : }
631 0 : tp = intotcpcb(inp);
632 : /* tp might get 0 when using socket splicing */
633 0 : if (tp == NULL)
634 0 : return (0);
635 0 : if (so->so_options & SO_DEBUG) {
636 : otp = tp;
637 0 : ostate = tp->t_state;
638 0 : }
639 :
640 : /*
641 : * Detach the TCP protocol from the socket.
642 : * If the protocol state is non-embryonic, then can't
643 : * do this directly: have to initiate a PRU_DISCONNECT,
644 : * which may finish later; embryonic TCB's can just
645 : * be discarded here.
646 : */
647 0 : tp = tcp_disconnect(tp);
648 :
649 0 : if (otp)
650 0 : tcp_trace(TA_USER, ostate, tp, otp, NULL, PRU_DETACH, 0);
651 0 : return (error);
652 0 : }
653 :
654 : /*
655 : * Initiate (or continue) disconnect.
656 : * If embryonic state, just send reset (once).
657 : * If in ``let data drain'' option and linger null, just drop.
658 : * Otherwise (hard), mark socket disconnecting and drop
659 : * current input data; switch states based on user close, and
660 : * send segment to peer (with FIN).
661 : */
662 : struct tcpcb *
663 0 : tcp_disconnect(struct tcpcb *tp)
664 : {
665 0 : struct socket *so = tp->t_inpcb->inp_socket;
666 :
667 0 : if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
668 0 : tp = tcp_close(tp);
669 0 : else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
670 0 : tp = tcp_drop(tp, 0);
671 : else {
672 0 : soisdisconnecting(so);
673 0 : sbflush(so, &so->so_rcv);
674 0 : tp = tcp_usrclosed(tp);
675 0 : if (tp)
676 0 : (void) tcp_output(tp);
677 : }
678 0 : return (tp);
679 : }
680 :
681 : /*
682 : * User issued close, and wish to trail through shutdown states:
683 : * if never received SYN, just forget it. If got a SYN from peer,
684 : * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
685 : * If already got a FIN from peer, then almost done; go to LAST_ACK
686 : * state. In all other cases, have already sent FIN to peer (e.g.
687 : * after PRU_SHUTDOWN), and just have to play tedious game waiting
688 : * for peer to send FIN or not respond to keep-alives, etc.
689 : * We can let the user exit from the close as soon as the FIN is acked.
690 : */
691 : struct tcpcb *
692 0 : tcp_usrclosed(struct tcpcb *tp)
693 : {
694 :
695 0 : switch (tp->t_state) {
696 :
697 : case TCPS_CLOSED:
698 : case TCPS_LISTEN:
699 : case TCPS_SYN_SENT:
700 0 : tp->t_state = TCPS_CLOSED;
701 0 : tp = tcp_close(tp);
702 0 : break;
703 :
704 : case TCPS_SYN_RECEIVED:
705 : case TCPS_ESTABLISHED:
706 0 : tp->t_state = TCPS_FIN_WAIT_1;
707 0 : break;
708 :
709 : case TCPS_CLOSE_WAIT:
710 0 : tp->t_state = TCPS_LAST_ACK;
711 0 : break;
712 : }
713 0 : if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
714 0 : soisdisconnected(tp->t_inpcb->inp_socket);
715 : /*
716 : * If we are in FIN_WAIT_2, we arrived here because the
717 : * application did a shutdown of the send side. Like the
718 : * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
719 : * a full close, we start a timer to make sure sockets are
720 : * not left in FIN_WAIT_2 forever.
721 : */
722 0 : if (tp->t_state == TCPS_FIN_WAIT_2)
723 0 : TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
724 : }
725 0 : return (tp);
726 : }
727 :
728 : /*
729 : * Look up a socket for ident or tcpdrop, ...
730 : */
731 : int
732 0 : tcp_ident(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int dodrop)
733 : {
734 : int error = 0;
735 0 : struct tcp_ident_mapping tir;
736 : struct inpcb *inp;
737 : struct tcpcb *tp = NULL;
738 : struct sockaddr_in *fin, *lin;
739 : #ifdef INET6
740 : struct sockaddr_in6 *fin6, *lin6;
741 0 : struct in6_addr f6, l6;
742 : #endif
743 :
744 0 : NET_ASSERT_LOCKED();
745 :
746 0 : if (dodrop) {
747 0 : if (oldp != NULL || *oldlenp != 0)
748 0 : return (EINVAL);
749 0 : if (newp == NULL)
750 0 : return (EPERM);
751 0 : if (newlen < sizeof(tir))
752 0 : return (ENOMEM);
753 0 : if ((error = copyin(newp, &tir, sizeof (tir))) != 0 )
754 0 : return (error);
755 : } else {
756 0 : if (oldp == NULL)
757 0 : return (EINVAL);
758 0 : if (*oldlenp < sizeof(tir))
759 0 : return (ENOMEM);
760 0 : if (newp != NULL || newlen != 0)
761 0 : return (EINVAL);
762 0 : if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 )
763 0 : return (error);
764 : }
765 0 : switch (tir.faddr.ss_family) {
766 : #ifdef INET6
767 : case AF_INET6:
768 0 : fin6 = (struct sockaddr_in6 *)&tir.faddr;
769 0 : error = in6_embedscope(&f6, fin6, NULL);
770 0 : if (error)
771 0 : return EINVAL; /*?*/
772 0 : lin6 = (struct sockaddr_in6 *)&tir.laddr;
773 0 : error = in6_embedscope(&l6, lin6, NULL);
774 0 : if (error)
775 0 : return EINVAL; /*?*/
776 : break;
777 : #endif
778 : case AF_INET:
779 0 : fin = (struct sockaddr_in *)&tir.faddr;
780 0 : lin = (struct sockaddr_in *)&tir.laddr;
781 0 : break;
782 : default:
783 0 : return (EINVAL);
784 : }
785 :
786 0 : switch (tir.faddr.ss_family) {
787 : #ifdef INET6
788 : case AF_INET6:
789 0 : inp = in6_pcbhashlookup(&tcbtable, &f6,
790 0 : fin6->sin6_port, &l6, lin6->sin6_port, tir.rdomain);
791 0 : break;
792 : #endif
793 : case AF_INET:
794 0 : inp = in_pcbhashlookup(&tcbtable, fin->sin_addr,
795 0 : fin->sin_port, lin->sin_addr, lin->sin_port, tir.rdomain);
796 0 : break;
797 : default:
798 0 : unhandled_af(tir.faddr.ss_family);
799 : }
800 :
801 0 : if (dodrop) {
802 0 : if (inp && (tp = intotcpcb(inp)) &&
803 0 : ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0))
804 0 : tp = tcp_drop(tp, ECONNABORTED);
805 : else
806 : error = ESRCH;
807 0 : return (error);
808 : }
809 :
810 0 : if (inp == NULL) {
811 0 : tcpstat_inc(tcps_pcbhashmiss);
812 0 : switch (tir.faddr.ss_family) {
813 : #ifdef INET6
814 : case AF_INET6:
815 0 : inp = in6_pcblookup_listen(&tcbtable,
816 0 : &l6, lin6->sin6_port, NULL, tir.rdomain);
817 0 : break;
818 : #endif
819 : case AF_INET:
820 0 : inp = in_pcblookup_listen(&tcbtable,
821 0 : lin->sin_addr, lin->sin_port, NULL, tir.rdomain);
822 0 : break;
823 : }
824 : }
825 :
826 0 : if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) {
827 0 : tir.ruid = inp->inp_socket->so_ruid;
828 0 : tir.euid = inp->inp_socket->so_euid;
829 0 : } else {
830 0 : tir.ruid = -1;
831 0 : tir.euid = -1;
832 : }
833 :
834 0 : *oldlenp = sizeof (tir);
835 0 : error = copyout((void *)&tir, oldp, sizeof (tir));
836 0 : return (error);
837 0 : }
838 :
839 : int
840 0 : tcp_sysctl_tcpstat(void *oldp, size_t *oldlenp, void *newp)
841 : {
842 0 : uint64_t counters[tcps_ncounters];
843 0 : struct tcpstat tcpstat;
844 : struct syn_cache_set *set;
845 : int i = 0;
846 :
847 : #define ASSIGN(field) do { tcpstat.field = counters[i++]; } while (0)
848 :
849 0 : memset(&tcpstat, 0, sizeof tcpstat);
850 0 : counters_read(tcpcounters, counters, nitems(counters));
851 0 : ASSIGN(tcps_connattempt);
852 0 : ASSIGN(tcps_accepts);
853 0 : ASSIGN(tcps_connects);
854 0 : ASSIGN(tcps_drops);
855 0 : ASSIGN(tcps_conndrops);
856 0 : ASSIGN(tcps_closed);
857 0 : ASSIGN(tcps_segstimed);
858 0 : ASSIGN(tcps_rttupdated);
859 0 : ASSIGN(tcps_delack);
860 0 : ASSIGN(tcps_timeoutdrop);
861 0 : ASSIGN(tcps_rexmttimeo);
862 0 : ASSIGN(tcps_persisttimeo);
863 0 : ASSIGN(tcps_persistdrop);
864 0 : ASSIGN(tcps_keeptimeo);
865 0 : ASSIGN(tcps_keepprobe);
866 0 : ASSIGN(tcps_keepdrops);
867 0 : ASSIGN(tcps_sndtotal);
868 0 : ASSIGN(tcps_sndpack);
869 0 : ASSIGN(tcps_sndbyte);
870 0 : ASSIGN(tcps_sndrexmitpack);
871 0 : ASSIGN(tcps_sndrexmitbyte);
872 0 : ASSIGN(tcps_sndrexmitfast);
873 0 : ASSIGN(tcps_sndacks);
874 0 : ASSIGN(tcps_sndprobe);
875 0 : ASSIGN(tcps_sndurg);
876 0 : ASSIGN(tcps_sndwinup);
877 0 : ASSIGN(tcps_sndctrl);
878 0 : ASSIGN(tcps_rcvtotal);
879 0 : ASSIGN(tcps_rcvpack);
880 0 : ASSIGN(tcps_rcvbyte);
881 0 : ASSIGN(tcps_rcvbadsum);
882 0 : ASSIGN(tcps_rcvbadoff);
883 0 : ASSIGN(tcps_rcvmemdrop);
884 0 : ASSIGN(tcps_rcvnosec);
885 0 : ASSIGN(tcps_rcvshort);
886 0 : ASSIGN(tcps_rcvduppack);
887 0 : ASSIGN(tcps_rcvdupbyte);
888 0 : ASSIGN(tcps_rcvpartduppack);
889 0 : ASSIGN(tcps_rcvpartdupbyte);
890 0 : ASSIGN(tcps_rcvoopack);
891 0 : ASSIGN(tcps_rcvoobyte);
892 0 : ASSIGN(tcps_rcvpackafterwin);
893 0 : ASSIGN(tcps_rcvbyteafterwin);
894 0 : ASSIGN(tcps_rcvafterclose);
895 0 : ASSIGN(tcps_rcvwinprobe);
896 0 : ASSIGN(tcps_rcvdupack);
897 0 : ASSIGN(tcps_rcvacktoomuch);
898 0 : ASSIGN(tcps_rcvacktooold);
899 0 : ASSIGN(tcps_rcvackpack);
900 0 : ASSIGN(tcps_rcvackbyte);
901 0 : ASSIGN(tcps_rcvwinupd);
902 0 : ASSIGN(tcps_pawsdrop);
903 0 : ASSIGN(tcps_predack);
904 0 : ASSIGN(tcps_preddat);
905 0 : ASSIGN(tcps_pcbhashmiss);
906 0 : ASSIGN(tcps_noport);
907 0 : ASSIGN(tcps_badsyn);
908 0 : ASSIGN(tcps_dropsyn);
909 0 : ASSIGN(tcps_rcvbadsig);
910 0 : ASSIGN(tcps_rcvgoodsig);
911 0 : ASSIGN(tcps_inswcsum);
912 0 : ASSIGN(tcps_outswcsum);
913 0 : ASSIGN(tcps_ecn_accepts);
914 0 : ASSIGN(tcps_ecn_rcvece);
915 0 : ASSIGN(tcps_ecn_rcvcwr);
916 0 : ASSIGN(tcps_ecn_rcvce);
917 0 : ASSIGN(tcps_ecn_sndect);
918 0 : ASSIGN(tcps_ecn_sndece);
919 0 : ASSIGN(tcps_ecn_sndcwr);
920 0 : ASSIGN(tcps_cwr_ecn);
921 0 : ASSIGN(tcps_cwr_frecovery);
922 0 : ASSIGN(tcps_cwr_timeout);
923 0 : ASSIGN(tcps_sc_added);
924 0 : ASSIGN(tcps_sc_completed);
925 0 : ASSIGN(tcps_sc_timed_out);
926 0 : ASSIGN(tcps_sc_overflowed);
927 0 : ASSIGN(tcps_sc_reset);
928 0 : ASSIGN(tcps_sc_unreach);
929 0 : ASSIGN(tcps_sc_bucketoverflow);
930 0 : ASSIGN(tcps_sc_aborted);
931 0 : ASSIGN(tcps_sc_dupesyn);
932 0 : ASSIGN(tcps_sc_dropped);
933 0 : ASSIGN(tcps_sc_collisions);
934 0 : ASSIGN(tcps_sc_retransmitted);
935 0 : ASSIGN(tcps_sc_seedrandom);
936 0 : ASSIGN(tcps_sc_hash_size);
937 0 : ASSIGN(tcps_sc_entry_count);
938 0 : ASSIGN(tcps_sc_entry_limit);
939 0 : ASSIGN(tcps_sc_bucket_maxlen);
940 0 : ASSIGN(tcps_sc_bucket_limit);
941 0 : ASSIGN(tcps_sc_uses_left);
942 0 : ASSIGN(tcps_conndrained);
943 0 : ASSIGN(tcps_sack_recovery_episode);
944 0 : ASSIGN(tcps_sack_rexmits);
945 0 : ASSIGN(tcps_sack_rexmit_bytes);
946 0 : ASSIGN(tcps_sack_rcv_opts);
947 0 : ASSIGN(tcps_sack_snd_opts);
948 :
949 : #undef ASSIGN
950 :
951 0 : set = &tcp_syn_cache[tcp_syn_cache_active];
952 0 : tcpstat.tcps_sc_hash_size = set->scs_size;
953 0 : tcpstat.tcps_sc_entry_count = set->scs_count;
954 0 : tcpstat.tcps_sc_entry_limit = tcp_syn_cache_limit;
955 0 : tcpstat.tcps_sc_bucket_maxlen = 0;
956 0 : for (i = 0; i < set->scs_size; i++) {
957 0 : if (tcpstat.tcps_sc_bucket_maxlen <
958 0 : set->scs_buckethead[i].sch_length)
959 0 : tcpstat.tcps_sc_bucket_maxlen =
960 : set->scs_buckethead[i].sch_length;
961 : }
962 0 : tcpstat.tcps_sc_bucket_limit = tcp_syn_bucket_limit;
963 0 : tcpstat.tcps_sc_uses_left = set->scs_use;
964 :
965 0 : return (sysctl_rdstruct(oldp, oldlenp, newp,
966 : &tcpstat, sizeof(tcpstat)));
967 0 : }
968 :
969 : /*
970 : * Sysctl for tcp variables.
971 : */
972 : int
973 0 : tcp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
974 : size_t newlen)
975 : {
976 0 : int error, nval;
977 :
978 : /* All sysctl names at this level are terminal. */
979 0 : if (namelen != 1)
980 0 : return (ENOTDIR);
981 :
982 0 : switch (name[0]) {
983 : case TCPCTL_SACK:
984 0 : NET_LOCK();
985 0 : error = sysctl_int(oldp, oldlenp, newp, newlen,
986 : &tcp_do_sack);
987 0 : NET_UNLOCK();
988 0 : return (error);
989 :
990 : case TCPCTL_SLOWHZ:
991 0 : return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ));
992 :
993 : case TCPCTL_BADDYNAMIC:
994 0 : NET_LOCK();
995 0 : error = sysctl_struct(oldp, oldlenp, newp, newlen,
996 : baddynamicports.tcp, sizeof(baddynamicports.tcp));
997 0 : NET_UNLOCK();
998 0 : return (error);
999 :
1000 : case TCPCTL_ROOTONLY:
1001 0 : if (newp && securelevel > 0)
1002 0 : return (EPERM);
1003 0 : NET_LOCK();
1004 0 : error = sysctl_struct(oldp, oldlenp, newp, newlen,
1005 : rootonlyports.tcp, sizeof(rootonlyports.tcp));
1006 0 : NET_UNLOCK();
1007 0 : return (error);
1008 :
1009 : case TCPCTL_IDENT:
1010 0 : NET_LOCK();
1011 0 : error = tcp_ident(oldp, oldlenp, newp, newlen, 0);
1012 0 : NET_UNLOCK();
1013 0 : return (error);
1014 :
1015 : case TCPCTL_DROP:
1016 0 : NET_LOCK();
1017 0 : error = tcp_ident(oldp, oldlenp, newp, newlen, 1);
1018 0 : NET_UNLOCK();
1019 0 : return (error);
1020 :
1021 : case TCPCTL_ALWAYS_KEEPALIVE:
1022 0 : NET_LOCK();
1023 0 : error = sysctl_int(oldp, oldlenp, newp, newlen,
1024 : &tcp_always_keepalive);
1025 0 : NET_UNLOCK();
1026 0 : return (error);
1027 :
1028 : #ifdef TCP_ECN
1029 : case TCPCTL_ECN:
1030 0 : NET_LOCK();
1031 0 : error = sysctl_int(oldp, oldlenp, newp, newlen,
1032 : &tcp_do_ecn);
1033 0 : NET_UNLOCK();
1034 0 : return (error);
1035 : #endif
1036 : case TCPCTL_REASS_LIMIT:
1037 0 : NET_LOCK();
1038 0 : nval = tcp_reass_limit;
1039 0 : error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
1040 0 : if (!error && nval != tcp_reass_limit) {
1041 0 : error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0);
1042 0 : if (!error)
1043 0 : tcp_reass_limit = nval;
1044 : }
1045 0 : NET_UNLOCK();
1046 0 : return (error);
1047 :
1048 : case TCPCTL_SACKHOLE_LIMIT:
1049 0 : NET_LOCK();
1050 0 : nval = tcp_sackhole_limit;
1051 0 : error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
1052 0 : if (!error && nval != tcp_sackhole_limit) {
1053 0 : error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0);
1054 0 : if (!error)
1055 0 : tcp_sackhole_limit = nval;
1056 : }
1057 0 : NET_UNLOCK();
1058 0 : return (error);
1059 :
1060 : case TCPCTL_STATS:
1061 0 : return (tcp_sysctl_tcpstat(oldp, oldlenp, newp));
1062 :
1063 : case TCPCTL_SYN_USE_LIMIT:
1064 0 : NET_LOCK();
1065 0 : error = sysctl_int(oldp, oldlenp, newp, newlen,
1066 : &tcp_syn_use_limit);
1067 0 : if (!error && newp != NULL) {
1068 : /*
1069 : * Global tcp_syn_use_limit is used when reseeding a
1070 : * new cache. Also update the value in active cache.
1071 : */
1072 0 : if (tcp_syn_cache[0].scs_use > tcp_syn_use_limit)
1073 0 : tcp_syn_cache[0].scs_use = tcp_syn_use_limit;
1074 0 : if (tcp_syn_cache[1].scs_use > tcp_syn_use_limit)
1075 0 : tcp_syn_cache[1].scs_use = tcp_syn_use_limit;
1076 : }
1077 0 : NET_UNLOCK();
1078 0 : return (error);
1079 :
1080 : case TCPCTL_SYN_HASH_SIZE:
1081 0 : NET_LOCK();
1082 0 : nval = tcp_syn_hash_size;
1083 0 : error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
1084 0 : if (!error && nval != tcp_syn_hash_size) {
1085 0 : if (nval < 1 || nval > 100000) {
1086 : error = EINVAL;
1087 0 : } else {
1088 : /*
1089 : * If global hash size has been changed,
1090 : * switch sets as soon as possible. Then
1091 : * the actual hash array will be reallocated.
1092 : */
1093 0 : if (tcp_syn_cache[0].scs_size != nval)
1094 0 : tcp_syn_cache[0].scs_use = 0;
1095 0 : if (tcp_syn_cache[1].scs_size != nval)
1096 0 : tcp_syn_cache[1].scs_use = 0;
1097 0 : tcp_syn_hash_size = nval;
1098 : }
1099 : }
1100 0 : NET_UNLOCK();
1101 0 : return (error);
1102 :
1103 : default:
1104 0 : if (name[0] < TCPCTL_MAXID) {
1105 0 : NET_LOCK();
1106 0 : error = sysctl_int_arr(tcpctl_vars, name, namelen,
1107 : oldp, oldlenp, newp, newlen);
1108 0 : NET_UNLOCK();
1109 0 : return (error);
1110 : }
1111 0 : return (ENOPROTOOPT);
1112 : }
1113 : /* NOTREACHED */
1114 0 : }
1115 :
1116 : /*
1117 : * Scale the send buffer so that inflight data is not accounted against
1118 : * the limit. The buffer will scale with the congestion window, if the
1119 : * the receiver stops acking data the window will shrink and therefor
1120 : * the buffer size will shrink as well.
1121 : * In low memory situation try to shrink the buffer to the initial size
1122 : * disabling the send buffer scaling as long as the situation persists.
1123 : */
1124 : void
1125 0 : tcp_update_sndspace(struct tcpcb *tp)
1126 : {
1127 0 : struct socket *so = tp->t_inpcb->inp_socket;
1128 0 : u_long nmax = so->so_snd.sb_hiwat;
1129 :
1130 0 : if (sbchecklowmem()) {
1131 : /* low on memory try to get rid of some */
1132 0 : if (tcp_sendspace < nmax)
1133 0 : nmax = tcp_sendspace;
1134 0 : } else if (so->so_snd.sb_wat != tcp_sendspace)
1135 : /* user requested buffer size, auto-scaling disabled */
1136 0 : nmax = so->so_snd.sb_wat;
1137 : else
1138 : /* automatic buffer scaling */
1139 0 : nmax = MIN(sb_max, so->so_snd.sb_wat + tp->snd_max -
1140 : tp->snd_una);
1141 :
1142 : /* a writable socket must be preserved because of poll(2) semantics */
1143 0 : if (sbspace(so, &so->so_snd) >= so->so_snd.sb_lowat) {
1144 0 : if (nmax < so->so_snd.sb_cc + so->so_snd.sb_lowat)
1145 0 : nmax = so->so_snd.sb_cc + so->so_snd.sb_lowat;
1146 0 : if (nmax * 2 < so->so_snd.sb_mbcnt + so->so_snd.sb_lowat)
1147 0 : nmax = (so->so_snd.sb_mbcnt+so->so_snd.sb_lowat+1) / 2;
1148 : }
1149 :
1150 : /* round to MSS boundary */
1151 0 : nmax = roundup(nmax, tp->t_maxseg);
1152 :
1153 0 : if (nmax != so->so_snd.sb_hiwat)
1154 0 : sbreserve(so, &so->so_snd, nmax);
1155 0 : }
1156 :
1157 : /*
1158 : * Scale the recv buffer by looking at how much data was transferred in
1159 : * on approximated RTT. If more than a big part of the recv buffer was
1160 : * transferred during that time we increase the buffer by a constant.
1161 : * In low memory situation try to shrink the buffer to the initial size.
1162 : */
1163 : void
1164 0 : tcp_update_rcvspace(struct tcpcb *tp)
1165 : {
1166 0 : struct socket *so = tp->t_inpcb->inp_socket;
1167 0 : u_long nmax = so->so_rcv.sb_hiwat;
1168 :
1169 0 : if (sbchecklowmem()) {
1170 : /* low on memory try to get rid of some */
1171 0 : if (tcp_recvspace < nmax)
1172 0 : nmax = tcp_recvspace;
1173 0 : } else if (so->so_rcv.sb_wat != tcp_recvspace)
1174 : /* user requested buffer size, auto-scaling disabled */
1175 0 : nmax = so->so_rcv.sb_wat;
1176 : else {
1177 : /* automatic buffer scaling */
1178 0 : if (tp->rfbuf_cnt > so->so_rcv.sb_hiwat / 8 * 7)
1179 0 : nmax = MIN(sb_max, so->so_rcv.sb_hiwat +
1180 : tcp_autorcvbuf_inc);
1181 : }
1182 :
1183 : /* a readable socket must be preserved because of poll(2) semantics */
1184 0 : if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat &&
1185 0 : nmax < so->so_snd.sb_lowat)
1186 0 : nmax = so->so_snd.sb_lowat;
1187 :
1188 0 : if (nmax == so->so_rcv.sb_hiwat)
1189 0 : return;
1190 :
1191 : /* round to MSS boundary */
1192 0 : nmax = roundup(nmax, tp->t_maxseg);
1193 0 : sbreserve(so, &so->so_rcv, nmax);
1194 0 : }
|