Line data Source code
1 : /* $OpenBSD: nfs_socket.c,v 1.131 2018/09/10 16:14:08 bluhm Exp $ */
2 : /* $NetBSD: nfs_socket.c,v 1.27 1996/04/15 20:20:00 thorpej Exp $ */
3 :
4 : /*
5 : * Copyright (c) 1989, 1991, 1993, 1995
6 : * The Regents of the University of California. All rights reserved.
7 : *
8 : * This code is derived from software contributed to Berkeley by
9 : * Rick Macklem at The University of Guelph.
10 : *
11 : * Redistribution and use in source and binary forms, with or without
12 : * modification, are permitted provided that the following conditions
13 : * are met:
14 : * 1. Redistributions of source code must retain the above copyright
15 : * notice, this list of conditions and the following disclaimer.
16 : * 2. Redistributions in binary form must reproduce the above copyright
17 : * notice, this list of conditions and the following disclaimer in the
18 : * documentation and/or other materials provided with the distribution.
19 : * 3. Neither the name of the University nor the names of its contributors
20 : * may be used to endorse or promote products derived from this software
21 : * without specific prior written permission.
22 : *
23 : * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 : * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 : * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 : * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 : * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 : * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 : * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 : * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 : * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 : * SUCH DAMAGE.
34 : *
35 : * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
36 : */
37 :
38 : /*
39 : * Socket operations for use by nfs
40 : */
41 :
42 : #include <sys/param.h>
43 : #include <sys/systm.h>
44 : #include <sys/proc.h>
45 : #include <sys/mount.h>
46 : #include <sys/kernel.h>
47 : #include <sys/mbuf.h>
48 : #include <sys/vnode.h>
49 : #include <sys/domain.h>
50 : #include <sys/protosw.h>
51 : #include <sys/signalvar.h>
52 : #include <sys/socket.h>
53 : #include <sys/socketvar.h>
54 : #include <sys/syslog.h>
55 : #include <sys/tprintf.h>
56 : #include <sys/namei.h>
57 : #include <sys/pool.h>
58 : #include <sys/queue.h>
59 :
60 : #include <netinet/in.h>
61 : #include <netinet/tcp.h>
62 :
63 : #include <nfs/rpcv2.h>
64 : #include <nfs/nfsproto.h>
65 : #include <nfs/nfs.h>
66 : #include <nfs/xdr_subs.h>
67 : #include <nfs/nfsm_subs.h>
68 : #include <nfs/nfsmount.h>
69 : #include <nfs/nfs_var.h>
70 :
71 : /* External data, mostly RPC constants in XDR form. */
72 : extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers,
73 : rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr;
74 : extern u_int32_t nfs_prog;
75 : extern struct nfsstats nfsstats;
76 : extern int nfsv3_procid[NFS_NPROCS];
77 : extern int nfs_ticks;
78 :
79 : extern struct pool nfsrv_descript_pl;
80 :
81 : /*
82 : * There is a congestion window for outstanding rpcs maintained per mount
83 : * point. The cwnd size is adjusted in roughly the way that:
84 : * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
85 : * SIGCOMM '88". ACM, August 1988.
86 : * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
87 : * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
88 : * of rpcs is in progress.
89 : * (The sent count and cwnd are scaled for integer arith.)
90 : * Variants of "slow start" were tried and were found to be too much of a
91 : * performance hit (ave. rtt 3 times larger),
92 : * I suspect due to the large rtt that nfs rpcs have.
93 : */
/* Scale factor for the congestion window arithmetic described above. */
#define NFS_CWNDSCALE	256
/* Hard ceiling on the congestion window: 32 outstanding RPCs worth. */
#define NFS_MAXCWND	(NFS_CWNDSCALE * 32)
/*
 * Retransmit backoff multipliers; presumably indexed by retransmit
 * count by the timer code (not visible in this file chunk) — confirm
 * against nfs_timer().
 */
int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256 };

/*
 * RTT estimator: map every NFS procedure number onto one of the
 * per-mount timer classes so that RPCs with similar latency profiles
 * share an RTT estimate (see nfs_update_rtt()/nfs_estimate_rto()).
 */
enum nfs_rto_timers nfs_ptimers[NFS_NPROCS] = {
	NFS_DEFAULT_TIMER,	/* NULL */
	NFS_GETATTR_TIMER,	/* GETATTR */
	NFS_DEFAULT_TIMER,	/* SETATTR */
	NFS_LOOKUP_TIMER,	/* LOOKUP */
	NFS_GETATTR_TIMER,	/* ACCESS */
	NFS_READ_TIMER,		/* READLINK */
	NFS_READ_TIMER,		/* READ */
	NFS_WRITE_TIMER,	/* WRITE */
	NFS_DEFAULT_TIMER,	/* CREATE */
	NFS_DEFAULT_TIMER,	/* MKDIR */
	NFS_DEFAULT_TIMER,	/* SYMLINK */
	NFS_DEFAULT_TIMER,	/* MKNOD */
	NFS_DEFAULT_TIMER,	/* REMOVE */
	NFS_DEFAULT_TIMER,	/* RMDIR */
	NFS_DEFAULT_TIMER,	/* RENAME */
	NFS_DEFAULT_TIMER,	/* LINK */
	NFS_READ_TIMER,		/* READDIR */
	NFS_READ_TIMER,		/* READDIRPLUS */
	NFS_DEFAULT_TIMER,	/* FSSTAT */
	NFS_DEFAULT_TIMER,	/* FSINFO */
	NFS_DEFAULT_TIMER,	/* PATHCONF */
	NFS_DEFAULT_TIMER,	/* COMMIT */
	NFS_DEFAULT_TIMER,	/* NOOP */
};

/* RTT estimator helpers (defined below). */
void nfs_init_rtt(struct nfsmount *);
void nfs_update_rtt(struct nfsreq *);
int  nfs_estimate_rto(struct nfsmount *, u_int32_t procnum);

/* mbuf re-alignment helpers (defined elsewhere in this file). */
void nfs_realign(struct mbuf **, int);
void nfs_realign_fixup(struct mbuf *, struct mbuf *, unsigned int *);

/* Client-side receive/reply machinery (defined below). */
int nfs_rcvlock(struct nfsreq *);
int nfs_receive(struct nfsreq *, struct mbuf **, struct mbuf **);
int nfs_reconnect(struct nfsreq *);
int nfs_reply(struct nfsreq *);
void nfs_msg(struct nfsreq *, char *);
void nfs_rcvunlock(int *);

int nfsrv_getstream(struct nfssvc_sock *, int);

/*
 * nfs_realign() statistics: chains inspected vs. chains actually
 * copied — NOTE(review): inferred from the names; confirm against
 * nfs_realign() itself.
 */
unsigned int nfs_realign_test = 0;
unsigned int nfs_realign_count = 0;
143 :
144 : /* Initialize the RTT estimator state for a new mount point. */
145 : void
146 0 : nfs_init_rtt(struct nfsmount *nmp)
147 : {
148 : int i;
149 :
150 0 : for (i = 0; i < NFS_MAX_TIMER; i++)
151 0 : nmp->nm_srtt[i] = NFS_INITRTT;
152 0 : for (i = 0; i < NFS_MAX_TIMER; i++)
153 0 : nmp->nm_sdrtt[i] = 0;
154 0 : }
155 :
156 : /*
157 : * Update a mount point's RTT estimator state using data from the
158 : * passed-in request.
159 : *
160 : * Use a gain of 0.125 on the mean and a gain of 0.25 on the deviation.
161 : *
162 : * NB: Since the timer resolution of NFS_HZ is so course, it can often
163 : * result in r_rtt == 0. Since r_rtt == N means that the actual RTT is
164 : * between N + dt and N + 2 - dt ticks, add 1 before calculating the
165 : * update values.
166 : */
167 : void
168 0 : nfs_update_rtt(struct nfsreq *rep)
169 : {
170 0 : int t1 = rep->r_rtt + 1;
171 0 : int index = nfs_ptimers[rep->r_procnum] - 1;
172 0 : int *srtt = &rep->r_nmp->nm_srtt[index];
173 0 : int *sdrtt = &rep->r_nmp->nm_sdrtt[index];
174 :
175 0 : t1 -= *srtt >> 3;
176 0 : *srtt += t1;
177 0 : if (t1 < 0)
178 0 : t1 = -t1;
179 0 : t1 -= *sdrtt >> 2;
180 0 : *sdrtt += t1;
181 0 : }
182 :
183 : /*
184 : * Estimate RTO for an NFS RPC sent via an unreliable datagram.
185 : *
186 : * Use the mean and mean deviation of RTT for the appropriate type
187 : * of RPC for the frequent RPCs and a default for the others.
188 : * The justification for doing "other" this way is that these RPCs
189 : * happen so infrequently that timer est. would probably be stale.
190 : * Also, since many of these RPCs are non-idempotent, a conservative
191 : * timeout is desired.
192 : *
193 : * getattr, lookup - A+2D
194 : * read, write - A+4D
195 : * other - nm_timeo
196 : */
197 : int
198 0 : nfs_estimate_rto(struct nfsmount *nmp, u_int32_t procnum)
199 : {
200 0 : enum nfs_rto_timers timer = nfs_ptimers[procnum];
201 0 : int index = timer - 1;
202 : int rto;
203 :
204 0 : switch (timer) {
205 : case NFS_GETATTR_TIMER:
206 : case NFS_LOOKUP_TIMER:
207 0 : rto = ((nmp->nm_srtt[index] + 3) >> 2) +
208 0 : ((nmp->nm_sdrtt[index] + 1) >> 1);
209 0 : break;
210 : case NFS_READ_TIMER:
211 : case NFS_WRITE_TIMER:
212 0 : rto = ((nmp->nm_srtt[index] + 7) >> 3) +
213 0 : (nmp->nm_sdrtt[index] + 1);
214 0 : break;
215 : default:
216 0 : rto = nmp->nm_timeo;
217 0 : return (rto);
218 : }
219 :
220 0 : if (rto < NFS_MINRTO)
221 0 : rto = NFS_MINRTO;
222 0 : else if (rto > NFS_MAXRTO)
223 0 : rto = NFS_MAXRTO;
224 :
225 0 : return (rto);
226 0 : }
227 :
228 :
229 :
/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 *
 * nmp: the mount point being (re)connected; on success nmp->nm_so holds
 *      the new socket and the congestion/RTT state is reset.
 * rep: the request driving the connect, used only for interruptibility
 *      checks while waiting for a TCP connect; may be NULL — TODO
 *      confirm callers (the sigintr check already guards on it).
 *
 * Returns 0 on success or an errno; on any failure after socreate()
 * the partially set up socket is torn down via nfs_disconnect().
 */
int
nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
{
	struct socket *so;
	int s, error, rcvreserve, sndreserve;
	struct sockaddr *saddr;
	struct sockaddr_in *sin;
	struct mbuf *nam = NULL, *mopt = NULL;

	/* Only datagram (UDP) and stream (TCP) transports are supported. */
	if (!(nmp->nm_sotype == SOCK_DGRAM || nmp->nm_sotype == SOCK_STREAM))
		return (EINVAL);

	nmp->nm_so = NULL;
	saddr = mtod(nmp->nm_nam, struct sockaddr *);
	error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
	    nmp->nm_soproto);
	if (error) {
		nfs_disconnect(nmp);
		return (error);
	}

	/* Allocate mbufs possibly waiting before grabbing the socket lock. */
	if (nmp->nm_sotype == SOCK_STREAM || saddr->sa_family == AF_INET)
		MGET(mopt, M_WAIT, MT_SOOPTS);	/* reused for all setsockopts */
	if (saddr->sa_family == AF_INET)
		MGET(nam, M_WAIT, MT_SONAME);	/* scratch sockaddr for bind */

	so = nmp->nm_so;
	s = solock(so);		/* all socket manipulation below is locked */
	nmp->nm_soflags = so->so_proto->pr_flags;

	/*
	 * Some servers require that the client port be a reserved port number.
	 * We always allocate a reserved port, as this prevents filehandle
	 * disclosure through UDP port capture.
	 */
	if (saddr->sa_family == AF_INET) {
		int *ip;

		/* Temporarily request a low (reserved) local port. */
		mopt->m_len = sizeof(int);
		ip = mtod(mopt, int *);
		*ip = IP_PORTRANGE_LOW;
		error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
		if (error)
			goto bad;

		/* Bind to INADDR_ANY:0 so the kernel picks the port. */
		sin = mtod(nam, struct sockaddr_in *);
		memset(sin, 0, sizeof(*sin));
		sin->sin_len = nam->m_len = sizeof(struct sockaddr_in);
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = INADDR_ANY;
		sin->sin_port = htons(0);
		error = sobind(so, nam, &proc0);
		if (error)
			goto bad;

		/* Restore the default port range for later users. */
		mopt->m_len = sizeof(int);
		ip = mtod(mopt, int *);
		*ip = IP_PORTRANGE_DEFAULT;
		error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
		if (error)
			goto bad;
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_soflags & PR_CONNREQUIRED) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		error = soconnect(so, nmp->nm_nam);
		if (error)
			goto bad;

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait timing out so
		 * that interruptible mounts don't hang here for a long time.
		 */
		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
			sosleep(so, &so->so_timeo, PSOCK, "nfscon", 2 * hz);
			if ((so->so_state & SS_ISCONNECTING) &&
			    so->so_error == 0 && rep &&
			    (error = nfs_sigintr(nmp, rep, rep->r_procp)) != 0){
				so->so_state &= ~SS_ISCONNECTING;
				goto bad;
			}
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			goto bad;
		}
	}
	/*
	 * Always set receive timeout to detect server crash and reconnect.
	 * Otherwise, we can get stuck in soreceive forever.
	 */
	so->so_rcv.sb_timeo = (5 * hz);
	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT))
		so->so_snd.sb_timeo = (5 * hz);
	else
		so->so_snd.sb_timeo = 0;
	/* Size the socket buffers to fit a couple of maximal RPCs. */
	if (nmp->nm_sotype == SOCK_DGRAM) {
		sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
		rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
		    NFS_MAXPKTHDR) * 2;
	} else if (nmp->nm_sotype == SOCK_STREAM) {
		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			*mtod(mopt, int32_t *) = 1;
			mopt->m_len = sizeof(int32_t);
			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, mopt);
		}
		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
			*mtod(mopt, int32_t *) = 1;
			mopt->m_len = sizeof(int32_t);
			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, mopt);
		}
		/* Extra u_int32_t accounts for the RPC record mark. */
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
		    sizeof (u_int32_t)) * 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
		    sizeof (u_int32_t)) * 2;
	} else {
		panic("%s: nm_sotype %d", __func__, nmp->nm_sotype);
	}
	error = soreserve(so, sndreserve, rcvreserve);
	if (error)
		goto bad;
	so->so_rcv.sb_flags |= SB_NOINTR;
	so->so_snd.sb_flags |= SB_NOINTR;
	sounlock(so, s);

	m_freem(mopt);
	m_freem(nam);

	/* Initialize other non-zero congestion variables */
	nfs_init_rtt(nmp);
	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
	nmp->nm_sent = 0;
	nmp->nm_timeouts = 0;
	return (0);

bad:
	/* Unified failure path: drop the lock, free mbufs, tear down. */
	sounlock(so, s);

	m_freem(mopt);
	m_freem(nam);

	nfs_disconnect(nmp);
	return (error);
}
389 :
390 : /*
391 : * Reconnect routine:
392 : * Called when a connection is broken on a reliable protocol.
393 : * - clean up the old socket
394 : * - nfs_connect() again
395 : * - set R_MUSTRESEND for all outstanding requests on mount point
396 : * If this fails the mount point is DEAD!
397 : * nb: Must be called with the nfs_sndlock() set on the mount point.
398 : */
399 : int
400 0 : nfs_reconnect(struct nfsreq *rep)
401 : {
402 : struct nfsreq *rp;
403 0 : struct nfsmount *nmp = rep->r_nmp;
404 : int error;
405 :
406 0 : nfs_disconnect(nmp);
407 0 : while ((error = nfs_connect(nmp, rep)) != 0) {
408 0 : if (error == EINTR || error == ERESTART)
409 0 : return (EINTR);
410 0 : (void)tsleep((caddr_t)&lbolt, PSOCK, "nfsrecon", 0);
411 : }
412 :
413 : /*
414 : * Loop through outstanding request list and fix up all requests
415 : * on old socket.
416 : */
417 0 : TAILQ_FOREACH(rp, &nmp->nm_reqsq, r_chain) {
418 0 : rp->r_flags |= R_MUSTRESEND;
419 0 : rp->r_rexmit = 0;
420 : }
421 0 : return (0);
422 0 : }
423 :
424 : /*
425 : * NFS disconnect. Clean up and unlink.
426 : */
427 : void
428 0 : nfs_disconnect(struct nfsmount *nmp)
429 : {
430 : struct socket *so;
431 :
432 0 : if (nmp->nm_so) {
433 : so = nmp->nm_so;
434 0 : nmp->nm_so = NULL;
435 0 : soshutdown(so, SHUT_RDWR);
436 0 : soclose(so, 0);
437 0 : }
438 0 : }
439 :
440 : /*
441 : * This is the nfs send routine. For connection based socket types, it
442 : * must be called with an nfs_sndlock() on the socket.
443 : * "rep == NULL" indicates that it has been called from a server.
444 : * For the client side:
445 : * - return EINTR if the RPC is terminated, 0 otherwise
446 : * - set R_MUSTRESEND if the send fails for any reason
447 : * - do any cleanup required by recoverable socket errors (???)
448 : * For the server side:
449 : * - return EINTR or ERESTART if interrupted by a signal
450 : * - return EPIPE if a connection is lost for connection based sockets (TCP...)
451 : * - do any cleanup required by recoverable socket errors (???)
452 : */
453 : int
454 0 : nfs_send(struct socket *so, struct mbuf *nam, struct mbuf *top,
455 : struct nfsreq *rep)
456 : {
457 : struct mbuf *sendnam;
458 : int error, soflags, flags;
459 :
460 0 : if (rep) {
461 0 : if (rep->r_flags & R_SOFTTERM) {
462 0 : m_freem(top);
463 0 : return (EINTR);
464 : }
465 0 : if ((so = rep->r_nmp->nm_so) == NULL) {
466 0 : rep->r_flags |= R_MUSTRESEND;
467 0 : m_freem(top);
468 0 : return (0);
469 : }
470 0 : rep->r_flags &= ~R_MUSTRESEND;
471 0 : soflags = rep->r_nmp->nm_soflags;
472 0 : } else
473 0 : soflags = so->so_proto->pr_flags;
474 0 : if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
475 0 : sendnam = NULL;
476 : else
477 : sendnam = nam;
478 : flags = 0;
479 :
480 0 : error = sosend(so, sendnam, NULL, top, NULL, flags);
481 0 : if (error) {
482 0 : if (rep) {
483 : /*
484 : * Deal with errors for the client side.
485 : */
486 0 : if (rep->r_flags & R_SOFTTERM)
487 0 : error = EINTR;
488 : else
489 0 : rep->r_flags |= R_MUSTRESEND;
490 : }
491 :
492 : /*
493 : * Handle any recoverable (soft) socket errors here. (???)
494 : */
495 0 : if (error != EINTR && error != ERESTART &&
496 0 : error != EWOULDBLOCK && error != EPIPE)
497 0 : error = 0;
498 : }
499 0 : return (error);
500 0 : }
501 :
502 : #ifdef NFSCLIENT
/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 * small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
 *
 * On success *mp holds the received (realigned) mbuf chain and, for
 * unconnected datagram sockets, *aname the sender's address; both are
 * NULL on entry and on failure.
 */
int
nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp)
{
	struct socket *so;
	struct uio auio;
	struct iovec aio;
	struct mbuf *m;
	struct mbuf *control;
	u_int32_t len;
	struct mbuf **getnam;
	int error, sotype, rcvflg;
	struct proc *p = curproc;		/* XXX */

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = NULL;
	*aname = NULL;
	sotype = rep->r_nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
		if (error)
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		/* Reply already arrived or RPC was soft-terminated. */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
			nfs_sndunlock(&rep->r_nmp->nm_flag);
			return (EINTR);
		}
		so = rep->r_nmp->nm_so;
		if (!so) {
			error = nfs_reconnect(rep);
			if (error) {
				nfs_sndunlock(&rep->r_nmp->nm_flag);
				return (error);
			}
			goto tryagain;
		}
		/* Retransmit anything marked during a reconnect. */
		while (rep->r_flags & R_MUSTRESEND) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			nfsstats.rpcretries++;
			rep->r_rtt = 0;
			rep->r_flags &= ~R_TIMING;
			error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
			if (error) {
				if (error == EINTR || error == ERESTART ||
				    (error = nfs_reconnect(rep)) != 0) {
					nfs_sndunlock(&rep->r_nmp->nm_flag);
					return (error);
				}
				goto tryagain;
			}
		}
		nfs_sndunlock(&rep->r_nmp->nm_flag);
		if (sotype == SOCK_STREAM) {
			/* First read the 4-byte RPC record mark. */
			aio.iov_base = (caddr_t) &len;
			aio.iov_len = sizeof(u_int32_t);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_int32_t);
			auio.uio_procp = p;
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, NULL, &auio, NULL, NULL,
				    &rcvflg, 0);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
					/*
					 * looks like the server died after it
					 * received the request, make sure
					 * that we will retransmit and we
					 * don't get stuck here forever.
					 */
					if (rep->r_rexmit >=
					    rep->r_nmp->nm_retry) {
						nfsstats.rpctimeouts++;
						error = EPIPE;
					}
				}
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
				log(LOG_INFO,
				    "short receive (%zu/%zu) from nfs server %s\n",
				    sizeof(u_int32_t) - auio.uio_resid,
				    sizeof(u_int32_t),
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;

			/* Strip the last-fragment bit from the record mark. */
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				log(LOG_ERR, "%s (%u) from nfs server %s\n",
				    "impossible packet length",
				    len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}
			/* Now read the record body itself into *mp. */
			auio.uio_resid = len;
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, NULL, &auio, mp, NULL,
				    &rcvflg, 0);
			} while (error == EWOULDBLOCK || error == EINTR ||
			    error == ERESTART);
			if (!error && auio.uio_resid > 0) {
				log(LOG_INFO, "short receive (%zu/%u) from "
				    "nfs server %s\n", len - auio.uio_resid,
				    len, rep->r_nmp->nm_mountp->
				    mnt_stat.f_mntfromname);
				error = EPIPE;
			}
		} else {
			/*
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg.
			 * We have no use for control msg., but must grab them
			 * and then throw them away so we know what is going
			 * on.
			 */
			auio.uio_resid = len = 100000000; /* Anything Big */
			auio.uio_procp = p;
			do {
				rcvflg = 0;
				error = soreceive(so, NULL, &auio, mp, &control,
				    &rcvflg, 0);
				m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK ||
			    (!error && *mp == NULL && control));
			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		/* Stream error: force a reconnect and start over. */
		if (error && error != EINTR && error != ERESTART) {
			m_freemp(mp);
			if (error != EPIPE)
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
			if (!error) {
				error = nfs_reconnect(rep);
				if (!error)
					goto tryagain;
				nfs_sndunlock(&rep->r_nmp->nm_flag);
			}
		}
	} else {
		/* Datagram case: a single soreceive() does the work. */
		if ((so = rep->r_nmp->nm_so) == NULL)
			return (EACCES);
		if (so->so_state & SS_ISCONNECTED)
			getnam = NULL;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
		auio.uio_procp = p;
		do {
			rcvflg = 0;
			error = soreceive(so, getnam, &auio, mp, NULL,
			    &rcvflg, 0);
			if (error == EWOULDBLOCK &&
			    (rep->r_flags & R_SOFTTERM))
				return (EINTR);
		} while (error == EWOULDBLOCK);
		len -= auio.uio_resid;
	}
	if (error)
		m_freemp(mp);
	/*
	 * Search for any mbufs that are not a multiple of 4 bytes long
	 * or with m_data not longword aligned.
	 * These could cause pointer alignment problems, so copy them to
	 * well aligned mbufs.
	 */
	nfs_realign(mp, 5 * NFSX_UNSIGNED);
	return (error);
}
725 :
726 : /*
727 : * Implement receipt of reply on a socket.
728 : * We must search through the list of received datagrams matching them
729 : * with outstanding requests using the xid, until ours is found.
730 : */
731 : int
732 0 : nfs_reply(struct nfsreq *myrep)
733 : {
734 : struct nfsreq *rep;
735 0 : struct nfsmount *nmp = myrep->r_nmp;
736 0 : struct nfsm_info info;
737 0 : struct mbuf *nam;
738 : u_int32_t rxid, *tl, t1;
739 0 : caddr_t cp2;
740 : int error;
741 :
742 : /*
743 : * Loop around until we get our own reply
744 : */
745 0 : for (;;) {
746 : /*
747 : * Lock against other receivers so that I don't get stuck in
748 : * sbwait() after someone else has received my reply for me.
749 : * Also necessary for connection based protocols to avoid
750 : * race conditions during a reconnect.
751 : */
752 0 : error = nfs_rcvlock(myrep);
753 0 : if (error)
754 0 : return (error == EALREADY ? 0 : error);
755 :
756 : /*
757 : * Get the next Rpc reply off the socket
758 : */
759 0 : error = nfs_receive(myrep, &nam, &info.nmi_mrep);
760 0 : nfs_rcvunlock(&nmp->nm_flag);
761 0 : if (error) {
762 :
763 : /*
764 : * Ignore routing errors on connectionless protocols??
765 : */
766 0 : if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
767 0 : if (nmp->nm_so)
768 0 : nmp->nm_so->so_error = 0;
769 0 : continue;
770 : }
771 0 : return (error);
772 : }
773 0 : m_freem(nam);
774 :
775 : /*
776 : * Get the xid and check that it is an rpc reply
777 : */
778 0 : info.nmi_md = info.nmi_mrep;
779 0 : info.nmi_dpos = mtod(info.nmi_md, caddr_t);
780 0 : nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
781 0 : rxid = *tl++;
782 0 : if (*tl != rpc_reply) {
783 0 : nfsstats.rpcinvalid++;
784 0 : m_freem(info.nmi_mrep);
785 : nfsmout:
786 0 : continue;
787 : }
788 :
789 : /*
790 : * Loop through the request list to match up the reply
791 : * Iff no match, just drop the datagram
792 : */
793 0 : TAILQ_FOREACH(rep, &nmp->nm_reqsq, r_chain) {
794 0 : if (rep->r_mrep == NULL && rxid == rep->r_xid) {
795 : /* Found it.. */
796 0 : rep->r_mrep = info.nmi_mrep;
797 0 : rep->r_md = info.nmi_md;
798 0 : rep->r_dpos = info.nmi_dpos;
799 :
800 : /*
801 : * Update congestion window.
802 : * Do the additive increase of
803 : * one rpc/rtt.
804 : */
805 0 : if (nmp->nm_cwnd <= nmp->nm_sent) {
806 0 : nmp->nm_cwnd +=
807 0 : (NFS_CWNDSCALE * NFS_CWNDSCALE +
808 0 : (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
809 0 : if (nmp->nm_cwnd > NFS_MAXCWND)
810 0 : nmp->nm_cwnd = NFS_MAXCWND;
811 : }
812 0 : rep->r_flags &= ~R_SENT;
813 0 : nmp->nm_sent -= NFS_CWNDSCALE;
814 :
815 0 : if (rep->r_flags & R_TIMING)
816 0 : nfs_update_rtt(rep);
817 :
818 0 : nmp->nm_timeouts = 0;
819 0 : break;
820 : }
821 : }
822 : /*
823 : * If not matched to a request, drop it.
824 : * If it's mine, get out.
825 : */
826 0 : if (rep == 0) {
827 0 : nfsstats.rpcunexpected++;
828 0 : m_freem(info.nmi_mrep);
829 0 : } else if (rep == myrep) {
830 0 : if (rep->r_mrep == NULL)
831 0 : panic("nfsreply nil");
832 0 : return (0);
833 : }
834 : }
835 0 : }
836 :
/*
 * nfs_request - goes something like this
 * - fill in request struct
 * - links it into list
 * - calls nfs_send() for first transmit
 * - calls nfs_receive() to get reply
 * - break down rpc header and return with nfs reply pointed to
 *   by mrep or error
 * nb: always frees up mreq mbuf list
 *
 * vp: the vnode the RPC operates on; procnum: NFS procedure number;
 * infop: carries the request mbufs/cred in and the reply mbuf chain,
 * parse cursor (nmi_md/nmi_dpos) out. Returns 0 or an errno.
 */
int
nfs_request(struct vnode *vp, int procnum, struct nfsm_info *infop)
{
	struct mbuf *m;
	u_int32_t *tl;
	struct nfsmount *nmp;
	struct timeval tv;
	caddr_t cp2;		/* cp2/t1 used by the nfsm_* macros */
	int t1, i, error = 0;
	int trylater_delay;
	struct nfsreq *rep;
	struct nfsm_info info;

	rep = pool_get(&nfsreqpl, PR_WAITOK);
	rep->r_nmp = VFSTONFS(vp->v_mount);
	rep->r_vp = vp;
	rep->r_procp = infop->nmi_procp;
	rep->r_procnum = procnum;

	/* empty mbuf for AUTH_UNIX header */
	rep->r_mreq = m_gethdr(M_WAIT, MT_DATA);
	rep->r_mreq->m_next = infop->nmi_mreq;
	rep->r_mreq->m_len = 0;
	m_calchdrlen(rep->r_mreq);

	trylater_delay = NFS_MINTIMEO;

	nmp = rep->r_nmp;

	/* Get the RPC header with authorization. */
	nfsm_rpchead(rep, infop->nmi_cred, RPCAUTH_UNIX);
	m = rep->r_mreq;

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 * High bit set marks this as the last (only) fragment.
	 */
	if (nmp->nm_sotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_int32_t *) = htonl(0x80000000 |
		    (m->m_pkthdr.len - NFSX_UNSIGNED));
	}

tryagain:	/* re-entered when the server answers NFSERR_TRYLATER */
	rep->r_rtt = rep->r_rexmit = 0;
	if (nfs_ptimers[rep->r_procnum] != NFS_DEFAULT_TIMER)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	if (TAILQ_EMPTY(&nmp->nm_reqsq))
		timeout_add(&nmp->nm_rtimeout, nfs_ticks);
	TAILQ_INSERT_TAIL(&nmp->nm_reqsq, rep, r_chain);

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
	    (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
	    nmp->nm_sent < nmp->nm_cwnd)) {
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			error = nfs_sndlock(&nmp->nm_flag, rep);
		if (!error) {
			/* Send a copy; the original is kept for resends. */
			error = nfs_send(nmp->nm_so, nmp->nm_nam,
			    m_copym(m, 0, M_COPYALL, M_WAIT), rep);
			if (nmp->nm_soflags & PR_CONNREQUIRED)
				nfs_sndunlock(&nmp->nm_flag);
		}
		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
			nmp->nm_sent += NFS_CWNDSCALE;
			rep->r_flags |= R_SENT;
		}
	} else {
		/* Deferred to the timer; -1 suppresses RTT accounting. */
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE)
		error = nfs_reply(rep);

	/*
	 * RPC done, unlink the request.
	 */
	TAILQ_REMOVE(&nmp->nm_reqsq, rep, r_chain);
	if (TAILQ_EMPTY(&nmp->nm_reqsq))
		timeout_del(&nmp->nm_rtimeout);

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		nmp->nm_sent -= NFS_CWNDSCALE;
	}

	/*
	 * If there was a successful reply and a tprintf msg.
	 * tprintf a response.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep, "is alive again");
	info.nmi_mrep = rep->r_mrep;
	info.nmi_md = rep->r_md;
	info.nmi_dpos = rep->r_dpos;
	if (error) {
		infop->nmi_mrep = NULL;
		goto nfsmout1;
	}

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else
			error = EACCES;	/* Should be EAUTH. */
		infop->nmi_mrep = NULL;
		goto nfsmout1;
	}

	/*
	 * Since we only support RPCAUTH_UNIX atm we step over the
	 * reply verifer type, and in the (error) case that there really
	 * is any data in it, we advance over it.
	 */
	tl++;			/* Step over verifer type */
	i = fxdr_unsigned(int32_t, *tl);
	if (i > 0)
		nfsm_adv(nfsm_rndup(i));	/* Should not happen */

	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl != 0) {
			error = fxdr_unsigned(int, *tl);
			if ((nmp->nm_flag & NFSMNT_NFSV3) &&
			    error == NFSERR_TRYLATER) {
				/*
				 * Server busy: sleep with exponential
				 * backoff, then re-issue the whole RPC.
				 */
				m_freem(info.nmi_mrep);
				error = 0;
				tv.tv_sec = trylater_delay;
				tv.tv_usec = 0;
				tsleep(&tv, PSOCK, "nfsretry", tvtohz(&tv));
				trylater_delay *= NFS_TIMEOUTMUL;
				if (trylater_delay > NFS_MAXTIMEO)
					trylater_delay = NFS_MAXTIMEO;

				goto tryagain;
			}

			/*
			 * If the File Handle was stale, invalidate the
			 * lookup cache, just in case.
			 */
			if (error == ESTALE)
				cache_purge(rep->r_vp);
		}
		goto nfsmout;
	}

	error = EPROTONOSUPPORT;

nfsmout:	/* hand the (possibly error) reply back to the caller */
	infop->nmi_mrep = info.nmi_mrep;
	infop->nmi_md = info.nmi_md;
	infop->nmi_dpos = info.nmi_dpos;
nfsmout1:	/* common exit: free the request mbufs and descriptor */
	m_freem(rep->r_mreq);
	pool_put(&nfsreqpl, rep);
	return (error);
}
1032 : #endif /* NFSCLIENT */
1033 :
1034 : /*
1035 : * Generate the rpc reply header
1036 : * siz arg. is used to decide if adding a cluster is worthwhile
1037 : */
1038 : int
1039 0 : nfs_rephead(int siz, struct nfsrv_descript *nd, struct nfssvc_sock *slp,
1040 : int err, struct mbuf **mrq, struct mbuf **mbp)
1041 : {
1042 : u_int32_t *tl;
1043 : struct mbuf *mreq;
1044 0 : struct mbuf *mb;
1045 :
1046 0 : MGETHDR(mreq, M_WAIT, MT_DATA);
1047 0 : mb = mreq;
1048 : /*
1049 : * If this is a big reply, use a cluster else
1050 : * try and leave leading space for the lower level headers.
1051 : */
1052 0 : siz += RPC_REPLYSIZ;
1053 0 : if (siz >= MHLEN - max_hdr) {
1054 0 : MCLGET(mreq, M_WAIT);
1055 0 : } else
1056 0 : mreq->m_data += max_hdr;
1057 0 : tl = mtod(mreq, u_int32_t *);
1058 0 : mreq->m_len = 6 * NFSX_UNSIGNED;
1059 0 : *tl++ = txdr_unsigned(nd->nd_retxid);
1060 0 : *tl++ = rpc_reply;
1061 0 : if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
1062 0 : *tl++ = rpc_msgdenied;
1063 0 : if (err & NFSERR_AUTHERR) {
1064 0 : *tl++ = rpc_autherr;
1065 0 : *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
1066 0 : mreq->m_len -= NFSX_UNSIGNED;
1067 0 : } else {
1068 0 : *tl++ = rpc_mismatch;
1069 0 : *tl++ = txdr_unsigned(RPC_VER2);
1070 0 : *tl = txdr_unsigned(RPC_VER2);
1071 : }
1072 : } else {
1073 0 : *tl++ = rpc_msgaccepted;
1074 :
1075 : /* AUTH_UNIX requires RPCAUTH_NULL. */
1076 0 : *tl++ = 0;
1077 0 : *tl++ = 0;
1078 :
1079 0 : switch (err) {
1080 : case EPROGUNAVAIL:
1081 0 : *tl = txdr_unsigned(RPC_PROGUNAVAIL);
1082 0 : break;
1083 : case EPROGMISMATCH:
1084 0 : *tl = txdr_unsigned(RPC_PROGMISMATCH);
1085 0 : tl = nfsm_build(&mb, 2 * NFSX_UNSIGNED);
1086 0 : *tl++ = txdr_unsigned(NFS_VER2);
1087 0 : *tl = txdr_unsigned(NFS_VER3);
1088 0 : break;
1089 : case EPROCUNAVAIL:
1090 0 : *tl = txdr_unsigned(RPC_PROCUNAVAIL);
1091 0 : break;
1092 : case EBADRPC:
1093 0 : *tl = txdr_unsigned(RPC_GARBAGE);
1094 0 : break;
1095 : default:
1096 0 : *tl = 0;
1097 0 : if (err != NFSERR_RETVOID) {
1098 0 : tl = nfsm_build(&mb, NFSX_UNSIGNED);
1099 0 : if (err)
1100 0 : *tl = txdr_unsigned(nfsrv_errmap(nd, err));
1101 : else
1102 0 : *tl = 0;
1103 : }
1104 : break;
1105 : };
1106 : }
1107 :
1108 0 : *mrq = mreq;
1109 0 : if (mbp != NULL)
1110 0 : *mbp = mb;
1111 0 : if (err != 0 && err != NFSERR_RETVOID)
1112 0 : nfsstats.srvrpc_errs++;
1113 0 : return (0);
1114 0 : }
1115 :
1116 : /*
1117 : * nfs timer routine
1118 : * Scan the nfsreq list and retranmit any requests that have timed out.
1119 : */
void
nfs_timer(void *arg)
{
	struct nfsmount *nmp = arg;
	struct nfsreq *rep;
	struct mbuf *m;
	struct socket *so;
	int timeo, error;

	/*
	 * Per-mount retransmit timer: walk the outstanding request list
	 * and retransmit (UDP) or mark for backoff (TCP) anything that
	 * has been waiting longer than its estimated RTO.
	 */
	NET_LOCK();
	TAILQ_FOREACH(rep, &nmp->nm_reqsq, r_chain) {
		/* Skip requests already answered or soft-terminated. */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
			continue;
		/* A pending signal on an interruptible mount aborts it. */
		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		if (rep->r_rtt >= 0) {
			/* Request is being timed; check it against the RTO. */
			rep->r_rtt++;
			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
				timeo = nmp->nm_timeo;
			else
				timeo = nfs_estimate_rto(nmp, rep->r_procnum);
			/* Apply exponential backoff on repeated timeouts. */
			if (nmp->nm_timeouts > 0)
				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			if (rep->r_rtt <= timeo)
				continue;
			if (nmp->nm_timeouts < nitems(nfs_backoff))
				nmp->nm_timeouts++;
		}

		/* Check for server not responding. */
		if ((rep->r_flags & R_TPRINTFMSG) == 0 && rep->r_rexmit > 4) {
			nfs_msg(rep, "not responding");
			rep->r_flags |= R_TPRINTFMSG;
		}
		if (rep->r_rexmit >= nmp->nm_retry) {	/* too many */
			nfsstats.rpctimeouts++;
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		/*
		 * For stream sockets the transport retransmits for us;
		 * just bump the rexmit count for the backoff bookkeeping.
		 */
		if (nmp->nm_sotype != SOCK_DGRAM) {
			if (++rep->r_rexmit > NFS_MAXREXMIT)
				rep->r_rexmit = NFS_MAXREXMIT;
			continue;
		}

		if ((so = nmp->nm_so) == NULL)
			continue;

		/*
		 * If there is enough space and the window allows..
		 *	Resend it
		 * Set r_rtt to -1 in case we fail to send it now.
		 */
		rep->r_rtt = -1;
		if (sbspace(so, &so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
		    ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		    (rep->r_flags & R_SENT) ||
		    nmp->nm_sent < nmp->nm_cwnd) &&
		    (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
				error = (*so->so_proto->pr_usrreq)(so, PRU_SEND,
				    m, NULL, NULL, curproc);
			else
				error = (*so->so_proto->pr_usrreq)(so, PRU_SEND,
				    m, nmp->nm_nam, NULL, curproc);
			if (error) {
				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
					so->so_error = 0;
			} else {
				/*
				 * Iff first send, start timing
				 * else turn timing off, backoff timer
				 * and divide congestion window by 2.
				 */
				if (rep->r_flags & R_SENT) {
					rep->r_flags &= ~R_TIMING;
					if (++rep->r_rexmit > NFS_MAXREXMIT)
						rep->r_rexmit = NFS_MAXREXMIT;
					nmp->nm_cwnd >>= 1;
					if (nmp->nm_cwnd < NFS_CWNDSCALE)
						nmp->nm_cwnd = NFS_CWNDSCALE;
					nfsstats.rpcretries++;
				} else {
					rep->r_flags |= R_SENT;
					nmp->nm_sent += NFS_CWNDSCALE;
				}
				rep->r_rtt = 0;
			}
		}
	}
	NET_UNLOCK();
	/* Re-arm ourselves; the timer runs as long as the mount exists. */
	timeout_add(&nmp->nm_rtimeout, nfs_ticks);
}
1215 :
1216 : /*
1217 : * Test for a termination condition pending on the process.
1218 : * This is used for NFSMNT_INT mounts.
1219 : */
1220 : int
1221 0 : nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct proc *p)
1222 : {
1223 :
1224 0 : if (rep && (rep->r_flags & R_SOFTTERM))
1225 0 : return (EINTR);
1226 0 : if (!(nmp->nm_flag & NFSMNT_INT))
1227 0 : return (0);
1228 0 : if (p && p->p_siglist &&
1229 0 : (((p->p_siglist & ~p->p_sigmask) &
1230 0 : ~p->p_p->ps_sigacts->ps_sigignore) & NFSINT_SIGMASK))
1231 0 : return (EINTR);
1232 0 : return (0);
1233 0 : }
1234 :
1235 : /*
1236 : * Lock a socket against others.
1237 : * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
1238 : * and also to avoid race conditions between the processes with nfs requests
1239 : * in progress when a reconnect is necessary.
1240 : */
1241 : int
1242 0 : nfs_sndlock(int *flagp, struct nfsreq *rep)
1243 : {
1244 : struct proc *p;
1245 : int slpflag = 0, slptimeo = 0;
1246 :
1247 0 : if (rep) {
1248 0 : p = rep->r_procp;
1249 0 : if (rep->r_nmp->nm_flag & NFSMNT_INT)
1250 0 : slpflag = PCATCH;
1251 : } else
1252 : p = NULL;
1253 0 : while (*flagp & NFSMNT_SNDLOCK) {
1254 0 : if (rep && nfs_sigintr(rep->r_nmp, rep, p))
1255 0 : return (EINTR);
1256 0 : *flagp |= NFSMNT_WANTSND;
1257 0 : (void)tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
1258 : slptimeo);
1259 0 : if (slpflag == PCATCH) {
1260 : slpflag = 0;
1261 0 : slptimeo = 2 * hz;
1262 0 : }
1263 : }
1264 0 : *flagp |= NFSMNT_SNDLOCK;
1265 0 : return (0);
1266 0 : }
1267 :
1268 : /*
1269 : * Unlock the stream socket for others.
1270 : */
1271 : void
1272 0 : nfs_sndunlock(int *flagp)
1273 : {
1274 :
1275 0 : if ((*flagp & NFSMNT_SNDLOCK) == 0)
1276 0 : panic("nfs sndunlock");
1277 0 : *flagp &= ~NFSMNT_SNDLOCK;
1278 0 : if (*flagp & NFSMNT_WANTSND) {
1279 0 : *flagp &= ~NFSMNT_WANTSND;
1280 0 : wakeup((caddr_t)flagp);
1281 0 : }
1282 0 : }
1283 :
1284 : int
1285 0 : nfs_rcvlock(struct nfsreq *rep)
1286 : {
1287 0 : int *flagp = &rep->r_nmp->nm_flag;
1288 : int slpflag, slptimeo = 0;
1289 :
1290 0 : if (*flagp & NFSMNT_INT)
1291 0 : slpflag = PCATCH;
1292 : else
1293 : slpflag = 0;
1294 :
1295 0 : while (*flagp & NFSMNT_RCVLOCK) {
1296 0 : if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
1297 0 : return (EINTR);
1298 0 : *flagp |= NFSMNT_WANTRCV;
1299 0 : (void)tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
1300 : slptimeo);
1301 0 : if (rep->r_mrep != NULL) {
1302 : /*
1303 : * Don't take the lock if our reply has been received
1304 : * while we where sleeping.
1305 : */
1306 0 : return (EALREADY);
1307 : }
1308 0 : if (slpflag == PCATCH) {
1309 : slpflag = 0;
1310 0 : slptimeo = 2 * hz;
1311 0 : }
1312 : }
1313 0 : *flagp |= NFSMNT_RCVLOCK;
1314 0 : return (0);
1315 0 : }
1316 :
1317 : /*
1318 : * Unlock the stream socket for others.
1319 : */
1320 : void
1321 0 : nfs_rcvunlock(int *flagp)
1322 : {
1323 :
1324 0 : if ((*flagp & NFSMNT_RCVLOCK) == 0)
1325 0 : panic("nfs rcvunlock");
1326 0 : *flagp &= ~NFSMNT_RCVLOCK;
1327 0 : if (*flagp & NFSMNT_WANTRCV) {
1328 0 : *flagp &= ~NFSMNT_WANTRCV;
1329 0 : wakeup(flagp);
1330 0 : }
1331 0 : }
1332 :
1333 : /*
1334 : * Auxiliary routine to align the length of mbuf copies made with m_copyback().
1335 : */
void
nfs_realign_fixup(struct mbuf *m, struct mbuf *n, unsigned int *off)
{
	size_t padding;

	/*
	 * Called from nfs_realign() when an unaligned number of bytes
	 * was copied into the destination chain 'n'.  Pull bytes from
	 * the remainder of the source chain 'm' into the tail of 'n'
	 * until n->m_len is pointer-aligned again.  '*off' tracks the
	 * copy offset so the caller's m_copyback() loop stays in sync.
	 */

	/*
	 * The maximum number of bytes that m_copyback() places in a mbuf is
	 * always an aligned quantity, so realign happens at the chain's tail.
	 */
	while (n->m_next != NULL)
		n = n->m_next;

	/*
	 * Pad from the next elements in the source chain. Loop until the
	 * destination chain is aligned, or the end of the source is reached.
	 */
	do {
		m = m->m_next;
		if (m == NULL)
			return;

		/* Bytes needed to align n, bounded by what m can supply. */
		padding = min(ALIGN(n->m_len) - n->m_len, m->m_len);
		if (padding > M_TRAILINGSPACE(n))
			panic("nfs_realign_fixup: no memory to pad to");

		bcopy(mtod(m, void *), mtod(n, char *) + n->m_len, padding);

		/* Consume the copied bytes from the source mbuf. */
		n->m_len += padding;
		m_adj(m, padding);
		*off += padding;

	} while (!ALIGNED_POINTER(n->m_len, void *));
}
1369 :
1370 : /*
1371 : * The NFS RPC parsing code uses the data address and the length of mbuf
1372 : * structures to calculate on-memory addresses. This function makes sure these
1373 : * parameters are correctly aligned.
1374 : */
void
nfs_realign(struct mbuf **pm, int hsiz)
{
	struct mbuf *m;
	struct mbuf *n = NULL;
	unsigned int off = 0;

	/*
	 * Walk the chain until the first mbuf whose data pointer or
	 * length is not pointer-aligned; everything before that point
	 * is left untouched.  If a misaligned mbuf is found, allocate
	 * a replacement (n) and fall through to the copy loop below.
	 * 'hsiz' is currently unused by this implementation.
	 */
	++nfs_realign_test;
	while ((m = *pm) != NULL) {
		if (!ALIGNED_POINTER(m->m_data, void *) ||
		    !ALIGNED_POINTER(m->m_len, void *)) {
			MGET(n, M_WAIT, MT_DATA);
/*
 * NOTE(review): this rounding looks odd -- a conventional round-up would be
 * (((n) + sizeof(void *) - 1) & ~(sizeof(void *) - 1)).  As written it only
 * biases the "use a cluster?" heuristic below, so behavior is not affected
 * beyond possibly choosing a plain mbuf near MINCLSIZE -- confirm upstream.
 */
#define ALIGN_POINTER(n) ((u_int)(((n) + sizeof(void *)) & ~sizeof(void *)))
			if (ALIGN_POINTER(m->m_len) >= MINCLSIZE) {
				MCLGET(n, M_WAIT);
			}
			n->m_len = 0;
			break;
		}
		pm = &m->m_next;
	}
	/*
	 * If n is non-NULL, loop on m copying data, then replace the
	 * portion of the chain that had to be realigned.
	 */
	if (n != NULL) {
		++nfs_realign_count;
		while (m) {
			m_copyback(n, off, m->m_len, mtod(m, caddr_t), M_WAIT);

			/*
			 * If an unaligned amount of memory was copied, fix up
			 * the last mbuf created by m_copyback().
			 */
			if (!ALIGNED_POINTER(m->m_len, void *))
				nfs_realign_fixup(m, n, &off);

			off += m->m_len;
			m = m->m_next;
		}
		/* Free the misaligned tail and splice in the aligned copy. */
		m_freemp(pm);
		*pm = n;
	}
}
1419 :
1420 :
1421 : /*
1422 : * Parse an RPC request
1423 : * - verify it
1424 : * - fill in the cred struct.
1425 : */
int
nfs_getreq(struct nfsrv_descript *nd, struct nfsd *nfsd, int has_header)
{
	int len, i;
	u_int32_t *tl;
	int32_t t1;
	caddr_t cp2;
	u_int32_t nfsvers, auth_type;
	int error = 0;
	struct nfsm_info info;

	/*
	 * Parse the RPC call header out of nd->nd_mrep and fill in the
	 * descriptor (retxid, procnum, flags, credentials).  Protocol-level
	 * problems are reported by setting nd_repstat and returning 0 (so a
	 * reply is generated); a mangled request frees the mbufs and returns
	 * EBADRPC.  Note the nfsm_* macros can jump to 'nfsmout' on a parse
	 * failure.  't1' and 'cp2' are scratch used by those macros.
	 */
	info.nmi_mrep = nd->nd_mrep;
	info.nmi_md = nd->nd_md;
	info.nmi_dpos = nd->nd_dpos;
	if (has_header) {
		nfsm_dissect(tl, u_int32_t *, 10 * NFSX_UNSIGNED);
		nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
		if (*tl++ != rpc_call) {
			m_freem(info.nmi_mrep);
			return (EBADRPC);
		}
	} else
		nfsm_dissect(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
	nd->nd_repstat = 0;
	nd->nd_flag = 0;
	/* RPC version must be 2. */
	if (*tl++ != rpc_vers) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	/* Program must be NFS. */
	if (*tl != nfs_prog) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	tl++;
	nfsvers = fxdr_unsigned(u_int32_t, *tl++);
	if (nfsvers != NFS_VER2 && nfsvers != NFS_VER3) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (nfsvers == NFS_VER3)
		nd->nd_flag = ND_NFSV3;
	nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
	/* NULL proc needs no credentials; done. */
	if (nd->nd_procnum == NFSPROC_NULL)
		return (0);
	/* Reject procedure numbers outside the version's valid range. */
	if (nd->nd_procnum >= NFS_NPROCS ||
	    (nd->nd_procnum > NFSPROC_COMMIT) ||
	    (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	/* Map v2 procedure numbers onto the v3 numbering used internally. */
	if ((nd->nd_flag & ND_NFSV3) == 0)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	auth_type = *tl++;
	len = fxdr_unsigned(int, *tl++);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(info.nmi_mrep);
		return (EBADRPC);
	}

	/* Handle auth_unix */
	if (auth_type == rpc_auth_unix) {
		/* Skip the machine name. */
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > NFS_MAXNAMLEN) {
			m_freem(info.nmi_mrep);
			return (EBADRPC);
		}
		nfsm_adv(nfsm_rndup(len));
		nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
		memset(&nd->nd_cr, 0, sizeof (struct ucred));
		nd->nd_cr.cr_ref = 1;
		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
		nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
		len = fxdr_unsigned(int, *tl);
		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
			m_freem(info.nmi_mrep);
			return (EBADRPC);
		}
		nfsm_dissect(tl, u_int32_t *, (len + 2) * NFSX_UNSIGNED);
		/* Copy at most NGROUPS_MAX groups; skip over any excess. */
		for (i = 0; i < len; i++) {
			if (i < NGROUPS_MAX)
				nd->nd_cr.cr_groups[i] =
				    fxdr_unsigned(gid_t, *tl++);
			else
				tl++;
		}
		nd->nd_cr.cr_ngroups = (len > NGROUPS_MAX) ? NGROUPS_MAX : len;
		/* Skip the verifier body, if any. */
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > RPCAUTH_MAXSIZ) {
			m_freem(info.nmi_mrep);
			return (EBADRPC);
		}
		if (len > 0)
			nfsm_adv(nfsm_rndup(len));
	} else {
		/* Only AUTH_UNIX is supported; reject everything else. */
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}

	nd->nd_md = info.nmi_md;
	nd->nd_dpos = info.nmi_dpos;
	return (0);
nfsmout:
	return (error);
}
1535 :
1536 : void
1537 0 : nfs_msg(struct nfsreq *rep, char *msg)
1538 : {
1539 : tpr_t tpr;
1540 :
1541 0 : if (rep->r_procp)
1542 0 : tpr = tprintf_open(rep->r_procp);
1543 : else
1544 : tpr = NULL;
1545 :
1546 0 : tprintf(tpr, "nfs server %s: %s\n",
1547 0 : rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname, msg);
1548 0 : tprintf_close(tpr);
1549 0 : }
1550 :
1551 : #ifdef NFSSERVER
1552 : /*
1553 : * Socket upcall routine for the nfsd sockets.
1554 : * The caddr_t arg is a pointer to the "struct nfssvc_sock".
1555 : * Essentially do as much as possible non-blocking, else punt and it will
1556 : * be called with M_WAIT from an nfsd.
1557 : */
void
nfsrv_rcv(struct socket *so, caddr_t arg, int waitflag)
{
	struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
	struct mbuf *m;
	struct mbuf *mp, *nam;
	struct uio auio;
	int flags, error;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;

	/* Defer soreceive() to an nfsd. */
	if (waitflag == M_DONTWAIT) {
		slp->ns_flag |= SLP_NEEDQ;
		goto dorecs;
	}

	auio.uio_procp = NULL;
	if (so->so_type == SOCK_STREAM) {
		/*
		 * Do soreceive().
		 * The huge uio_resid just means "take whatever is queued".
		 */
		auio.uio_resid = 1000000000;
		flags = MSG_DONTWAIT;
		error = soreceive(so, &nam, &auio, &mp, NULL,
		    &flags, 0);
		if (error || mp == NULL) {
			/* EWOULDBLOCK: retry later; anything else: drop. */
			if (error == EWOULDBLOCK)
				slp->ns_flag |= SLP_NEEDQ;
			else
				slp->ns_flag |= SLP_DISCONN;
			goto dorecs;
		}
		/* Append the new data to the raw stream buffer. */
		m = mp;
		if (slp->ns_rawend) {
			slp->ns_rawend->m_next = m;
			slp->ns_cc += 1000000000 - auio.uio_resid;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = 1000000000 - auio.uio_resid;
		}
		while (m->m_next)
			m = m->m_next;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				slp->ns_flag |= SLP_DISCONN;
			else
				slp->ns_flag |= SLP_NEEDQ;
		}
	} else {
		/*
		 * Datagram socket: drain all queued packets, prepending
		 * the sender's address mbuf to each record.
		 */
		do {
			auio.uio_resid = 1000000000;
			flags = MSG_DONTWAIT;
			error = soreceive(so, &nam, &auio, &mp,
			    NULL, &flags, 0);
			if (mp) {
				if (nam) {
					m = nam;
					m->m_next = mp;
				} else
					m = mp;
				/* Queue the record on the ns_rec list. */
				if (slp->ns_recend)
					slp->ns_recend->m_nextpkt = m;
				else
					slp->ns_rec = m;
				slp->ns_recend = m;
				m->m_nextpkt = NULL;
			}
			if (error) {
				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
				    && error != EWOULDBLOCK) {
					slp->ns_flag |= SLP_DISCONN;
					goto dorecs;
				}
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (waitflag == M_DONTWAIT &&
	    (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
		nfsrv_wakenfsd(slp);
}
1651 :
1652 : /*
1653 : * Try and extract an RPC request from the mbuf data list received on a
1654 : * stream socket. The "waitflag" argument indicates whether or not it
1655 : * can sleep.
1656 : */
int
nfsrv_getstream(struct nfssvc_sock *slp, int waitflag)
{
	struct mbuf *m, **mpp;
	char *cp1, *cp2;
	int len;
	struct mbuf *om, *m2, *recm;
	u_int32_t recmark;

	/*
	 * Pull RPC record-marked fragments out of the raw stream buffer
	 * (ns_raw / ns_cc) and assemble them into complete records on
	 * ns_rec.  SLP_GETSTREAM serializes access so only one thread
	 * parses the stream at a time.  Returns EPERM for an insane
	 * record length (caller disconnects) and EWOULDBLOCK when an
	 * mbuf copy fails under M_DONTWAIT.
	 */
	if (slp->ns_flag & SLP_GETSTREAM)
		return (0);
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			/* Need a new 4-byte record mark first. */
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			if (m->m_len >= NFSX_UNSIGNED) {
				bcopy(mtod(m, caddr_t), &recmark,
				    NFSX_UNSIGNED);
				m->m_data += NFSX_UNSIGNED;
				m->m_len -= NFSX_UNSIGNED;
			} else {
				/* Record mark straddles mbufs; copy bytewise. */
				cp1 = (caddr_t)&recmark;
				cp2 = mtod(m, caddr_t);
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (m->m_len == 0) {
						m = m->m_next;
						cp2 = mtod(m, caddr_t);
					}
					*cp1++ = *cp2++;
					m->m_data++;
					m->m_len--;
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			/* High bit marks the last fragment of a record. */
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			/* Buffered data is exactly one fragment. */
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			/* Split the fragment off the front of ns_raw. */
			len = 0;
			m = slp->ns_raw;
			om = NULL;
			while (len < slp->ns_reclen) {
				if ((len + m->m_len) > slp->ns_reclen) {
					/* Fragment ends mid-mbuf: copy the head. */
					m2 = m_copym(m, 0, slp->ns_reclen - len,
					    waitflag);
					if (m2) {
						if (om) {
							om->m_next = m2;
							recm = slp->ns_raw;
						} else
							recm = m2;
						m->m_data += slp->ns_reclen-len;
						m->m_len -= slp->ns_reclen-len;
						len = slp->ns_reclen;
					} else {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
				} else if ((len + m->m_len) == slp->ns_reclen) {
					/* Fragment ends on an mbuf boundary. */
					om = m;
					len += m->m_len;
					m = m->m_next;
					recm = slp->ns_raw;
					om->m_next = NULL;
				} else {
					om = m;
					len += m->m_len;
					m = m->m_next;
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			/* Not enough data buffered yet; wait for more. */
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		mpp = &slp->ns_frag;
		while (*mpp)
			mpp = &((*mpp)->m_next);
		*mpp = recm;
		if (slp->ns_flag & SLP_LASTFRAG) {
			/* Record complete: move it onto the ns_rec queue. */
			if (slp->ns_recend)
				slp->ns_recend->m_nextpkt = slp->ns_frag;
			else
				slp->ns_rec = slp->ns_frag;
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = NULL;
		}
	}
}
1773 :
1774 : /*
1775 : * Parse an RPC header.
1776 : */
1777 : int
1778 0 : nfsrv_dorec(struct nfssvc_sock *slp, struct nfsd *nfsd,
1779 : struct nfsrv_descript **ndp)
1780 : {
1781 0 : struct mbuf *m, *nam;
1782 : struct nfsrv_descript *nd;
1783 : int error;
1784 :
1785 0 : *ndp = NULL;
1786 0 : if ((slp->ns_flag & SLP_VALID) == 0 ||
1787 0 : (m = slp->ns_rec) == NULL)
1788 0 : return (ENOBUFS);
1789 0 : slp->ns_rec = m->m_nextpkt;
1790 0 : if (slp->ns_rec)
1791 0 : m->m_nextpkt = NULL;
1792 : else
1793 0 : slp->ns_recend = NULL;
1794 0 : if (m->m_type == MT_SONAME) {
1795 : nam = m;
1796 0 : m = m->m_next;
1797 0 : nam->m_next = NULL;
1798 0 : } else
1799 : nam = NULL;
1800 0 : nd = pool_get(&nfsrv_descript_pl, PR_WAITOK);
1801 0 : nfs_realign(&m, 10 * NFSX_UNSIGNED);
1802 0 : nd->nd_md = nd->nd_mrep = m;
1803 0 : nd->nd_nam2 = nam;
1804 0 : nd->nd_dpos = mtod(m, caddr_t);
1805 0 : error = nfs_getreq(nd, nfsd, 1);
1806 0 : if (error) {
1807 0 : m_freem(nam);
1808 0 : pool_put(&nfsrv_descript_pl, nd);
1809 0 : return (error);
1810 : }
1811 0 : *ndp = nd;
1812 0 : nfsd->nfsd_nd = nd;
1813 0 : return (0);
1814 0 : }
1815 :
1816 :
1817 : /*
1818 : * Search for a sleeping nfsd and wake it up.
1819 : * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
1820 : * running nfsds will go look for the work in the nfssvc_sock list.
1821 : */
1822 : void
1823 0 : nfsrv_wakenfsd(struct nfssvc_sock *slp)
1824 : {
1825 : struct nfsd *nfsd;
1826 :
1827 0 : if ((slp->ns_flag & SLP_VALID) == 0)
1828 0 : return;
1829 :
1830 0 : TAILQ_FOREACH(nfsd, &nfsd_head, nfsd_chain) {
1831 0 : if (nfsd->nfsd_flag & NFSD_WAITING) {
1832 0 : nfsd->nfsd_flag &= ~NFSD_WAITING;
1833 0 : if (nfsd->nfsd_slp)
1834 0 : panic("nfsd wakeup");
1835 0 : slp->ns_sref++;
1836 0 : nfsd->nfsd_slp = slp;
1837 0 : wakeup_one(nfsd);
1838 0 : return;
1839 : }
1840 : }
1841 :
1842 0 : slp->ns_flag |= SLP_DOREC;
1843 0 : nfsd_head_flag |= NFSD_CHECKSLP;
1844 0 : }
1845 : #endif /* NFSSERVER */
|