/*	$OpenBSD: nfs_bio.c,v 1.82 2017/02/22 11:42:46 mpi Exp $	*/
/*	$NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/time.h>

#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern int nfs_numasync;
extern struct nfsstats nfsstats;
struct nfs_bufqhead nfs_bufq;
uint32_t nfs_bufqmax, nfs_bufqlen;

struct buf *nfs_getcacheblk(struct vnode *, daddr_t, int, struct proc *);

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
{
	struct nfsnode *np = VTONFS(vp);
	int biosize, diff;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;
	off_t offdiff;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	biosize = nmp->nm_rsize;
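	/*
	 * Note that cache blocks are sized by nm_rsize here just as on
	 * the write path, so all of a file's cache blocks have one size;
	 * see the matching remark in nfs_write().
	 */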
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 */
	if (np->n_flag & NMODIFIED) {
		NFS_INVALIDATE_ATTRCACHE(np);
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		np->n_mtime = vattr.va_mtime;
	} else {
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		if (timespeccmp(&np->n_mtime, &vattr.va_mtime, !=)) {
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime;
		}
	}

	/*
	 * update the cache read creds for this vnode
	 */
	if (np->n_rcred)
		crfree(np->n_rcred);
	np->n_rcred = cred;
	crhold(cred);

	do {
		if ((vp->v_flag & VROOT) && vp->v_type == VLNK) {
			return (nfs_readlinkrpc(vp, uio, cred));
		}
		baddr = NULL;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
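			/*
			 * lbn is the logical (biosize) block containing the
			 * offset, on is the byte offset within that block,
			 * and bn is the same block expressed in DEV_BSIZE
			 * units for the buffer cache.  E.g. with biosize
			 * 8192, DEV_BSIZE 512 and offset 20000: lbn 2,
			 * on 3616, bn 32.
			 */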
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize - 1);
			bn = lbn * (biosize / DEV_BSIZE);
			not_readin = 1;

			/*
			 * Start the read ahead(s), as required.
			 */
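			/*
			 * Readahead is attempted only when nfsiods are
			 * running (nfs_numasync > 0) and the block is not
			 * already incore; if nfs_asyncio() cannot queue
			 * the request, the buffer is invalidated and
			 * released rather than read synchronously.
			 */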
			if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size; nra++) {
					rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp, rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |= (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp, 1)) {
								rabp->b_flags |= B_INVAL;
								brelse(rabp);
							}
						} else
							brelse(rabp);
					}
				}
			}

again:
			bp = nfs_getcacheblk(vp, bn, biosize, p);
			if (!bp)
				return (EINTR);
			got_buf = 1;
			if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
				bp->b_flags |= B_READ;
				not_readin = 0;
				error = nfs_doio(bp, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = ulmin(biosize - on, uio->uio_resid);
			offdiff = np->n_size - uio->uio_offset;
			if (offdiff < (off_t)n)
				n = (int)offdiff;
			if (not_readin && n > 0) {
				if (on < bp->b_validoff || (on + n) > bp->b_validend) {
					bp->b_flags |= B_INVAFTERWRITE;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
			diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, 0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = ulmin(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		default:
			panic("nfsbioread: type %x unexpected", vp->v_type);
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, n, uio);
		}

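		/*
		 * A symlink is consumed in a single pass; zeroing n here
		 * terminates the surrounding do/while loop.
		 */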
		if (vp->v_type == VLNK)
			n = 0;

		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(void *v)
{
	struct vop_write_args *ap = v;
	int biosize;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn;
	int n, on, error = 0, extended = 0, wrotedta = 0, truncated = 0;
	ssize_t overrun;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			NFS_INVALIDATE_ATTRCACHE(np);
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			NFS_INVALIDATE_ATTRCACHE(np);
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);

	/* do the filesize rlimit check */
	if ((error = vn_fsizechk(vp, uio, ioflag, &overrun)))
		return (error);

	/*
	 * update the cache write creds for this node.
	 */
	if (np->n_wcred)
		crfree(np->n_wcred);
	np->n_wcred = cred;
	crhold(cred);

	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		uvm_vnp_uncache(vp);

		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = ulmin(biosize - on, uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp) {
			error = EINTR;
			goto out;
		}
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			uvm_vnp_setsize(vp, (u_long)np->n_size);
			extended = 1;
		} else if (uio->uio_offset + n < np->n_size)
			truncated = 1;

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
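		/*
		 * (A buf records only a single contiguous dirty range in
		 * b_dirtyoff/b_dirtyend, so a disjoint write must push
		 * the old range to the server first.)
		 */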
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR) {
				error = EINTR;
				goto out;
			}
			goto again;
		}

		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			goto out;
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		wrotedta = 1;

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */

		if (NFS_ISV3(vp)) {
			rw_enter_write(&np->n_commitlock);
			if (bp->b_flags & B_NEEDCOMMIT) {
				bp->b_flags &= ~B_NEEDCOMMIT;
				nfs_del_tobecommitted_range(vp, bp);
			}
			nfs_del_committed_range(vp, bp);
			rw_exit_write(&np->n_commitlock);
		} else
			bp->b_flags &= ~B_NEEDCOMMIT;

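		/*
		 * Write the block out now for IO_SYNC, start an async
		 * write if the block was filled completely, and otherwise
		 * leave it as a delayed write in the hope that more of it
		 * will be dirtied before it goes out.
		 */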
		if (ioflag & IO_SYNC) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				goto out;
		} else if ((n + on) == biosize) {
			bp->b_proc = NULL;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && n > 0);

	/*out: XXX belongs here??? */
	if (wrotedta)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0) |
		    (truncated ? NOTE_TRUNCATE : 0));

out:
	/* correct the result for writes clamped by vn_fsizechk() */
	uio->uio_resid += overrun;

	return (error);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size, struct proc *p)
{
	struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

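	/*
	 * On interruptible (-o intr) mounts, the first getblk() sleeps
	 * with PCATCH; after that we poll with a 2 * hz timeout, letting
	 * nfs_sigintr() decide whether a pending signal should abort.
	 * Non-intr mounts simply block in getblk().
	 */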
	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == NULL) {
			if (nfs_sigintr(nmp, NULL, p))
				return (NULL);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
{
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *np = VTONFS(vp);
	int error, sintr, stimeo;

	error = sintr = stimeo = 0;

	if (ISSET(nmp->nm_flag, NFSMNT_INT)) {
		sintr = PCATCH;
		stimeo = 2 * hz;
	}

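	/*
	 * With sintr/stimeo set, every sleep below is interruptible and
	 * bounded, so a signal on an intr mount aborts the flush with
	 * EINTR instead of hanging on a dead server.
	 */
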
	/* First wait for any other process doing a flush to complete. */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep(&np->n_flag, PRIBIO|sintr, "nfsvinval", stimeo);
		if (error && sintr && nfs_sigintr(nmp, NULL, p))
			return (EINTR);
	}

	/* Now, flush as required. */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, sintr, 0);
	while (error) {
		if (sintr && nfs_sigintr(nmp, NULL, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup(&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, stimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup(&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(struct buf *bp, int readahead)
{
	if (nfs_numasync == 0)
		goto out;

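	/*
	 * Readahead is purely opportunistic: rather than sleep for queue
	 * space, fail it and let the caller fall back to synchronous I/O.
	 * Other requests wait until the nfsiods drain the queue.
	 */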
	while (nfs_bufqlen > nfs_bufqmax)
		if (readahead)
			goto out;
		else
			tsleep(&nfs_bufqlen, PRIBIO, "nfs_bufq", 0);

	if ((bp->b_flags & B_READ) == 0) {
		bp->b_flags |= B_WRITEINPROG;
	}

	TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
	nfs_bufqlen++;

	wakeup_one(&nfs_bufq);
	return (0);

out:
	nfsstats.forcedsync++;
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(struct buf *bp, struct proc *p)
{
	struct uio *uiop;
	struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int s, error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more.
	 */
	if (bp->b_flags & B_PHYS) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop);
		} else {
			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
			nfsstats.read_bios++;
			bcstats.pendingreads++;
			bcstats.numreads++;
			error = nfs_readrpc(vp, uiop);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the file and
					 * no writes after the hole have been pushed to
					 * the server yet.
					 * Just zero fill the rest of the valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT)
					    + diff);
					if (len > 0) {
						len = ulmin(len, uiop->uio_resid);
						memset((char *)bp->b_data + diff, 0, len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
			if (p && (vp->v_flag & VTEXT) &&
			    (timespeccmp(&np->n_mtime, &np->n_vattr.va_mtime, !=))) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			bcstats.pendingreads++;
			bcstats.numreads++;
			error = nfs_readlinkrpc(vp, uiop, curproc->p_ucred);
			break;
		default:
			panic("nfs_doio: type %x unexpected", vp->v_type);
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
		    - bp->b_dirtyoff;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		    + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		bcstats.pendingwrites++;
		bcstats.numwrites++;
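		/*
		 * Plain async writes go out UNSTABLE, so the server may
		 * reply before the data reaches stable storage and the
		 * block must be committed later.  Writes already marked
		 * B_NEEDCOMMIT or B_NOCACHE, and all synchronous writes,
		 * use FILESYNC.
		 */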
		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
			iomode = NFSV3WRITE_UNSTABLE;
		else
			iomode = NFSV3WRITE_FILESYNC;
		bp->b_flags |= B_WRITEINPROG;
		error = nfs_writerpc(vp, uiop, &iomode, &must_commit);

		rw_enter_write(&np->n_commitlock);
		if (!error && iomode == NFSV3WRITE_UNSTABLE) {
			bp->b_flags |= B_NEEDCOMMIT;
			nfs_add_tobecommitted_range(vp, bp);
		} else {
			bp->b_flags &= ~B_NEEDCOMMIT;
			nfs_del_committed_range(vp, bp);
		}
		rw_exit_write(&np->n_commitlock);

		bp->b_flags &= ~B_WRITEINPROG;

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR; instead, report the interruption by setting
		 * B_EINTR. For the B_ASYNC case, B_EINTR is not relevant, so
		 * the rpc attempt is essentially a noop.
		 * For the case of a V3 write rpc not being committed to stable
		 * storage, the block is still dirty and requires either a commit
		 * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
		 * before the block is reused. This is indicated by setting the
		 * B_DELWRI and B_NEEDCOMMIT flags.
		 */
		if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
			s = splbio();
			buf_dirty(bp);
			splx(s);

			if (!(bp->b_flags & B_ASYNC) && error)
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	s = splbio();
	biodone(bp);
	splx(s);
	return (error);
}