/*	$OpenBSD: ffs_vnops.c,v 1.92 2018/07/21 09:35:08 anton Exp $	*/
/*	$NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_vnops.c	8.10 (Berkeley) 8/10/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/signalvar.h>
#include <sys/pool.h>
#include <sys/event.h>
#include <sys/specdev.h>

#include <miscfs/fifofs/fifo.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

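/*
 * Dispatch tables for the vnode operations implemented by FFS: one for
 * regular files and directories, one for device special files and one
 * for FIFOs.  Entries without an FFS-specific implementation fall
 * through to the generic UFS, spec or fifo handlers.
 */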
struct vops ffs_vops = {
	.vop_lookup = ufs_lookup,
	.vop_create = ufs_create,
	.vop_mknod = ufs_mknod,
	.vop_open = ufs_open,
	.vop_close = ufs_close,
	.vop_access = ufs_access,
	.vop_getattr = ufs_getattr,
	.vop_setattr = ufs_setattr,
	.vop_read = ffs_read,
	.vop_write = ffs_write,
	.vop_ioctl = ufs_ioctl,
	.vop_poll = ufs_poll,
	.vop_kqfilter = ufs_kqfilter,
	.vop_revoke = vop_generic_revoke,
	.vop_fsync = ffs_fsync,
	.vop_remove = ufs_remove,
	.vop_link = ufs_link,
	.vop_rename = ufs_rename,
	.vop_mkdir = ufs_mkdir,
	.vop_rmdir = ufs_rmdir,
	.vop_symlink = ufs_symlink,
	.vop_readdir = ufs_readdir,
	.vop_readlink = ufs_readlink,
	.vop_abortop = vop_generic_abortop,
	.vop_inactive = ufs_inactive,
	.vop_reclaim = ffs_reclaim,
	.vop_lock = ufs_lock,
	.vop_unlock = ufs_unlock,
	.vop_bmap = ufs_bmap,
	.vop_strategy = ufs_strategy,
	.vop_print = ufs_print,
	.vop_islocked = ufs_islocked,
	.vop_pathconf = ufs_pathconf,
	.vop_advlock = ufs_advlock,
	.vop_bwrite = vop_generic_bwrite
};

struct vops ffs_specvops = {
	.vop_close = ufsspec_close,
	.vop_access = ufs_access,
	.vop_getattr = ufs_getattr,
	.vop_setattr = ufs_setattr,
	.vop_read = ufsspec_read,
	.vop_write = ufsspec_write,
	.vop_fsync = ffs_fsync,
	.vop_inactive = ufs_inactive,
	.vop_reclaim = ffs_reclaim,
	.vop_lock = ufs_lock,
	.vop_unlock = ufs_unlock,
	.vop_print = ufs_print,
	.vop_islocked = ufs_islocked,

	/* XXX: Keep in sync with spec_vops */
	.vop_lookup = vop_generic_lookup,
	.vop_create = spec_badop,
	.vop_mknod = spec_badop,
	.vop_open = spec_open,
	.vop_ioctl = spec_ioctl,
	.vop_poll = spec_poll,
	.vop_kqfilter = spec_kqfilter,
	.vop_revoke = vop_generic_revoke,
	.vop_remove = spec_badop,
	.vop_link = spec_badop,
	.vop_rename = spec_badop,
	.vop_mkdir = spec_badop,
	.vop_rmdir = spec_badop,
	.vop_symlink = spec_badop,
	.vop_readdir = spec_badop,
	.vop_readlink = spec_badop,
	.vop_abortop = spec_badop,
	.vop_bmap = vop_generic_bmap,
	.vop_strategy = spec_strategy,
	.vop_pathconf = spec_pathconf,
	.vop_advlock = spec_advlock,
	.vop_bwrite = vop_generic_bwrite,
};

#ifdef FIFO
struct vops ffs_fifovops = {
	.vop_close = ufsfifo_close,
	.vop_access = ufs_access,
	.vop_getattr = ufs_getattr,
	.vop_setattr = ufs_setattr,
	.vop_read = ufsfifo_read,
	.vop_write = ufsfifo_write,
	.vop_fsync = ffs_fsync,
	.vop_inactive = ufs_inactive,
	.vop_reclaim = ffsfifo_reclaim,
	.vop_lock = ufs_lock,
	.vop_unlock = ufs_unlock,
	.vop_print = ufs_print,
	.vop_islocked = ufs_islocked,
	.vop_bwrite = vop_generic_bwrite,

	/* XXX: Keep in sync with fifo_vops */
	.vop_lookup = vop_generic_lookup,
	.vop_create = fifo_badop,
	.vop_mknod = fifo_badop,
	.vop_open = fifo_open,
	.vop_ioctl = fifo_ioctl,
	.vop_poll = fifo_poll,
	.vop_kqfilter = fifo_kqfilter,
	.vop_revoke = vop_generic_revoke,
	.vop_remove = fifo_badop,
	.vop_link = fifo_badop,
	.vop_rename = fifo_badop,
	.vop_mkdir = fifo_badop,
	.vop_rmdir = fifo_badop,
	.vop_symlink = fifo_badop,
	.vop_readdir = fifo_badop,
	.vop_readlink = fifo_badop,
	.vop_abortop = fifo_badop,
	.vop_bmap = vop_generic_bmap,
	.vop_strategy = fifo_badop,
	.vop_pathconf = fifo_pathconf,
	.vop_advlock = fifo_advlock
};
#endif /* FIFO */

/*
 * Vnode op for reading.
 */
int
ffs_read(void *v)
{
	struct vop_read_args *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	int size, xfersize, blkoffset;
	mode_t mode;
	int error;

	vp = ap->a_vp;
	ip = VTOI(vp);
	mode = DIP(ip, mode);
	uio = ap->a_uio;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if (DIP(ip, size) < ip->i_ump->um_maxsymlinklen ||
		    (ip->i_ump->um_maxsymlinklen == 0 && DIP(ip, blocks) == 0))
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	fs = ip->i_fs;
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = DIP(ip, size) - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = fs->fs_bsize;	/* WAS blksize(fs, ip, lbn); */
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

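		/*
		 * Illustrative example (assuming a 16384-byte fs_bsize):
		 * a read at uio_offset 20000 falls in logical block 1 at
		 * blkoffset 3616, so at most 16384 - 3616 = 12768 bytes
		 * come from this block, further clamped by uio_resid and
		 * bytesinfile above.
		 */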
		if (lblktosize(fs, nextlbn) >= DIP(ip, size))
			error = bread(vp, lbn, size, &bp);
		else
			error = bread_cluster(vp, lbn, size, &bp);

		if (error)
			break;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove(bp->b_data + blkoffset, xfersize, uio);
		if (error)
			break;
		brelse(bp);
	}
	if (bp != NULL)
		brelse(bp);
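	/*
	 * Note the access for a later atime update unless the mount
	 * disables atime; if the inode is already flagged as changed or
	 * updated, record the access anyway since the inode will be
	 * written out regardless.
	 */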
	if (!(vp->v_mount->mnt_flag & MNT_NOATIME) ||
	    (ip->i_flag & (IN_CHANGE | IN_UPDATE))) {
		ip->i_flag |= IN_ACCESS;
	}
	return (error);
}

/*
 * Vnode op for writing.
 */
int
ffs_write(void *v)
{
	struct vop_write_args *ap = v;
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	daddr_t lbn;
	off_t osize;
	int blkoffset, error, extended, flags, ioflag, size, xfersize;
	size_t resid;
	ssize_t overrun;

	extended = 0;
	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	vp = ap->a_vp;
	ip = VTOI(vp);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("ffs_write: mode");
#endif

	/*
	 * If writing 0 bytes, succeed and do not change
	 * update time or file offset (standards compliance)
	 */
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = DIP(ip, size);
		if ((DIP(ip, flags) & APPEND) && uio->uio_offset != DIP(ip, size))
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		if ((ioflag & IO_SYNC) == 0)
			panic("ffs_write: nonsync dir write");
		break;
	default:
		panic("ffs_write: type %d", vp->v_type);
	}

	fs = ip->i_fs;
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);

	/* do the filesize rlimit check */
	if ((error = vn_fsizechk(vp, uio, ioflag, &overrun)))
		return (error);

	resid = uio->uio_resid;
	osize = DIP(ip, size);
	flags = ioflag & IO_SYNC ? B_SYNC : 0;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
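		/*
		 * For a partial-block write, request B_CLRBUF so the
		 * buffer comes back with its existing contents valid
		 * (read in, or zeroed for a fresh allocation) and the
		 * bytes we do not overwrite are preserved; a full-block
		 * write replaces everything, so no read-back is needed.
		 */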
		if (fs->fs_bsize > xfersize)
			flags |= B_CLRBUF;
		else
			flags &= ~B_CLRBUF;

		if ((error = UFS_BUF_ALLOC(ip, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp)) != 0)
			break;
		if (uio->uio_offset + xfersize > DIP(ip, size)) {
			DIP_ASSIGN(ip, size, uio->uio_offset + xfersize);
			uvm_vnp_setsize(vp, DIP(ip, size));
			extended = 1;
		}
		(void)uvm_vnp_uncache(vp);

		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error = uiomove(bp->b_data + blkoffset, xfersize, uio);
		/*
		 * If the buffer is not already filled and we encounter an
		 * error while trying to fill it, we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland mmap.
		 *
		 * Note that we don't need to clear buffers that were
		 * allocated with the B_CLRBUF flag set.
		 */
		if (error != 0 && !(flags & B_CLRBUF))
			memset(bp->b_data + blkoffset, 0, xfersize);

#if 0
		if (ioflag & IO_NOCACHE)
			bp->b_flags |= B_NOCACHE;
#endif
		if (ioflag & IO_SYNC)
			(void)bwrite(bp);
		else if (xfersize + blkoffset == fs->fs_bsize) {
			bawrite(bp);
		} else
			bdwrite(bp);

		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0 &&
	    (vp->v_mount->mnt_flag & MNT_NOPERM) == 0)
		DIP_ASSIGN(ip, mode, DIP(ip, mode) & ~(ISUID | ISGID));
	if (resid > uio->uio_resid)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
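	/*
	 * On error with IO_UNIT semantics, roll the file back to its
	 * original size and restore the uio so the caller observes
	 * all-or-nothing behaviour for the write.
	 */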
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)UFS_TRUNCATE(ip, osize,
			    ioflag & IO_SYNC, ap->a_cred);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
		error = UFS_UPDATE(ip, 1);
	}
	/* correct the result for writes clamped by vn_fsizechk() */
	uio->uio_resid += overrun;
	return (error);
}

/*
 * Synch an open file.
 */
int
ffs_fsync(void *v)
{
	struct vop_fsync_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct buf *bp, *nbp;
	int s, error, passes, skipmeta;

	if (vp->v_type == VBLK &&
	    vp->v_specmountpoint != NULL &&
	    (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP))
		softdep_fsync_mountdev(vp, ap->a_waitfor);

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (ap->a_waitfor == MNT_WAIT)
		skipmeta = 1;
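	/*
	 * On a synchronous request the first pass skips metadata
	 * buffers (indirect blocks carry negative logical block
	 * numbers) so file data is queued first; the loop below is
	 * then rerun with skipmeta cleared to pick up the metadata.
	 */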
	s = splbio();
loop:
	LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
		bp->b_flags &= ~B_SCANNED;
	}
	LIST_FOREACH_SAFE(bp, &vp->v_dirtyblkhd, b_vnbufs, nbp) {
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if (bp->b_flags & (B_BUSY | B_SCANNED))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		if (skipmeta && bp->b_lblkno < 0)
			continue;
		if (ap->a_waitfor != MNT_WAIT &&
		    LIST_FIRST(&bp->b_dep) != NULL &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0, 1)) {
			bp->b_flags |= B_DEFERRED;
			continue;
		}

		bremfree(bp);
		buf_acquire(bp);
		bp->b_flags |= B_SCANNED;
		splx(s);
		/*
		 * On our final pass through, do all I/O synchronously
		 * so that we can find out if our flush is failing
		 * because of write errors.
		 */
		if (passes > 0 || ap->a_waitfor != MNT_WAIT)
			(void) bawrite(bp);
		else if ((error = bwrite(bp)) != 0)
			return (error);
		s = splbio();
		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		nbp = LIST_FIRST(&vp->v_dirtyblkhd);
	}
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}
	if (ap->a_waitfor == MNT_WAIT) {
		vwaitforio(vp, 0, "ffs_fsync", 0);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(ap)) != 0)
			return (error);
		s = splbio();
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (vp->v_type != VBLK)
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	splx(s);
	return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT));
}

/*
 * Reclaim an inode so that it can be used for other purposes.
 */
int
ffs_reclaim(void *v)
{
	struct vop_reclaim_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	int error;

	if ((error = ufs_reclaim(vp, ap->a_p)) != 0)
		return (error);

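	/*
	 * i_din1 and i_din2 refer to the same dinode pointer storage
	 * in struct inode, so a non-NULL i_din1 simply means a dinode
	 * is attached; um_fstype selects which pool it came from.
	 */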
	if (ip->i_din1 != NULL) {
#ifdef FFS2
		if (ip->i_ump->um_fstype == UM_UFS2)
			pool_put(&ffs_dinode2_pool, ip->i_din2);
		else
#endif
			pool_put(&ffs_dinode1_pool, ip->i_din1);
	}

	pool_put(&ffs_ino_pool, ip);

	vp->v_data = NULL;

	return (0);
}

#ifdef FIFO
int
ffsfifo_reclaim(void *v)
{
	fifo_reclaim(v);
	return (ffs_reclaim(v));
}
#endif