LCOV - code coverage report
Current view: top level - ufs/ffs - ffs_softdep.c (source / functions)
Test: 6.4                                Date: 2018-10-19 03:25:38
Coverage:   Lines:     0 / 2385   (0.0 %)
            Functions: 0 / 78     (0.0 %)
Legend:     Lines: hit | not hit

          Line data    Source code
       1             : /*      $OpenBSD: ffs_softdep.c,v 1.143 2018/07/02 20:56:22 bluhm Exp $ */
       2             : 
       3             : /*
       4             :  * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
       5             :  *
       6             :  * The soft updates code is derived from the appendix of a University
       7             :  * of Michigan technical report (Gregory R. Ganger and Yale N. Patt,
       8             :  * "Soft Updates: A Solution to the Metadata Update Problem in File
       9             :  * Systems", CSE-TR-254-95, August 1995).
      10             :  *
      11             :  * Further information about soft updates can be obtained from:
      12             :  *
      13             :  *      Marshall Kirk McKusick          http://www.mckusick.com/softdep/
      14             :  *      1614 Oxford Street              mckusick@mckusick.com
      15             :  *      Berkeley, CA 94709-1608         +1-510-843-9542
      16             :  *      USA
      17             :  *
      18             :  * Redistribution and use in source and binary forms, with or without
      19             :  * modification, are permitted provided that the following conditions
      20             :  * are met:
      21             :  *
      22             :  * 1. Redistributions of source code must retain the above copyright
      23             :  *    notice, this list of conditions and the following disclaimer.
      24             :  * 2. Redistributions in binary form must reproduce the above copyright
      25             :  *    notice, this list of conditions and the following disclaimer in the
      26             :  *    documentation and/or other materials provided with the distribution.
      27             :  *
      28             :  * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
      29             :  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
      30             :  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
      31             :  * DISCLAIMED.  IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
      32             :  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
      33             :  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
      34             :  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      35             :  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      36             :  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      37             :  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      38             :  * SUCH DAMAGE.
      39             :  *
      40             :  *      from: @(#)ffs_softdep.c 9.59 (McKusick) 6/21/00
      41             :  * $FreeBSD: src/sys/ufs/ffs/ffs_softdep.c,v 1.86 2001/02/04 16:08:18 phk Exp $
      42             :  */
      43             : 
      44             : #include <sys/param.h>
      45             : #include <sys/buf.h>
      46             : #include <sys/kernel.h>
      47             : #include <sys/malloc.h>
      48             : #include <sys/mount.h>
      49             : #include <sys/proc.h>
      50             : #include <sys/pool.h>
      51             : #include <sys/syslog.h>
      52             : #include <sys/systm.h>
      53             : #include <sys/vnode.h>
      54             : #include <sys/specdev.h>
      55             : #include <crypto/siphash.h>
      56             : #include <ufs/ufs/dir.h>
      57             : #include <ufs/ufs/quota.h>
      58             : #include <ufs/ufs/inode.h>
      59             : #include <ufs/ufs/ufsmount.h>
      60             : #include <ufs/ffs/fs.h>
      61             : #include <ufs/ffs/softdep.h>
      62             : #include <ufs/ffs/ffs_extern.h>
      63             : #include <ufs/ufs/ufs_extern.h>
      64             : 
      65             : #define STATIC
      66             : 
      67             : /*
      68             :  * Mapping of dependency structure types to malloc types.
      69             :  */
      70             : #define D_PAGEDEP       0
      71             : #define D_INODEDEP      1
      72             : #define D_NEWBLK        2
      73             : #define D_BMSAFEMAP     3
      74             : #define D_ALLOCDIRECT   4
      75             : #define D_INDIRDEP      5
      76             : #define D_ALLOCINDIR    6
      77             : #define D_FREEFRAG      7
      78             : #define D_FREEBLKS      8
      79             : #define D_FREEFILE      9
      80             : #define D_DIRADD        10
      81             : #define D_MKDIR         11
      82             : #define D_DIRREM        12
      83             : #define D_NEWDIRBLK     13
      84             : #define D_LAST          13
      85             : /*
      86             :  * Names of softdep types.
      87             :  */
      88             : const char *softdep_typenames[] = {
      89             :         "pagedep",
      90             :         "inodedep",
      91             :         "newblk",
      92             :         "bmsafemap",
      93             :         "allocdirect",
      94             :         "indirdep",
      95             :         "allocindir",
      96             :         "freefrag",
      97             :         "freeblks",
      98             :         "freefile",
      99             :         "diradd",
     100             :         "mkdir",
     101             :         "dirrem",
     102             :         "newdirblk",
     103             : };
     104             : #define TYPENAME(type) \
     105             :         ((unsigned)(type) <= D_LAST ? softdep_typenames[type] : "???")
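/*
 * For example, TYPENAME(D_PAGEDEP) evaluates to "pagedep" and
 * TYPENAME(D_NEWDIRBLK) to "newdirblk"; anything above D_LAST falls
 * back to "???".  The macro is used only for diagnostics, e.g. the
 * "Unknown type" panic in process_worklist_item() below.
 */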
     106             : /*
     107             :  * Finding the current process.
     108             :  */
     109             : #define CURPROC curproc
     110             : /*
     111             :  * End system adaptation definitions.
     112             :  */
     113             : 
     114             : /*
     115             :  * Internal function prototypes.
     116             :  */
     117             : STATIC  void softdep_error(char *, int);
     118             : STATIC  void drain_output(struct vnode *, int);
     119             : STATIC  int getdirtybuf(struct buf *, int);
     120             : STATIC  void clear_remove(struct proc *);
     121             : STATIC  void clear_inodedeps(struct proc *);
     122             : STATIC  int flush_pagedep_deps(struct vnode *, struct mount *,
     123             :             struct diraddhd *);
     124             : STATIC  int flush_inodedep_deps(struct fs *, ufsino_t);
     125             : STATIC  int handle_written_filepage(struct pagedep *, struct buf *);
     126             : STATIC  void diradd_inode_written(struct diradd *, struct inodedep *);
     127             : STATIC  int handle_written_inodeblock(struct inodedep *, struct buf *);
     128             : STATIC  void handle_allocdirect_partdone(struct allocdirect *);
     129             : STATIC  void handle_allocindir_partdone(struct allocindir *);
     130             : STATIC  void initiate_write_filepage(struct pagedep *, struct buf *);
     131             : STATIC  void handle_written_mkdir(struct mkdir *, int);
     132             : STATIC  void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *);
     133             : #ifdef FFS2
     134             : STATIC  void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *);
     135             : #endif
     136             : STATIC  void handle_workitem_freefile(struct freefile *);
     137             : STATIC  void handle_workitem_remove(struct dirrem *);
     138             : STATIC  struct dirrem *newdirrem(struct buf *, struct inode *,
     139             :             struct inode *, int, struct dirrem **);
     140             : STATIC  void free_diradd(struct diradd *);
     141             : STATIC  void free_allocindir(struct allocindir *, struct inodedep *);
     142             : STATIC  void free_newdirblk(struct newdirblk *);
     143             : STATIC  int indir_trunc(struct inode *, daddr_t, int, daddr_t, long *);
     144             : STATIC  void deallocate_dependencies(struct buf *, struct inodedep *);
     145             : STATIC  void free_allocdirect(struct allocdirectlst *,
     146             :             struct allocdirect *, int);
     147             : STATIC  int check_inode_unwritten(struct inodedep *);
     148             : STATIC  int free_inodedep(struct inodedep *);
     149             : STATIC  void handle_workitem_freeblocks(struct freeblks *);
     150             : STATIC  void merge_inode_lists(struct inodedep *);
     151             : STATIC  void setup_allocindir_phase2(struct buf *, struct inode *,
     152             :             struct allocindir *);
     153             : STATIC  struct allocindir *newallocindir(struct inode *, int, daddr_t,
     154             :             daddr_t);
     155             : STATIC  void handle_workitem_freefrag(struct freefrag *);
     156             : STATIC  struct freefrag *newfreefrag(struct inode *, daddr_t, long);
     157             : STATIC  void allocdirect_merge(struct allocdirectlst *,
     158             :             struct allocdirect *, struct allocdirect *);
     159             : STATIC  struct bmsafemap *bmsafemap_lookup(struct buf *);
     160             : STATIC  int newblk_lookup(struct fs *, daddr_t, int,
     161             :             struct newblk **);
     162             : STATIC  int inodedep_lookup(struct fs *, ufsino_t, int, struct inodedep **);
     163             : STATIC  int pagedep_lookup(struct inode *, daddr_t, int, struct pagedep **);
     164             : STATIC  void pause_timer(void *);
     165             : STATIC  int request_cleanup(int, int);
     166             : STATIC  int process_worklist_item(struct mount *, int);
     167             : STATIC  void add_to_worklist(struct worklist *);
     168             : 
     169             : /*
     170             :  * Exported softdep operations.
     171             :  */
     172             : void softdep_disk_io_initiation(struct buf *);
     173             : void softdep_disk_write_complete(struct buf *);
     174             : void softdep_deallocate_dependencies(struct buf *);
     175             : void softdep_move_dependencies(struct buf *, struct buf *);
     176             : int softdep_count_dependencies(struct buf *bp, int, int);
     177             : 
     178             : /*
     179             :  * Locking primitives.
     180             :  *
     181             :  * For a uniprocessor, all we need to do is protect against disk
     182             :  * interrupts. For a multiprocessor, this lock would have to be
     183             :  * a mutex. A single mutex is used throughout this file, though
     184             :  * finer grain locking could be used if contention warranted it.
     185             :  *
     186             :  * For a multiprocessor, the sleep call would accept a lock and
     187             :  * release it after the sleep processing was complete. In a uniprocessor
      188             :  * implementation there is no such interlock, so we simply mark
     189             :  * the places where it needs to be done with the `interlocked' form
     190             :  * of the lock calls. Since the uniprocessor sleep already interlocks
     191             :  * the spl, there is nothing that really needs to be done.
     192             :  */
     193             : #ifndef /* NOT */ DEBUG
     194             : STATIC struct lockit {
     195             :         int     lkt_spl;
     196             : } lk = { 0 };
     197             : #define ACQUIRE_LOCK(lk)                (lk)->lkt_spl = splbio()
     198             : #define FREE_LOCK(lk)                   splx((lk)->lkt_spl)
     199             : #define ACQUIRE_LOCK_INTERLOCKED(lk,s)  (lk)->lkt_spl = (s)
     200             : #define FREE_LOCK_INTERLOCKED(lk)       ((lk)->lkt_spl)
     201             : 
     202             : #else /* DEBUG */
     203             : STATIC struct lockit {
     204             :         int     lkt_spl;
     205             :         pid_t   lkt_held;
     206             :         int     lkt_line;
     207             : } lk = { 0, -1 };
     208             : STATIC int lockcnt;
     209             : 
     210             : STATIC  void acquire_lock(struct lockit *, int);
     211             : STATIC  void free_lock(struct lockit *, int);
     212             : STATIC  void acquire_lock_interlocked(struct lockit *, int, int);
     213             : STATIC  int free_lock_interlocked(struct lockit *, int);
     214             : 
     215             : #define ACQUIRE_LOCK(lk)                acquire_lock(lk, __LINE__)
     216             : #define FREE_LOCK(lk)                   free_lock(lk, __LINE__)
     217             : #define ACQUIRE_LOCK_INTERLOCKED(lk,s)  acquire_lock_interlocked(lk, (s), __LINE__)
     218             : #define FREE_LOCK_INTERLOCKED(lk)       free_lock_interlocked(lk, __LINE__)
     219             : 
     220             : STATIC void
     221             : acquire_lock(struct lockit *lk, int line)
     222             : {
     223             :         pid_t holder;
     224             :         int original_line;
     225             : 
     226             :         if (lk->lkt_held != -1) {
     227             :                 holder = lk->lkt_held;
     228             :                 original_line = lk->lkt_line;
     229             :                 FREE_LOCK(lk);
     230             :                 if (holder == CURPROC->p_tid)
     231             :                         panic("softdep_lock: locking against myself, acquired at line %d, relocked at line %d", original_line, line);
     232             :                 else
     233             :                         panic("softdep_lock: lock held by %d, acquired at line %d, relocked at line %d", holder, original_line, line);
     234             :         }
     235             :         lk->lkt_spl = splbio();
     236             :         lk->lkt_held = CURPROC->p_tid;
     237             :         lk->lkt_line = line;
     238             :         lockcnt++;
     239             : }
     240             : 
     241             : STATIC void
     242             : free_lock(struct lockit *lk, int line)
     243             : {
     244             : 
     245             :         if (lk->lkt_held == -1)
     246             :                 panic("softdep_unlock: lock not held at line %d", line);
     247             :         lk->lkt_held = -1;
     248             :         splx(lk->lkt_spl);
     249             : }
     250             : 
     251             : STATIC void
     252             : acquire_lock_interlocked(struct lockit *lk, int s, int line)
     253             : {
     254             :         pid_t holder;
     255             :         int original_line;
     256             : 
     257             :         if (lk->lkt_held != -1) {
     258             :                 holder = lk->lkt_held;
     259             :                 original_line = lk->lkt_line;
     260             :                 FREE_LOCK_INTERLOCKED(lk);
     261             :                 if (holder == CURPROC->p_tid)
     262             :                         panic("softdep_lock: locking against myself, acquired at line %d, relocked at line %d", original_line, line);
     263             :                 else
     264             :                         panic("softdep_lock: lock held by %d, acquired at line %d, relocked at line %d", holder, original_line, line);
     265             :         }
     266             :         lk->lkt_held = CURPROC->p_tid;
     267             :         lk->lkt_line = line;
     268             :         lk->lkt_spl = s;
     269             :         lockcnt++;
     270             : }
     271             : 
     272             : STATIC int
     273             : free_lock_interlocked(struct lockit *lk, int line)
     274             : {
     275             : 
     276             :         if (lk->lkt_held == -1)
     277             :                 panic("softdep_unlock_interlocked: lock not held at line %d", line);
     278             :         lk->lkt_held = -1;
     279             : 
     280             :         return (lk->lkt_spl);
     281             : }
     282             : #endif /* DEBUG */
     283             : 
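/*
 * A sketch of the usage pattern the macros above are written for (this
 * is how the routines later in this file use them, not a new interface):
 * raise to splbio with ACQUIRE_LOCK() before touching any dependency
 * list and drop it with FREE_LOCK() before doing anything that can
 * sleep.  The *_INTERLOCKED forms hand an already-raised spl level back
 * and forth across a tsleep(), as sema_get() does below.
 *
 *	ACQUIRE_LOCK(&lk);
 *	... manipulate dependency structures ...
 *	FREE_LOCK(&lk);
 */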
     284             : /*
     285             :  * Place holder for real semaphores.
     286             :  */
     287             : struct sema {
     288             :         int     value;
     289             :         pid_t   holder;
     290             :         char    *name;
     291             :         int     prio;
     292             :         int     timo;
     293             : };
     294             : STATIC  void sema_init(struct sema *, char *, int, int);
     295             : STATIC  int sema_get(struct sema *, struct lockit *);
     296             : STATIC  void sema_release(struct sema *);
     297             : 
     298             : STATIC void
     299           0 : sema_init(struct sema *semap, char *name, int prio, int timo)
     300             : {
     301             : 
     302           0 :         semap->holder = -1;
     303           0 :         semap->value = 0;
     304           0 :         semap->name = name;
     305           0 :         semap->prio = prio;
     306           0 :         semap->timo = timo;
     307           0 : }
     308             : 
     309             : STATIC int
     310           0 : sema_get(struct sema *semap, struct lockit *interlock)
     311             : {
     312             :         int s;
     313             : 
     314           0 :         if (semap->value++ > 0) {
     315           0 :                 if (interlock != NULL)
     316           0 :                         s = FREE_LOCK_INTERLOCKED(interlock);
     317           0 :                 tsleep((caddr_t)semap, semap->prio, semap->name, semap->timo);
     318           0 :                 if (interlock != NULL) {
     319           0 :                         ACQUIRE_LOCK_INTERLOCKED(interlock, s);
     320           0 :                         FREE_LOCK(interlock);
     321           0 :                 }
     322           0 :                 return (0);
     323             :         }
     324           0 :         semap->holder = CURPROC->p_tid;
     325           0 :         if (interlock != NULL)
     326           0 :                 FREE_LOCK(interlock);
     327           0 :         return (1);
     328           0 : }
     329             : 
     330             : STATIC void
     331           0 : sema_release(struct sema *semap)
     332             : {
     333             : 
     334           0 :         if (semap->value <= 0 || semap->holder != CURPROC->p_tid) {
     335             : #ifdef DEBUG
     336             :                 if (lk.lkt_held != -1)
     337             :                         FREE_LOCK(&lk);
     338             : #endif
     339           0 :                 panic("sema_release: not held");
     340             :         }
     341           0 :         if (--semap->value > 0) {
     342           0 :                 semap->value = 0;
     343           0 :                 wakeup(semap);
     344           0 :         }
     345           0 :         semap->holder = -1;
     346           0 : }
     347             : 
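/*
 * Sketch of how these semaphores are used by the hash lookup routines
 * later in this file (see pagedep_lookup() and inodedep_lookup()): the
 * semaphore serializes allocation of a new hash entry, and a 0 return
 * from sema_get() means another process got there first, so the lookup
 * is simply retried.
 *
 *	if (sema_get(&pagedep_in_progress, &lk) == 0) {
 *		ACQUIRE_LOCK(&lk);
 *		goto top;		(retry the hash lookup)
 *	}
 *	... allocate and initialize the new entry ...
 *	ACQUIRE_LOCK(&lk);
 *	LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash);
 *	sema_release(&pagedep_in_progress);
 */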
     348             : /*
     349             :  * Memory management.
     350             :  */
     351             : STATIC struct pool pagedep_pool;
     352             : STATIC struct pool inodedep_pool;
     353             : STATIC struct pool newblk_pool;
     354             : STATIC struct pool bmsafemap_pool;
     355             : STATIC struct pool allocdirect_pool;
     356             : STATIC struct pool indirdep_pool;
     357             : STATIC struct pool allocindir_pool;
     358             : STATIC struct pool freefrag_pool;
     359             : STATIC struct pool freeblks_pool;
     360             : STATIC struct pool freefile_pool;
     361             : STATIC struct pool diradd_pool;
     362             : STATIC struct pool mkdir_pool;
     363             : STATIC struct pool dirrem_pool;
     364             : STATIC struct pool newdirblk_pool;
     365             : 
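/*
 * These pools are set up once at initialization time (in
 * softdep_initialize(), which lies outside the lines shown here).  A
 * minimal sketch of such a pool_init() call, with the wait-channel name
 * "pagedeppl" chosen only for illustration:
 *
 *	pool_init(&pagedep_pool, sizeof(struct pagedep), 0, IPL_NONE,
 *	    PR_WAITOK, "pagedeppl", NULL);
 */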
     366             : static __inline void
     367           0 : softdep_free(struct worklist *item, int type)
     368             : {
     369             : 
     370           0 :         switch (type) {
     371             :         case D_PAGEDEP:
     372           0 :                 pool_put(&pagedep_pool, item);
     373           0 :                 break;
     374             : 
     375             :         case D_INODEDEP:
     376           0 :                 pool_put(&inodedep_pool, item);
     377           0 :                 break;
     378             : 
     379             :         case D_BMSAFEMAP:
     380           0 :                 pool_put(&bmsafemap_pool, item);
     381           0 :                 break;
     382             : 
     383             :         case D_ALLOCDIRECT:
     384           0 :                 pool_put(&allocdirect_pool, item);
     385           0 :                 break;
     386             : 
     387             :         case D_INDIRDEP:
     388           0 :                 pool_put(&indirdep_pool, item);
     389           0 :                 break;
     390             : 
     391             :         case D_ALLOCINDIR:
     392           0 :                 pool_put(&allocindir_pool, item);
     393           0 :                 break;
     394             : 
     395             :         case D_FREEFRAG:
     396           0 :                 pool_put(&freefrag_pool, item);
     397           0 :                 break;
     398             : 
     399             :         case D_FREEBLKS:
     400           0 :                 pool_put(&freeblks_pool, item);
     401           0 :                 break;
     402             : 
     403             :         case D_FREEFILE:
     404           0 :                 pool_put(&freefile_pool, item);
     405           0 :                 break;
     406             : 
     407             :         case D_DIRADD:
     408           0 :                 pool_put(&diradd_pool, item);
     409           0 :                 break;
     410             : 
     411             :         case D_MKDIR:
     412           0 :                 pool_put(&mkdir_pool, item);
     413           0 :                 break;
     414             : 
     415             :         case D_DIRREM:
     416           0 :                 pool_put(&dirrem_pool, item);
     417           0 :                 break;
     418             : 
     419             :         case D_NEWDIRBLK:
     420           0 :                 pool_put(&newdirblk_pool, item);
     421           0 :                 break;
     422             : 
     423             :         default:
     424             : #ifdef DEBUG
     425             :                 if (lk.lkt_held != -1)
     426             :                         FREE_LOCK(&lk);
     427             : #endif
     428           0 :                 panic("softdep_free: unknown type %d", type);
     429             :         }
     430           0 : }
     431             : 
     432             : struct workhead softdep_freequeue;
     433             : 
     434             : static __inline void
     435           0 : softdep_freequeue_add(struct worklist *item)
     436             : {
     437             :         int s;
     438             : 
     439           0 :         s = splbio();
     440           0 :         LIST_INSERT_HEAD(&softdep_freequeue, item, wk_list);
     441           0 :         splx(s);
     442           0 : }
     443             : 
     444             : static __inline void
     445           0 : softdep_freequeue_process(void)
     446             : {
     447             :         struct worklist *wk;
     448             : 
     449           0 :         splassert(IPL_BIO);
     450             : 
     451           0 :         while ((wk = LIST_FIRST(&softdep_freequeue)) != NULL) {
     452           0 :                 LIST_REMOVE(wk, wk_list);
     453           0 :                 FREE_LOCK(&lk);
     454           0 :                 softdep_free(wk, wk->wk_type);
     455           0 :                 ACQUIRE_LOCK(&lk);
     456             :         }
     457           0 : }
     458             : 
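/*
 * Taken together, softdep_freequeue_add() and softdep_freequeue_process()
 * implement deferred freeing: WORKITEM_FREE() (defined below) merely
 * queues the item, and the queue is drained later with the softdep lock
 * held, the lock being released around each pool_put().  A sketch of the
 * drain as it appears in softdep_process_worklist():
 *
 *	ACQUIRE_LOCK(&lk);
 *	softdep_freequeue_process();
 *	FREE_LOCK(&lk);
 */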
     459             : /*
     460             :  * Worklist queue management.
     461             :  * These routines require that the lock be held.
     462             :  */
     463             : #ifndef /* NOT */ DEBUG
     464             : #define WORKLIST_INSERT(head, item) do {        \
     465             :         (item)->wk_state |= ONWORKLIST;              \
     466             :         LIST_INSERT_HEAD(head, item, wk_list);  \
     467             : } while (0)
     468             : #define WORKLIST_REMOVE(item) do {              \
     469             :         (item)->wk_state &= ~ONWORKLIST; \
     470             :         LIST_REMOVE(item, wk_list);             \
     471             : } while (0)
     472             : #define WORKITEM_FREE(item, type) softdep_freequeue_add((struct worklist *)item)
     473             : 
     474             : #else /* DEBUG */
     475             : STATIC  void worklist_insert(struct workhead *, struct worklist *);
     476             : STATIC  void worklist_remove(struct worklist *);
     477             : STATIC  void workitem_free(struct worklist *);
     478             : 
     479             : #define WORKLIST_INSERT(head, item) worklist_insert(head, item)
     480             : #define WORKLIST_REMOVE(item) worklist_remove(item)
     481             : #define WORKITEM_FREE(item, type) workitem_free((struct worklist *)item)
     482             : 
     483             : STATIC void
     484             : worklist_insert(struct workhead *head, struct worklist *item)
     485             : {
     486             : 
     487             :         if (lk.lkt_held == -1)
     488             :                 panic("worklist_insert: lock not held");
     489             :         if (item->wk_state & ONWORKLIST) {
     490             :                 FREE_LOCK(&lk);
     491             :                 panic("worklist_insert: already on list");
     492             :         }
     493             :         item->wk_state |= ONWORKLIST;
     494             :         LIST_INSERT_HEAD(head, item, wk_list);
     495             : }
     496             : 
     497             : STATIC void
     498             : worklist_remove(struct worklist *item)
     499             : {
     500             : 
     501             :         if (lk.lkt_held == -1)
     502             :                 panic("worklist_remove: lock not held");
     503             :         if ((item->wk_state & ONWORKLIST) == 0) {
     504             :                 FREE_LOCK(&lk);
     505             :                 panic("worklist_remove: not on list");
     506             :         }
     507             :         item->wk_state &= ~ONWORKLIST;
     508             :         LIST_REMOVE(item, wk_list);
     509             : }
     510             : 
     511             : STATIC void
     512             : workitem_free(struct worklist *item)
     513             : {
     514             : 
     515             :         if (item->wk_state & ONWORKLIST) {
     516             :                 if (lk.lkt_held != -1)
     517             :                         FREE_LOCK(&lk);
     518             :                 panic("workitem_free: still on list");
     519             :         }
     520             :         softdep_freequeue_add(item);
     521             : }
     522             : #endif /* DEBUG */
     523             : 
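/*
 * Illustrative use of the worklist macros above; every caller must hold
 * the softdep lock, as add_to_worklist() and process_worklist_item()
 * below do.  The field names follow the pagedep example from this file.
 *
 *	ACQUIRE_LOCK(&lk);
 *	WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
 *	...
 *	WORKLIST_REMOVE(&pagedep->pd_list);
 *	WORKITEM_FREE(pagedep, D_PAGEDEP);
 *	FREE_LOCK(&lk);
 */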
     524             : /*
     525             :  * Workitem queue management
     526             :  */
     527             : STATIC struct workhead softdep_workitem_pending;
     528             : STATIC struct worklist *worklist_tail;
     529             : STATIC int num_on_worklist;     /* number of worklist items to be processed */
      530             : STATIC int softdep_worklist_busy; /* >0 => in use, -1 => unmount flush */
     531             : STATIC int softdep_worklist_req; /* serialized waiters */
     532             : STATIC int max_softdeps;        /* maximum number of structs before slowdown */
     533             : STATIC int tickdelay = 2;       /* number of ticks to pause during slowdown */
     534             : STATIC int proc_waiting;        /* tracks whether we have a timeout posted */
     535             : STATIC int *stat_countp;        /* statistic to count in proc_waiting timeout */
     536             : STATIC struct timeout proc_waiting_timeout; 
     537             : STATIC struct proc *filesys_syncer; /* proc of filesystem syncer process */
     538             : STATIC int req_clear_inodedeps; /* syncer process flush some inodedeps */
     539             : #define FLUSH_INODES    1
     540             : STATIC int req_clear_remove;    /* syncer process flush some freeblks */
     541             : #define FLUSH_REMOVE    2
     542             : /*
     543             :  * runtime statistics
     544             :  */
     545             : STATIC int stat_worklist_push;  /* number of worklist cleanups */
     546             : STATIC int stat_blk_limit_push; /* number of times block limit neared */
     547             : STATIC int stat_ino_limit_push; /* number of times inode limit neared */
     548             : STATIC int stat_blk_limit_hit;  /* number of times block slowdown imposed */
     549             : STATIC int stat_ino_limit_hit;  /* number of times inode slowdown imposed */
     550             : STATIC int stat_sync_limit_hit; /* number of synchronous slowdowns imposed */
     551             : STATIC int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */
     552             : STATIC int stat_inode_bitmap;   /* bufs redirtied as inode bitmap not written */
     553             : STATIC int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */
     554             : STATIC int stat_dir_entry;      /* bufs redirtied as dir entry cannot write */
     555             : 
     556             : /*
     557             :  * Add an item to the end of the work queue.
     558             :  * This routine requires that the lock be held.
     559             :  * This is the only routine that adds items to the list.
     560             :  * The following routine is the only one that removes items
     561             :  * and does so in order from first to last.
     562             :  */
     563             : STATIC void
     564           0 : add_to_worklist(struct worklist *wk)
     565             : {
     566             : 
     567           0 :         if (wk->wk_state & ONWORKLIST) {
     568             : #ifdef DEBUG
     569             :                 if (lk.lkt_held != -1)
     570             :                         FREE_LOCK(&lk);
     571             : #endif
     572           0 :                 panic("add_to_worklist: already on list");
     573             :         }
     574           0 :         wk->wk_state |= ONWORKLIST;
     575           0 :         if (LIST_FIRST(&softdep_workitem_pending) == NULL)
     576           0 :                 LIST_INSERT_HEAD(&softdep_workitem_pending, wk, wk_list);
     577             :         else
     578           0 :                 LIST_INSERT_AFTER(worklist_tail, wk, wk_list);
     579           0 :         worklist_tail = wk;
     580           0 :         num_on_worklist += 1;
     581           0 : }
     582             : 
     583             : /*
     584             :  * Process that runs once per second to handle items in the background queue.
     585             :  *
     586             :  * Note that we ensure that everything is done in the order in which they
     587             :  * appear in the queue. The code below depends on this property to ensure
     588             :  * that blocks of a file are freed before the inode itself is freed. This
     589             :  * ordering ensures that no new <vfsid, inum, lbn> triples will be generated
     590             :  * until all the old ones have been purged from the dependency lists.
     591             :  */
     592             : int 
     593           0 : softdep_process_worklist(struct mount *matchmnt)
     594             : {
     595           0 :         struct proc *p = CURPROC;
     596             :         int matchcnt, loopcount;
     597           0 :         struct timeval starttime;
     598             : 
     599             :         /*
     600             :          * First process any items on the delayed-free queue.
     601             :          */
     602           0 :         ACQUIRE_LOCK(&lk);
     603           0 :         softdep_freequeue_process();
     604           0 :         FREE_LOCK(&lk);
     605             : 
     606             :         /*
     607             :          * Record the process identifier of our caller so that we can give
     608             :          * this process preferential treatment in request_cleanup below.
     609             :          * We can't do this in softdep_initialize, because the syncer doesn't
     610             :          * have to run then.
     611             :          * NOTE! This function _could_ be called with a curproc != syncerproc.
     612             :          */
     613           0 :         filesys_syncer = syncerproc;
     614             :         matchcnt = 0;
     615             : 
     616             :         /*
     617             :          * There is no danger of having multiple processes run this
     618             :          * code, but we have to single-thread it when softdep_flushfiles()
     619             :          * is in operation to get an accurate count of the number of items
     620             :          * related to its mount point that are in the list.
     621             :          */
     622           0 :         if (matchmnt == NULL) {
     623           0 :                 if (softdep_worklist_busy < 0)
     624           0 :                         return(-1);
     625           0 :                 softdep_worklist_busy += 1;
     626           0 :         }
     627             : 
     628             :         /*
     629             :          * If requested, try removing inode or removal dependencies.
     630             :          */
     631           0 :         if (req_clear_inodedeps) {
     632           0 :                 clear_inodedeps(p);
     633           0 :                 req_clear_inodedeps -= 1;
     634           0 :                 wakeup_one(&proc_waiting);
     635           0 :         }
     636           0 :         if (req_clear_remove) {
     637           0 :                 clear_remove(p);
     638           0 :                 req_clear_remove -= 1;
     639           0 :                 wakeup_one(&proc_waiting);
     640           0 :         }
     641             :         loopcount = 1;
     642           0 :         getmicrouptime(&starttime);
     643           0 :         while (num_on_worklist > 0) {
     644           0 :                 matchcnt += process_worklist_item(matchmnt, 0);
     645             : 
     646             :                 /*
     647             :                  * If a umount operation wants to run the worklist
     648             :                  * accurately, abort.
     649             :                  */
     650           0 :                 if (softdep_worklist_req && matchmnt == NULL) {
     651             :                         matchcnt = -1;
     652           0 :                         break;
     653             :                 }
     654             : 
     655             :                 /*
     656             :                  * If requested, try removing inode or removal dependencies.
     657             :                  */
     658           0 :                 if (req_clear_inodedeps) {
     659           0 :                         clear_inodedeps(p);
     660           0 :                         req_clear_inodedeps -= 1;
     661           0 :                         wakeup_one(&proc_waiting);
     662           0 :                 }
     663           0 :                 if (req_clear_remove) {
     664           0 :                         clear_remove(p);
     665           0 :                         req_clear_remove -= 1;
     666           0 :                         wakeup_one(&proc_waiting);
     667           0 :                 }
     668             :                 /*
     669             :                  * We do not generally want to stop for buffer space, but if
     670             :                  * we are really being a buffer hog, we will stop and wait.
     671             :                  */
     672             : #if 0
     673             :                 if (loopcount++ % 128 == 0)
     674             :                         bwillwrite();
     675             : #endif
     676             :                 /*
     677             :                  * Never allow processing to run for more than one
     678             :                  * second. Otherwise the other syncer tasks may get
     679             :                  * excessively backlogged.
     680             :                  */
     681             :                 {
     682             :                         struct timeval diff;
     683           0 :                         struct timeval tv;
     684             : 
     685           0 :                         getmicrouptime(&tv);
     686           0 :                         timersub(&tv, &starttime, &diff);
     687           0 :                         if (diff.tv_sec != 0 && matchmnt == NULL) {
     688             :                                 matchcnt = -1;
     689           0 :                                 break;
     690             :                         }
     691           0 :                 }
     692             : 
     693             :                 /*
     694             :                  * Process any new items on the delayed-free queue.
     695             :                  */
     696           0 :                 ACQUIRE_LOCK(&lk);
     697           0 :                 softdep_freequeue_process();
     698           0 :                 FREE_LOCK(&lk);
     699             :         }
     700           0 :         if (matchmnt == NULL) {
     701           0 :                 softdep_worklist_busy -= 1;
     702           0 :                 if (softdep_worklist_req && softdep_worklist_busy == 0)
     703           0 :                         wakeup(&softdep_worklist_req);
     704             :         }
     705           0 :         return (matchcnt);
     706           0 : }
     707             : 
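/*
 * Sketch of the two ways this routine is typically driven: the syncer
 * calls it about once per second with no mount point, while the
 * unmount path (softdep_flushworklist() below) passes a specific mount
 * so that only matching items are counted.  The exact syncer call site
 * is outside the lines shown here.
 *
 *	(void) softdep_process_worklist(NULL);	   (periodic syncer pass)
 *	cnt = softdep_process_worklist(oldmnt);	   (unmount/flush path)
 */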
     708             : /*
     709             :  * Process one item on the worklist.
     710             :  */
     711             : STATIC int
     712           0 : process_worklist_item(struct mount *matchmnt, int flags)
     713             : {
     714             :         struct worklist *wk, *wkend;
     715             :         struct dirrem *dirrem;
     716             :         struct mount *mp;
     717             :         struct vnode *vp;
     718             :         int matchcnt = 0;
     719             : 
     720           0 :         ACQUIRE_LOCK(&lk);
     721             :         /*
     722             :          * Normally we just process each item on the worklist in order.
     723             :          * However, if we are in a situation where we cannot lock any
     724             :          * inodes, we have to skip over any dirrem requests whose
     725             :          * vnodes are resident and locked.
     726             :          */
     727           0 :         LIST_FOREACH(wk, &softdep_workitem_pending, wk_list) {
     728           0 :                 if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM)
     729             :                         break;
     730           0 :                 dirrem = WK_DIRREM(wk);
     731           0 :                 vp = ufs_ihashlookup(VFSTOUFS(dirrem->dm_mnt)->um_dev,
     732           0 :                     dirrem->dm_oldinum);
     733           0 :                 if (vp == NULL || !VOP_ISLOCKED(vp))
     734             :                         break;
     735             :         }
     736           0 :         if (wk == NULL) {
     737           0 :                 FREE_LOCK(&lk);
     738           0 :                 return (0);
     739             :         }
     740             :         /*
     741             :          * Remove the item to be processed. If we are removing the last
     742             :          * item on the list, we need to recalculate the tail pointer.
     743             :          * As this happens rarely and usually when the list is short,
     744             :          * we just run down the list to find it rather than tracking it
     745             :          * in the above loop.
     746             :          */
     747           0 :         WORKLIST_REMOVE(wk);
     748           0 :         if (wk == worklist_tail) {
     749           0 :                 LIST_FOREACH(wkend, &softdep_workitem_pending, wk_list)
     750           0 :                         if (LIST_NEXT(wkend, wk_list) == NULL)
     751             :                                 break;
     752           0 :                 worklist_tail = wkend;
     753           0 :         }
     754           0 :         num_on_worklist -= 1;
     755           0 :         FREE_LOCK(&lk);
     756           0 :         switch (wk->wk_type) {
     757             : 
     758             :         case D_DIRREM:
     759             :                 /* removal of a directory entry */
     760           0 :                 mp = WK_DIRREM(wk)->dm_mnt;
     761             : #if 0
     762             :                 if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
     763             :                         panic("%s: dirrem on suspended filesystem",
     764             :                                 "process_worklist_item");
     765             : #endif
     766           0 :                 if (mp == matchmnt)
     767           0 :                         matchcnt += 1;
     768           0 :                 handle_workitem_remove(WK_DIRREM(wk));
     769           0 :                 break;
     770             : 
     771             :         case D_FREEBLKS:
     772             :                 /* releasing blocks and/or fragments from a file */
     773           0 :                 mp = WK_FREEBLKS(wk)->fb_mnt;
     774             : #if 0
     775             :                 if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
     776             :                         panic("%s: freeblks on suspended filesystem",
     777             :                                 "process_worklist_item");
     778             : #endif
     779           0 :                 if (mp == matchmnt)
     780           0 :                         matchcnt += 1;
     781           0 :                 handle_workitem_freeblocks(WK_FREEBLKS(wk));
     782           0 :                 break;
     783             : 
     784             :         case D_FREEFRAG:
     785             :                 /* releasing a fragment when replaced as a file grows */
     786           0 :                 mp = WK_FREEFRAG(wk)->ff_mnt;
     787             : #if 0
     788             :                 if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
     789             :                         panic("%s: freefrag on suspended filesystem",
     790             :                                 "process_worklist_item");
     791             : #endif
     792           0 :                 if (mp == matchmnt)
     793           0 :                         matchcnt += 1;
     794           0 :                 handle_workitem_freefrag(WK_FREEFRAG(wk));
     795           0 :                 break;
     796             : 
     797             :         case D_FREEFILE:
     798             :                 /* releasing an inode when its link count drops to 0 */
     799           0 :                 mp = WK_FREEFILE(wk)->fx_mnt;
     800             : #if 0
     801             :                 if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
     802             :                         panic("%s: freefile on suspended filesystem",
     803             :                                 "process_worklist_item");
     804             : #endif
     805           0 :                 if (mp == matchmnt)
     806           0 :                         matchcnt += 1;
     807           0 :                 handle_workitem_freefile(WK_FREEFILE(wk));
     808           0 :                 break;
     809             : 
     810             :         default:
     811           0 :                 panic("%s_process_worklist: Unknown type %s",
     812           0 :                     "softdep", TYPENAME(wk->wk_type));
     813             :                 /* NOTREACHED */
     814             :         }
     815           0 :         return (matchcnt);
     816           0 : }
     817             : 
     818             : /*
     819             :  * Move dependencies from one buffer to another.
     820             :  */
     821             : void
     822           0 : softdep_move_dependencies(struct buf *oldbp, struct buf *newbp)
     823             : {
     824             :         struct worklist *wk, *wktail;
     825             : 
     826           0 :         if (LIST_FIRST(&newbp->b_dep) != NULL)
     827           0 :                 panic("softdep_move_dependencies: need merge code");
     828             :         wktail = NULL;
     829           0 :         ACQUIRE_LOCK(&lk);
     830           0 :         while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
     831           0 :                 LIST_REMOVE(wk, wk_list);
     832           0 :                 if (wktail == NULL)
     833           0 :                         LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
     834             :                 else
     835           0 :                         LIST_INSERT_AFTER(wktail, wk, wk_list);
     836             :                 wktail = wk;
     837             :         }
     838           0 :         FREE_LOCK(&lk);
     839           0 : }
     840             : 
     841             : /*
     842             :  * Purge the work list of all items associated with a particular mount point.
     843             :  */
     844             : int
     845           0 : softdep_flushworklist(struct mount *oldmnt, int *countp, struct proc *p)
     846             : {
     847             :         struct vnode *devvp;
     848             :         int count, error = 0;
     849             : 
     850             :         /*
     851             :          * Await our turn to clear out the queue, then serialize access.
     852             :          */
     853           0 :         while (softdep_worklist_busy) {
     854           0 :                 softdep_worklist_req += 1;
     855           0 :                 tsleep(&softdep_worklist_req, PRIBIO, "softflush", 0);
     856           0 :                 softdep_worklist_req -= 1;
     857             :         }
     858           0 :         softdep_worklist_busy = -1;
     859             :         /*
     860             :          * Alternately flush the block device associated with the mount
     861             :          * point and process any dependencies that the flushing
     862             :          * creates. We continue until no more worklist dependencies
     863             :          * are found.
     864             :          */
     865           0 :         *countp = 0;
     866           0 :         devvp = VFSTOUFS(oldmnt)->um_devvp;
     867           0 :         while ((count = softdep_process_worklist(oldmnt)) > 0) {
     868           0 :                 *countp += count;
     869           0 :                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
     870           0 :                 error = VOP_FSYNC(devvp, p->p_ucred, MNT_WAIT, p);
     871           0 :                 VOP_UNLOCK(devvp);
     872           0 :                 if (error)
     873             :                         break;
     874             :         }
     875           0 :         softdep_worklist_busy = 0;
     876           0 :         if (softdep_worklist_req)
     877           0 :                 wakeup(&softdep_worklist_req);
     878           0 :         return (error);
     879             : }
     880             : 
     881             : /*
     882             :  * Flush all vnodes and worklist items associated with a specified mount point.
     883             :  */
     884             : int
     885           0 : softdep_flushfiles(struct mount *oldmnt, int flags, struct proc *p)
     886             : {
     887           0 :         int error, count, loopcnt;
     888             : 
     889             :         /*
     890             :          * Alternately flush the vnodes associated with the mount
     891             :          * point and process any dependencies that the flushing
     892             :          * creates. In theory, this loop can happen at most twice,
     893             :          * but we give it a few extra just to be sure.
     894             :          */
     895           0 :         for (loopcnt = 10; loopcnt > 0; loopcnt--) {
     896             :                 /*
     897             :                  * Do another flush in case any vnodes were brought in
     898             :                  * as part of the cleanup operations.
     899             :                  */
     900           0 :                 if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0)
     901             :                         break;
     902           0 :                 if ((error = softdep_flushworklist(oldmnt, &count, p)) != 0 ||
     903           0 :                     count == 0)
     904             :                         break;
     905             :         }
     906             :         /*
     907             :          * If the reboot process sleeps during the loop, the update
     908             :          * process may call softdep_process_worklist() and create
     909             :          * new dirty vnodes at the mount point.  Call ffs_flushfiles()
     910             :          * again after the loop has flushed all soft dependencies.
     911             :          */
     912           0 :         if (error == 0)
     913           0 :                 error = ffs_flushfiles(oldmnt, flags, p);
     914             :         /*
     915             :          * If we are unmounting then it is an error to fail. If we
     916             :          * are simply trying to downgrade to read-only, then filesystem
     917             :          * activity can keep us busy forever, so we just fail with EBUSY.
     918             :          */
     919           0 :         if (loopcnt == 0) {
     920             :                 error = EBUSY;
     921           0 :         }
     922           0 :         return (error);
     923           0 : }
     924             : 
     925             : /*
     926             :  * Structure hashing.
     927             :  * 
     928             :  * There are three types of structures that can be looked up:
     929             :  *      1) pagedep structures identified by mount point, inode number,
     930             :  *         and logical block.
     931             :  *      2) inodedep structures identified by mount point and inode number.
     932             :  *      3) newblk structures identified by mount point and
     933             :  *         physical block number.
     934             :  *
     935             :  * The "pagedep" and "inodedep" dependency structures are hashed
     936             :  * separately from the file blocks and inodes to which they correspond.
     937             :  * This separation helps when the in-memory copy of an inode or
     938             :  * file block must be replaced. It also obviates the need to access
     939             :  * an inode or file page when simply updating (or de-allocating)
     940             :  * dependency structures. Lookup of newblk structures is needed to
     941             :  * find newly allocated blocks when trying to associate them with
     942             :  * their allocdirect or allocindir structure.
     943             :  *
     944             :  * The lookup routines optionally create and hash a new instance when
     945             :  * an existing entry is not found.
     946             :  */
     947             : #define DEPALLOC        0x0001  /* allocate structure if lookup fails */
     948             : #define NODELAY         0x0002  /* cannot do background work */
     949             : 
     950             : SIPHASH_KEY softdep_hashkey;
     951             : 
     952             : /*
     953             :  * Structures and routines associated with pagedep caching.
     954             :  */
     955             : LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl;
     956             : u_long  pagedep_hash;           /* size of hash table - 1 */
     957             : STATIC struct sema pagedep_in_progress;
     958             : 
     959             : /*
      960             :  * Look up a pagedep. Return 1 if found; return 0 if not found, or if
      961             :  * found with DEPALLOC set but not yet associated with any buffer.
     962             :  * If not found, allocate if DEPALLOC flag is passed.
     963             :  * Found or allocated entry is returned in pagedeppp.
     964             :  * This routine must be called with splbio interrupts blocked.
     965             :  */
     966             : STATIC int
     967           0 : pagedep_lookup(struct inode *ip, daddr_t lbn, int flags,
     968             :     struct pagedep **pagedeppp)
     969             : {
     970           0 :         SIPHASH_CTX ctx;
     971             :         struct pagedep *pagedep;
     972             :         struct pagedep_hashhead *pagedephd;
     973           0 :         struct mount *mp;
     974             :         int i;
     975             : 
     976           0 :         splassert(IPL_BIO);
     977             : 
     978             : #ifdef DEBUG
     979             :         if (lk.lkt_held == -1)
     980             :                 panic("pagedep_lookup: lock not held");
     981             : #endif
     982           0 :         mp = ITOV(ip)->v_mount;
     983             : 
     984           0 :         SipHash24_Init(&ctx, &softdep_hashkey);
     985           0 :         SipHash24_Update(&ctx, &mp, sizeof(mp));
     986           0 :         SipHash24_Update(&ctx, &ip->i_number, sizeof(ip->i_number));
     987           0 :         SipHash24_Update(&ctx, &lbn, sizeof(lbn));
     988           0 :         pagedephd = &pagedep_hashtbl[SipHash24_End(&ctx) & pagedep_hash];
     989             : top:
     990           0 :         LIST_FOREACH(pagedep, pagedephd, pd_hash)
     991           0 :                 if (ip->i_number == pagedep->pd_ino &&
     992           0 :                     lbn == pagedep->pd_lbn &&
     993           0 :                     mp == pagedep->pd_mnt)
     994             :                         break;
     995           0 :         if (pagedep) {
     996           0 :                 *pagedeppp = pagedep;
     997           0 :                 if ((flags & DEPALLOC) != 0 &&
     998           0 :                     (pagedep->pd_state & ONWORKLIST) == 0)
     999           0 :                         return (0);
    1000           0 :                 return (1);
    1001             :         }
    1002           0 :         if ((flags & DEPALLOC) == 0) {
    1003           0 :                 *pagedeppp = NULL;
    1004           0 :                 return (0);
    1005             :         }
    1006           0 :         if (sema_get(&pagedep_in_progress, &lk) == 0) {
    1007           0 :                 ACQUIRE_LOCK(&lk);
    1008           0 :                 goto top;
    1009             :         }
    1010           0 :         pagedep = pool_get(&pagedep_pool, PR_WAITOK | PR_ZERO);
    1011           0 :         pagedep->pd_list.wk_type = D_PAGEDEP;
    1012           0 :         pagedep->pd_mnt = mp;
    1013           0 :         pagedep->pd_ino = ip->i_number;
    1014           0 :         pagedep->pd_lbn = lbn;
    1015           0 :         LIST_INIT(&pagedep->pd_dirremhd);
    1016           0 :         LIST_INIT(&pagedep->pd_pendinghd);
    1017           0 :         for (i = 0; i < DAHASHSZ; i++)
    1018           0 :                 LIST_INIT(&pagedep->pd_diraddhd[i]);
    1019           0 :         ACQUIRE_LOCK(&lk);
    1020           0 :         LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash);
    1021           0 :         sema_release(&pagedep_in_progress);
    1022           0 :         *pagedeppp = pagedep;
    1023           0 :         return (0);
    1024           0 : }
    1025             : 
    1026             : /*
    1027             :  * Structures and routines associated with inodedep caching.
    1028             :  */
    1029             : LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl;
    1030             : STATIC u_long   inodedep_hash;  /* size of hash table - 1 */
    1031             : STATIC long     num_inodedep;   /* number of inodedep allocated */
    1032             : STATIC struct sema inodedep_in_progress;
    1033             : 
    1034             : /*
    1035             :  * Look up an inodedep. Return 1 if found, 0 if not found.
    1036             :  * If not found, allocate if DEPALLOC flag is passed.
    1037             :  * Found or allocated entry is returned in inodedeppp.
    1038             :  * This routine must be called with splbio interrupts blocked.
    1039             :  */
    1040             : STATIC int
    1041           0 : inodedep_lookup(struct fs *fs, ufsino_t inum, int flags,
    1042             :     struct inodedep **inodedeppp)
    1043             : {
    1044           0 :         SIPHASH_CTX ctx;
    1045             :         struct inodedep *inodedep;
    1046             :         struct inodedep_hashhead *inodedephd;
    1047             :         int firsttry;
    1048             : 
    1049           0 :         splassert(IPL_BIO);
    1050             : 
    1051             : #ifdef DEBUG
    1052             :         if (lk.lkt_held == -1)
    1053             :                 panic("inodedep_lookup: lock not held");
    1054             : #endif
    1055             :         firsttry = 1;
    1056           0 :         SipHash24_Init(&ctx, &softdep_hashkey);
    1057           0 :         SipHash24_Update(&ctx, &fs, sizeof(fs));
    1058           0 :         SipHash24_Update(&ctx, &inum, sizeof(inum));
    1059           0 :         inodedephd = &inodedep_hashtbl[SipHash24_End(&ctx) & inodedep_hash];
    1060             : top:
    1061           0 :         LIST_FOREACH(inodedep, inodedephd, id_hash)
    1062           0 :                 if (inum == inodedep->id_ino && fs == inodedep->id_fs)
    1063             :                         break;
    1064           0 :         if (inodedep) {
    1065           0 :                 *inodedeppp = inodedep;
    1066           0 :                 return (1);
    1067             :         }
    1068           0 :         if ((flags & DEPALLOC) == 0) {
    1069           0 :                 *inodedeppp = NULL;
    1070           0 :                 return (0);
    1071             :         }
    1072             :         /*
    1073             :          * If we are over our limit, try to improve the situation.
    1074             :          */
    1075           0 :         if (num_inodedep > max_softdeps && firsttry && (flags & NODELAY) == 0 &&
    1076           0 :             request_cleanup(FLUSH_INODES, 1)) {
    1077             :                 firsttry = 0;
    1078           0 :                 goto top;
    1079             :         }
    1080           0 :         if (sema_get(&inodedep_in_progress, &lk) == 0) {
    1081           0 :                 ACQUIRE_LOCK(&lk);
    1082           0 :                 goto top;
    1083             :         }
    1084           0 :         num_inodedep += 1;
    1085           0 :         inodedep = pool_get(&inodedep_pool, PR_WAITOK);
    1086           0 :         inodedep->id_list.wk_type = D_INODEDEP;
    1087           0 :         inodedep->id_fs = fs;
    1088           0 :         inodedep->id_ino = inum;
    1089           0 :         inodedep->id_state = ALLCOMPLETE;
    1090           0 :         inodedep->id_nlinkdelta = 0;
    1091           0 :         inodedep->id_savedino1 = NULL;
    1092           0 :         inodedep->id_savedsize = -1;
    1093           0 :         inodedep->id_buf = NULL;
    1094           0 :         LIST_INIT(&inodedep->id_pendinghd);
    1095           0 :         LIST_INIT(&inodedep->id_inowait);
    1096           0 :         LIST_INIT(&inodedep->id_bufwait);
    1097           0 :         TAILQ_INIT(&inodedep->id_inoupdt);
    1098           0 :         TAILQ_INIT(&inodedep->id_newinoupdt);
    1099           0 :         ACQUIRE_LOCK(&lk);
    1100           0 :         LIST_INSERT_HEAD(inodedephd, inodedep, id_hash);
    1101           0 :         sema_release(&inodedep_in_progress);
    1102           0 :         *inodedeppp = inodedep;
    1103           0 :         return (0);
    1104           0 : }
    1105             : 
    1106             : /*
    1107             :  * Structures and routines associated with newblk caching.
    1108             :  */
    1109             : LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl;
    1110             : u_long  newblk_hash;            /* size of hash table - 1 */
    1111             : STATIC struct sema newblk_in_progress;
    1112             : 
    1113             : /*
    1114             :  * Look up a newblk. Return 1 if found, 0 if not found.
    1115             :  * If not found, allocate if DEPALLOC flag is passed.
    1116             :  * Found or allocated entry is returned in newblkpp.
    1117             :  */
    1118             : STATIC int
    1119           0 : newblk_lookup(struct fs *fs, daddr_t newblkno, int flags,
    1120             :     struct newblk **newblkpp)
    1121             : {
    1122           0 :         SIPHASH_CTX ctx;
    1123             :         struct newblk *newblk;
    1124             :         struct newblk_hashhead *newblkhd;
    1125             : 
    1126           0 :         SipHash24_Init(&ctx, &softdep_hashkey);
    1127           0 :         SipHash24_Update(&ctx, &fs, sizeof(fs));
    1128           0 :         SipHash24_Update(&ctx, &newblkno, sizeof(newblkno));
    1129           0 :         newblkhd = &newblk_hashtbl[SipHash24_End(&ctx) & newblk_hash];
    1130             : top:
    1131           0 :         LIST_FOREACH(newblk, newblkhd, nb_hash)
    1132           0 :                 if (newblkno == newblk->nb_newblkno && fs == newblk->nb_fs)
    1133             :                         break;
    1134           0 :         if (newblk) {
    1135           0 :                 *newblkpp = newblk;
    1136           0 :                 return (1);
    1137             :         }
    1138           0 :         if ((flags & DEPALLOC) == 0) {
    1139           0 :                 *newblkpp = NULL;
    1140           0 :                 return (0);
    1141             :         }
    1142           0 :         if (sema_get(&newblk_in_progress, NULL) == 0)
    1143           0 :                 goto top;
    1144           0 :         newblk = pool_get(&newblk_pool, PR_WAITOK);
    1145           0 :         newblk->nb_state = 0;
    1146           0 :         newblk->nb_fs = fs;
    1147           0 :         newblk->nb_newblkno = newblkno;
    1148           0 :         LIST_INSERT_HEAD(newblkhd, newblk, nb_hash);
    1149           0 :         sema_release(&newblk_in_progress);
    1150           0 :         *newblkpp = newblk;
    1151           0 :         return (0);
    1152           0 : }
    1153             : 
    1154             : /*
    1155             :  * Executed during file system initialization before
    1156             :  * mounting any file systems.
    1157             :  */
    1158             : void 
    1159           0 : softdep_initialize(void)
    1160             : {
    1161             : 
    1162           0 :         bioops.io_start = softdep_disk_io_initiation;
    1163           0 :         bioops.io_complete = softdep_disk_write_complete;
    1164           0 :         bioops.io_deallocate = softdep_deallocate_dependencies;
    1165           0 :         bioops.io_movedeps = softdep_move_dependencies;
    1166           0 :         bioops.io_countdeps = softdep_count_dependencies;
    1167             : 
    1168           0 :         LIST_INIT(&mkdirlisthd);
    1169           0 :         LIST_INIT(&softdep_workitem_pending);
    1170             : #ifdef KMEMSTATS
    1171           0 :         max_softdeps = min (initialvnodes * 8,
    1172           0 :             kmemstats[M_INODEDEP].ks_limit / (2 * sizeof(struct inodedep)));
    1173             : #else
    1174             :         max_softdeps = initialvnodes * 4;
    1175             : #endif
    1176           0 :         arc4random_buf(&softdep_hashkey, sizeof(softdep_hashkey));
    1177           0 :         pagedep_hashtbl = hashinit(initialvnodes / 5, M_PAGEDEP, M_WAITOK,
    1178             :             &pagedep_hash);
    1179           0 :         sema_init(&pagedep_in_progress, "pagedep", PRIBIO, 0);
    1180           0 :         inodedep_hashtbl = hashinit(initialvnodes, M_INODEDEP, M_WAITOK,
    1181             :             &inodedep_hash);
    1182           0 :         sema_init(&inodedep_in_progress, "inodedep", PRIBIO, 0);
    1183           0 :         newblk_hashtbl = hashinit(64, M_NEWBLK, M_WAITOK, &newblk_hash);
    1184           0 :         sema_init(&newblk_in_progress, "newblk", PRIBIO, 0);
    1185           0 :         timeout_set(&proc_waiting_timeout, pause_timer, NULL);
    1186           0 :         pool_init(&pagedep_pool, sizeof(struct pagedep), 0, IPL_NONE,
    1187             :             PR_WAITOK, "pagedep", NULL);
    1188           0 :         pool_init(&inodedep_pool, sizeof(struct inodedep), 0, IPL_NONE,
    1189             :             PR_WAITOK, "inodedep", NULL);
    1190           0 :         pool_init(&newblk_pool, sizeof(struct newblk), 0, IPL_NONE,
    1191             :             PR_WAITOK, "newblk", NULL);
    1192           0 :         pool_init(&bmsafemap_pool, sizeof(struct bmsafemap), 0, IPL_NONE,
    1193             :             PR_WAITOK, "bmsafemap", NULL);
    1194           0 :         pool_init(&allocdirect_pool, sizeof(struct allocdirect), 0, IPL_NONE,
    1195             :             PR_WAITOK, "allocdir", NULL);
    1196           0 :         pool_init(&indirdep_pool, sizeof(struct indirdep), 0, IPL_NONE,
    1197             :             PR_WAITOK, "indirdep", NULL);
    1198           0 :         pool_init(&allocindir_pool, sizeof(struct allocindir), 0, IPL_NONE,
    1199             :             PR_WAITOK, "allocindir", NULL);
    1200           0 :         pool_init(&freefrag_pool, sizeof(struct freefrag), 0, IPL_NONE,
    1201             :             PR_WAITOK, "freefrag", NULL);
    1202           0 :         pool_init(&freeblks_pool, sizeof(struct freeblks), 0, IPL_NONE,
    1203             :             PR_WAITOK, "freeblks", NULL);
    1204           0 :         pool_init(&freefile_pool, sizeof(struct freefile), 0, IPL_NONE,
    1205             :             PR_WAITOK, "freefile", NULL);
    1206           0 :         pool_init(&diradd_pool, sizeof(struct diradd), 0, IPL_NONE,
    1207             :             PR_WAITOK, "diradd", NULL);
    1208           0 :         pool_init(&mkdir_pool, sizeof(struct mkdir), 0, IPL_NONE,
    1209             :             PR_WAITOK, "mkdir", NULL);
    1210           0 :         pool_init(&dirrem_pool, sizeof(struct dirrem), 0, IPL_NONE,
    1211             :             PR_WAITOK, "dirrem", NULL);
    1212           0 :         pool_init(&newdirblk_pool, sizeof(struct newdirblk), 0, IPL_NONE,
    1213             :             PR_WAITOK, "newdirblk", NULL);
    1214           0 : }
    1215             : 
    1216             : /*
    1217             :  * Called at mount time to notify the dependency code that a
    1218             :  * filesystem wishes to use it.
    1219             :  */
    1220             : int
    1221           0 : softdep_mount(struct vnode *devvp, struct mount *mp, struct fs *fs,
    1222             :     struct ucred *cred)
    1223             : {
    1224           0 :         struct csum_total cstotal;
    1225             :         struct cg *cgp;
    1226           0 :         struct buf *bp;
    1227             :         int error, cyl;
    1228             : 
    1229             :         /*
    1230             :          * When doing soft updates, the counters in the
    1231             :          * superblock may have gotten out of sync, so we have
    1232             :          * to scan the cylinder groups and recalculate them.
    1233             :          */
    1234           0 :         if ((fs->fs_flags & FS_UNCLEAN) == 0)
    1235           0 :                 return (0);
    1236           0 :         memset(&cstotal, 0, sizeof(cstotal));
    1237           0 :         for (cyl = 0; cyl < fs->fs_ncg; cyl++) {
    1238           0 :                 if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)),
    1239           0 :                     fs->fs_cgsize, &bp)) != 0) {
    1240           0 :                         brelse(bp);
    1241           0 :                         return (error);
    1242             :                 }
    1243           0 :                 cgp = (struct cg *)bp->b_data;
    1244           0 :                 cstotal.cs_nffree += cgp->cg_cs.cs_nffree;
    1245           0 :                 cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree;
    1246           0 :                 cstotal.cs_nifree += cgp->cg_cs.cs_nifree;
    1247           0 :                 cstotal.cs_ndir += cgp->cg_cs.cs_ndir;
    1248           0 :                 fs->fs_cs(fs, cyl) = cgp->cg_cs;
    1249           0 :                 brelse(bp);
    1250             :         }
    1251             : #ifdef DEBUG
    1252             :         if (memcmp(&cstotal, &fs->fs_cstotal, sizeof(cstotal)))
    1253             :                 printf("ffs_mountfs: superblock updated for soft updates\n");
    1254             : #endif
    1255           0 :         memcpy(&fs->fs_cstotal, &cstotal, sizeof(cstotal));
    1256           0 :         return (0);
    1257           0 : }
    1258             : 
    1259             : /*
    1260             :  * Protecting the freemaps (or bitmaps).
    1261             :  * 
    1262             :  * To eliminate the need to execute fsck before mounting a file system
    1263             :  * after a power failure, one must (conservatively) guarantee that the
    1264             :  * on-disk copy of the bitmaps never indicates that a live inode or block is
    1265             :  * free.  So, when a block or inode is allocated, the bitmap should be
    1266             :  * updated (on disk) before any new pointers.  When a block or inode is
    1267             :  * freed, the bitmap should not be updated until all pointers have been
    1268             :  * reset.  The latter dependency is handled by the delayed de-allocation
    1269             :  * approach described below for block and inode de-allocation.  The former
    1270             :  * dependency is handled by calling the following procedure when a block or
    1271             :  * inode is allocated. When an inode is allocated an "inodedep" is created
    1272             :  * with its DEPCOMPLETE flag cleared until its bitmap is written to disk.
    1273             :  * Each "inodedep" is also inserted into the hash indexing structure so
    1274             :  * that any additional link additions can be made dependent on the inode
    1275             :  * allocation.
    1276             :  * 
    1277             :  * The ufs file system maintains a number of free block counts (e.g., per
    1278             :  * cylinder group, per cylinder and per <cylinder, rotational position> pair)
    1279             :  * in addition to the bitmaps.  These counts are used to improve efficiency
    1280             :  * during allocation and therefore must be consistent with the bitmaps.
    1281             :  * There is no convenient way to guarantee post-crash consistency of these
    1282             :  * counts with simple update ordering, for two main reasons: (1) The counts
    1283             :  * and bitmaps for a single cylinder group block are not in the same disk
    1284             :  * sector.  If a disk write is interrupted (e.g., by power failure), one may
    1285             :  * be written and the other not.  (2) Some of the counts are located in the
    1286             :  * superblock rather than the cylinder group block. So, we focus our soft
    1287             :  * updates implementation on protecting the bitmaps. When mounting a
    1288             :  * filesystem, we recompute the auxiliary counts from the bitmaps.
    1289             :  */
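Stated as a sequence of steps, the two ordering rules above look like the sketch below. It is purely illustrative: the step() helper and the step names are hypothetical stand-ins for buffer-cache writes, not functions in this file.

#include <stdio.h>

/* Hypothetical stand-in for a buffer-cache write; it only records the order. */
static void step(const char *what) { printf("%s\n", what); }

/* Allocation: the bitmap must claim the block on disk before any pointer to it. */
static void alloc_order(void)
{
        step("mark block allocated in cg bitmap");
        step("write cg (bitmap) block");            /* must reach disk first */
        step("set inode or indirect pointer");
        step("write inode or indirect block");      /* pointer may now reach disk */
}

/* Freeing: every on-disk pointer must be gone before the bitmap shows "free". */
static void free_order(void)
{
        step("clear inode or indirect pointer");
        step("write inode or indirect block");      /* pointers nullified on disk */
        step("mark block free in cg bitmap");
        step("write cg (bitmap) block");
}

int main(void)
{
        alloc_order();
        free_order();
        return (0);
}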
    1290             : 
    1291             : /*
    1292             :  * Called just after updating the cylinder group block to allocate an inode.
    1293             :  */
    1294             : /* buffer for cylgroup block with inode map */
    1295             : /* inode related to allocation */
    1296             : /* new inode number being allocated */
    1297             : void
    1298           0 : softdep_setup_inomapdep(struct buf *bp, struct inode *ip, ufsino_t newinum)
    1299             : {
    1300           0 :         struct inodedep *inodedep;
    1301             :         struct bmsafemap *bmsafemap;
    1302             : 
    1303             :         /*
    1304             :          * Create a dependency for the newly allocated inode.
    1305             :          * Panic if it already exists as something is seriously wrong.
    1306             :          * Otherwise add it to the dependency list for the buffer holding
    1307             :          * the cylinder group map from which it was allocated.
    1308             :          */
    1309           0 :         ACQUIRE_LOCK(&lk);
    1310           0 :         if (inodedep_lookup(ip->i_fs, newinum, DEPALLOC | NODELAY, &inodedep)
    1311           0 :             != 0) {
    1312           0 :                 FREE_LOCK(&lk);
    1313           0 :                 panic("softdep_setup_inomapdep: found inode");
    1314             :         }
    1315           0 :         inodedep->id_buf = bp;
    1316           0 :         inodedep->id_state &= ~DEPCOMPLETE;
    1317           0 :         bmsafemap = bmsafemap_lookup(bp);
    1318           0 :         LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps);
    1319           0 :         FREE_LOCK(&lk);
    1320           0 : }
    1321             : 
    1322             : /*
    1323             :  * Called just after updating the cylinder group block to
    1324             :  * allocate block or fragment.
    1325             :  */
    1326             : /* buffer for cylgroup block with block map */
    1327             : /* filesystem doing allocation */
    1328             : /* number of newly allocated block */
    1329             : void
    1330           0 : softdep_setup_blkmapdep(struct buf *bp, struct fs *fs, daddr_t newblkno)
    1331             : {
    1332           0 :         struct newblk *newblk;
    1333             :         struct bmsafemap *bmsafemap;
    1334             : 
    1335             :         /*
    1336             :          * Create a dependency for the newly allocated block.
    1337             :          * Add it to the dependency list for the buffer holding
    1338             :          * the cylinder group map from which it was allocated.
    1339             :          */
    1340           0 :         if (newblk_lookup(fs, newblkno, DEPALLOC, &newblk) != 0)
    1341           0 :                 panic("softdep_setup_blkmapdep: found block");
    1342           0 :         ACQUIRE_LOCK(&lk);
    1343           0 :         newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(bp);
    1344           0 :         LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps);
    1345           0 :         FREE_LOCK(&lk);
    1346           0 : }
    1347             : 
    1348             : /*
    1349             :  * Find the bmsafemap associated with a cylinder group buffer.
    1350             :  * If none exists, create one. The buffer must be locked when
    1351             :  * this routine is called and this routine must be called with
    1352             :  * splbio interrupts blocked.
    1353             :  */
    1354             : STATIC struct bmsafemap *
    1355           0 : bmsafemap_lookup(struct buf *bp)
    1356             : {
    1357             :         struct bmsafemap *bmsafemap;
    1358             :         struct worklist *wk;
    1359             : 
    1360           0 :         splassert(IPL_BIO);
    1361             : 
    1362             : #ifdef DEBUG
    1363             :         if (lk.lkt_held == -1)
    1364             :                 panic("bmsafemap_lookup: lock not held");
    1365             : #endif
    1366           0 :         LIST_FOREACH(wk, &bp->b_dep, wk_list)
    1367           0 :                 if (wk->wk_type == D_BMSAFEMAP)
    1368           0 :                         return (WK_BMSAFEMAP(wk));
    1369           0 :         FREE_LOCK(&lk);
    1370           0 :         bmsafemap = pool_get(&bmsafemap_pool, PR_WAITOK);
    1371           0 :         bmsafemap->sm_list.wk_type = D_BMSAFEMAP;
    1372           0 :         bmsafemap->sm_list.wk_state = 0;
    1373           0 :         bmsafemap->sm_buf = bp;
    1374           0 :         LIST_INIT(&bmsafemap->sm_allocdirecthd);
    1375           0 :         LIST_INIT(&bmsafemap->sm_allocindirhd);
    1376           0 :         LIST_INIT(&bmsafemap->sm_inodedephd);
    1377           0 :         LIST_INIT(&bmsafemap->sm_newblkhd);
    1378           0 :         ACQUIRE_LOCK(&lk);
    1379           0 :         WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list);
    1380           0 :         return (bmsafemap);
    1381           0 : }
    1382             : 
    1383             : /*
    1384             :  * Direct block allocation dependencies.
    1385             :  * 
    1386             :  * When a new block is allocated, the corresponding disk locations must be
    1387             :  * initialized (with zeros or new data) before the on-disk inode points to
    1388             :  * them.  Also, the freemap from which the block was allocated must be
    1389             :  * updated (on disk) before the inode's pointer. These two dependencies are
    1390             :  * independent of each other and are needed for all file blocks and indirect
    1391             :  * blocks that are pointed to directly by the inode.  Just before the
    1392             :  * "in-core" version of the inode is updated with a newly allocated block
    1393             :  * number, a procedure (below) is called to set up allocation dependency
    1394             :  * structures.  These structures are removed when the corresponding
    1395             :  * dependencies are satisfied or when the block allocation becomes obsolete
    1396             :  * (i.e., the file is deleted, the block is de-allocated, or the block is a
    1397             :  * fragment that gets upgraded).  All of these cases are handled in
    1398             :  * procedures described later.
    1399             :  * 
    1400             :  * When a file extension causes a fragment to be upgraded, either to a larger
    1401             :  * fragment or to a full block, the on-disk location may change (if the
    1402             :  * previous fragment could not simply be extended). In this case, the old
    1403             :  * fragment must be de-allocated, but not until after the inode's pointer has
    1404             :  * been updated. In most cases, this is handled by later procedures, which
    1405             :  * will construct a "freefrag" structure to be added to the workitem queue
    1406             :  * when the inode update is complete (or obsolete).  The main exception to
    1407             :  * this is when an allocation occurs while a pending allocation dependency
    1408             :  * (for the same block pointer) remains.  This case is handled in the main
    1409             :  * allocation dependency setup procedure by immediately freeing the
    1410             :  * unreferenced fragments.
    1411             :  */ 
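As a rough, self-contained illustration of the undo/redo idea behind these structures (all names below are invented; this is not the kernel's allocdirect): a newly allocated pointer may be committed to the on-disk inode only once both prerequisites named above have reached the disk, and is otherwise rolled back to its previous, safe value when the inode is written.

#include <stdint.h>
#include <stdio.h>

/* Toy dependency record; field names are invented for the illustration. */
struct toy_allocdirect {
        int64_t new_blkno;      /* block the in-core inode already points at */
        int64_t old_blkno;      /* value still safe on disk (0 if none) */
        int     block_written;  /* new block contents have reached the disk */
        int     bitmap_written; /* cg bitmap claiming the block has reached the disk */
};

/* Pointer value that may be put in the on-disk inode if it is written now. */
static int64_t
committable_pointer(const struct toy_allocdirect *ad)
{
        if (ad->block_written && ad->bitmap_written)
                return (ad->new_blkno);         /* both prerequisites met */
        return (ad->old_blkno);                 /* roll back to the safe value */
}

int main(void)
{
        struct toy_allocdirect ad = { 123, 0, 0, 0 };

        printf("%lld\n", (long long)committable_pointer(&ad));  /* 0: rolled back */
        ad.block_written = ad.bitmap_written = 1;
        printf("%lld\n", (long long)committable_pointer(&ad));  /* 123 */
        return (0);
}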
    1412             : /* inode to which block is being added */
    1413             : /* block pointer within inode */
    1414             : /* disk block number being added */
    1415             : /* previous block number, 0 unless frag */
    1416             : /* size of new block */
    1417             : /* size of old block */
    1418             : /* bp for allocated block */
    1419             : void 
    1420           0 : softdep_setup_allocdirect(struct inode *ip, daddr_t lbn, daddr_t newblkno,
    1421             :     daddr_t oldblkno, long newsize, long oldsize, struct buf *bp)
    1422             : {
    1423             :         struct allocdirect *adp, *oldadp;
    1424             :         struct allocdirectlst *adphead;
    1425             :         struct bmsafemap *bmsafemap;
    1426           0 :         struct inodedep *inodedep;
    1427           0 :         struct pagedep *pagedep;
    1428           0 :         struct newblk *newblk;
    1429             : 
    1430           0 :         adp = pool_get(&allocdirect_pool, PR_WAITOK | PR_ZERO);
    1431           0 :         adp->ad_list.wk_type = D_ALLOCDIRECT;
    1432           0 :         adp->ad_lbn = lbn;
    1433           0 :         adp->ad_newblkno = newblkno;
    1434           0 :         adp->ad_oldblkno = oldblkno;
    1435           0 :         adp->ad_newsize = newsize;
    1436           0 :         adp->ad_oldsize = oldsize;
    1437           0 :         adp->ad_state = ATTACHED;
    1438           0 :         LIST_INIT(&adp->ad_newdirblk);
    1439           0 :         if (newblkno == oldblkno)
    1440           0 :                 adp->ad_freefrag = NULL;
    1441             :         else
    1442           0 :                 adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize);
    1443             : 
    1444           0 :         if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0)
    1445           0 :                 panic("softdep_setup_allocdirect: lost block");
    1446             : 
    1447           0 :         ACQUIRE_LOCK(&lk);
    1448           0 :         inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC | NODELAY, &inodedep);
    1449           0 :         adp->ad_inodedep = inodedep;
    1450             : 
    1451           0 :         if (newblk->nb_state == DEPCOMPLETE) {
    1452           0 :                 adp->ad_state |= DEPCOMPLETE;
    1453           0 :                 adp->ad_buf = NULL;
    1454           0 :         } else {
    1455           0 :                 bmsafemap = newblk->nb_bmsafemap;
    1456           0 :                 adp->ad_buf = bmsafemap->sm_buf;
    1457           0 :                 LIST_REMOVE(newblk, nb_deps);
    1458           0 :                 LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps);
    1459             :         }
    1460           0 :         LIST_REMOVE(newblk, nb_hash);
    1461           0 :         pool_put(&newblk_pool, newblk);
    1462             : 
    1463           0 :         if (bp == NULL) {
    1464             :                 /*
    1465             :                  * XXXUBC - Yes, I know how to fix this, but not right now.
    1466             :                  */
    1467           0 :                 panic("softdep_setup_allocdirect: Bonk art in the head");
    1468             :         }
    1469           0 :         WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
    1470           0 :         if (lbn >= NDADDR) {
    1471             :                 /* allocating an indirect block */
    1472           0 :                 if (oldblkno != 0) {
    1473           0 :                         FREE_LOCK(&lk);
    1474           0 :                         panic("softdep_setup_allocdirect: non-zero indir");
    1475             :                 }
    1476             :         } else {
    1477             :                 /*
    1478             :                  * Allocating a direct block.
    1479             :                  *
    1480             :                  * If we are allocating a directory block, then we must
    1481             :                  * allocate an associated pagedep to track additions and
    1482             :                  * deletions.
    1483             :                  */
    1484           0 :                 if ((DIP(ip, mode) & IFMT) == IFDIR &&
    1485           0 :                     pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
    1486           0 :                         WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
    1487             :         }
    1488             :         /*
    1489             :          * The list of allocdirects must be kept sorted in ascending
    1490             :          * order so that the rollback routines can quickly determine the
    1491             :          * first uncommitted block (the size of the file stored on disk
    1492             :          * ends at the end of the lowest committed fragment, or if there
    1493             :          * are no fragments, at the end of the highest committed block).
    1494             :          * Since files generally grow, the typical case is that the new
    1495             :          * block is to be added at the end of the list. We speed this
    1496             :          * special case by checking against the last allocdirect in the
    1497             :          * list before laboriously traversing the list looking for the
    1498             :          * insertion point.
    1499             :          */
    1500           0 :         adphead = &inodedep->id_newinoupdt;
    1501           0 :         oldadp = TAILQ_LAST(adphead, allocdirectlst);
    1502           0 :         if (oldadp == NULL || oldadp->ad_lbn <= lbn) {
    1503             :                 /* insert at end of list */
    1504           0 :                 TAILQ_INSERT_TAIL(adphead, adp, ad_next);
    1505           0 :                 if (oldadp != NULL && oldadp->ad_lbn == lbn)
    1506           0 :                         allocdirect_merge(adphead, adp, oldadp);
    1507           0 :                 FREE_LOCK(&lk);
    1508           0 :                 return;
    1509             :         }
    1510           0 :         TAILQ_FOREACH(oldadp, adphead, ad_next) {
    1511           0 :                 if (oldadp->ad_lbn >= lbn)
    1512             :                         break;
    1513             :         }
    1514           0 :         if (oldadp == NULL) {
    1515           0 :                 FREE_LOCK(&lk);
    1516           0 :                 panic("softdep_setup_allocdirect: lost entry");
    1517             :         }
    1518             :         /* insert in middle of list */
    1519           0 :         TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
    1520           0 :         if (oldadp->ad_lbn == lbn)
    1521           0 :                 allocdirect_merge(adphead, adp, oldadp);
    1522           0 :         FREE_LOCK(&lk);
    1523           0 : }
    1524             : 
    1525             : /*
    1526             :  * Replace an old allocdirect dependency with a newer one.
    1527             :  * This routine must be called with splbio interrupts blocked.
    1528             :  */
    1529             : /* head of list holding allocdirects */
    1530             : /* allocdirect being added */
    1531             : /* existing allocdirect being checked */
    1532             : STATIC void
    1533           0 : allocdirect_merge(struct allocdirectlst *adphead, struct allocdirect *newadp,
    1534             :     struct allocdirect *oldadp)
    1535             : {
    1536             :         struct worklist *wk;
    1537             :         struct freefrag *freefrag;
    1538             :         struct newdirblk *newdirblk;
    1539             : 
    1540           0 :         splassert(IPL_BIO);
    1541             : 
    1542             : #ifdef DEBUG
    1543             :         if (lk.lkt_held == -1)
    1544             :                 panic("allocdirect_merge: lock not held");
    1545             : #endif
    1546           0 :         if (newadp->ad_oldblkno != oldadp->ad_newblkno ||
    1547           0 :             newadp->ad_oldsize != oldadp->ad_newsize ||
    1548           0 :             newadp->ad_lbn >= NDADDR) {
    1549           0 :                 FREE_LOCK(&lk);
    1550           0 :                 panic("allocdirect_merge: old %lld != new %lld || lbn %lld >= "
    1551           0 :                     "%d", (long long)newadp->ad_oldblkno,
    1552           0 :                     (long long)oldadp->ad_newblkno, (long long)newadp->ad_lbn,
    1553             :                     NDADDR);
    1554             :         }
    1555           0 :         newadp->ad_oldblkno = oldadp->ad_oldblkno;
    1556           0 :         newadp->ad_oldsize = oldadp->ad_oldsize;
    1557             :         /*
    1558             :          * If the old dependency had a fragment to free or had never
    1559             :          * previously had a block allocated, then the new dependency
    1560             :          * can immediately post its freefrag and adopt the old freefrag.
    1561             :          * This action is done by swapping the freefrag dependencies.
    1562             :          * The new dependency gains the old one's freefrag, and the
    1563             :          * old one gets the new one and then immediately puts it on
    1564             :          * the worklist when it is freed by free_allocdirect. It is
    1565             :          * not possible to do this swap when the old dependency had a
    1566             :          * non-zero size but no previous fragment to free. This condition
    1567             :          * arises when the new block is an extension of the old block.
    1568             :          * Here, the first part of the fragment allocated to the new
    1569             :          * dependency is part of the block currently claimed on disk by
    1570             :          * the old dependency, so cannot legitimately be freed until the
    1571             :          * conditions for the new dependency are fulfilled.
    1572             :          */
    1573           0 :         if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) {
    1574           0 :                 freefrag = newadp->ad_freefrag;
    1575           0 :                 newadp->ad_freefrag = oldadp->ad_freefrag;
    1576           0 :                 oldadp->ad_freefrag = freefrag;
    1577           0 :         }
    1578             :         /*
    1579             :          * If we are tracking a new directory-block allocation,
    1580             :          * move it from the old allocdirect to the new allocdirect.
    1581             :          */
    1582           0 :         if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) {
    1583           0 :                 newdirblk = WK_NEWDIRBLK(wk);
    1584           0 :                 WORKLIST_REMOVE(&newdirblk->db_list);
    1585           0 :                 if (LIST_FIRST(&oldadp->ad_newdirblk) != NULL)
    1586           0 :                         panic("allocdirect_merge: extra newdirblk");
    1587           0 :                 WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list);
    1588           0 :         }
    1589           0 :         free_allocdirect(adphead, oldadp, 0);
    1590           0 : }
    1591             :                 
    1592             : /*
    1593             :  * Allocate a new freefrag structure if needed.
    1594             :  */
    1595             : STATIC struct freefrag *
    1596           0 : newfreefrag(struct inode *ip, daddr_t blkno, long size)
    1597             : {
    1598             :         struct freefrag *freefrag;
    1599             :         struct fs *fs;
    1600             : 
    1601           0 :         if (blkno == 0)
    1602           0 :                 return (NULL);
    1603           0 :         fs = ip->i_fs;
    1604           0 :         if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag)
    1605           0 :                 panic("newfreefrag: frag size");
    1606           0 :         freefrag = pool_get(&freefrag_pool, PR_WAITOK);
    1607           0 :         freefrag->ff_list.wk_type = D_FREEFRAG;
    1608           0 :         freefrag->ff_state = DIP(ip, uid) & ~ONWORKLIST; /* used below */
    1609           0 :         freefrag->ff_inum = ip->i_number;
    1610           0 :         freefrag->ff_mnt = ITOV(ip)->v_mount;
    1611           0 :         freefrag->ff_devvp = ip->i_devvp;
    1612           0 :         freefrag->ff_blkno = blkno;
    1613           0 :         freefrag->ff_fragsize = size;
    1614           0 :         return (freefrag);
    1615           0 : }
    1616             : 
    1617             : /*
    1618             :  * This workitem de-allocates fragments that were replaced during
    1619             :  * file block allocation.
    1620             :  */
    1621             : STATIC void 
    1622           0 : handle_workitem_freefrag(struct freefrag *freefrag)
    1623             : {
    1624           0 :         struct inode tip;
    1625           0 :         struct ufs1_dinode dtip1;
    1626             : 
    1627           0 :         tip.i_vnode = NULL;
    1628           0 :         tip.i_din1 = &dtip1;
    1629           0 :         tip.i_fs = VFSTOUFS(freefrag->ff_mnt)->um_fs;
    1630           0 :         tip.i_ump = VFSTOUFS(freefrag->ff_mnt);
    1631           0 :         tip.i_dev = freefrag->ff_devvp->v_rdev;
    1632           0 :         tip.i_number = freefrag->ff_inum;
    1633           0 :         tip.i_ffs1_uid = freefrag->ff_state & ~ONWORKLIST; /* set above */
    1634           0 :         ffs_blkfree(&tip, freefrag->ff_blkno, freefrag->ff_fragsize);
    1635           0 :         pool_put(&freefrag_pool, freefrag);
    1636           0 : }
    1637             : 
    1638             : /*
    1639             :  * Indirect block allocation dependencies.
    1640             :  * 
    1641             :  * The same dependencies that exist for a direct block also exist when
    1642             :  * a new block is allocated and pointed to by an entry in a block of
    1643             :  * indirect pointers. The undo/redo states described above are also
    1644             :  * used here. Because an indirect block contains many pointers that
    1645             :  * may have dependencies, a second copy of the entire in-memory indirect
    1646             :  * block is kept. The buffer cache copy is always completely up-to-date.
    1647             :  * The second copy, which is used only as a source for disk writes,
    1648             :  * contains only the safe pointers (i.e., those that have no remaining
    1649             :  * update dependencies). The second copy is freed when all pointers
    1650             :  * are safe. The cache is not allowed to replace indirect blocks with
    1651             :  * pending update dependencies. If a buffer containing an indirect
    1652             :  * block with dependencies is written, these routines will mark it
    1653             :  * dirty again. It can only be successfully written once all the
    1654             :  * dependencies are removed. The ffs_fsync routine and
    1655             :  * softdep_sync_metadata work together to get all the dependencies
    1656             :  * removed so that a file can be successfully written to disk. Three
    1657             :  * procedures are used when setting up indirect block pointer
    1658             :  * dependencies. The division is necessary because of the organization
    1659             :  * of the "balloc" routine and because of the distinction between file
    1660             :  * pages and file metadata blocks.
    1661             :  */
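The "second copy" idea can be sketched in isolation as follows. This is a toy model with invented names, not the indirdep structure itself: the cache copy always carries the newest pointers, and the image actually written to disk substitutes the old value wherever a dependency is still outstanding.

#include <stdint.h>
#include <string.h>

#define NPTR 4  /* toy indirect block with four pointers (illustrative only) */

struct toy_indir {
        int32_t cache[NPTR];    /* always up-to-date buffer cache copy */
        int32_t old[NPTR];      /* previous (already safe) pointer values */
        int     pending[NPTR];  /* nonzero: dependency not yet satisfied */
};

/* Build the image that may actually be written to disk. */
static void
build_write_image(const struct toy_indir *ti, int32_t out[NPTR])
{
        int i;

        memcpy(out, ti->cache, sizeof(ti->cache));
        for (i = 0; i < NPTR; i++)
                if (ti->pending[i])
                        out[i] = ti->old[i];    /* hide the not-yet-safe pointer */
}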
    1662             : 
    1663             : /*
    1664             :  * Allocate a new allocindir structure.
    1665             :  */
    1666             : /* inode for file being extended */
    1667             : /* offset of pointer in indirect block */
    1668             : /* disk block number being added */
    1669             : /* previous block number, 0 if none */
    1670             : STATIC struct allocindir *
    1671           0 : newallocindir(struct inode *ip, int ptrno, daddr_t newblkno,
    1672             :     daddr_t oldblkno)
    1673             : {
    1674             :         struct allocindir *aip;
    1675             : 
    1676           0 :         aip = pool_get(&allocindir_pool, PR_WAITOK | PR_ZERO);
    1677           0 :         aip->ai_list.wk_type = D_ALLOCINDIR;
    1678           0 :         aip->ai_state = ATTACHED;
    1679           0 :         aip->ai_offset = ptrno;
    1680           0 :         aip->ai_newblkno = newblkno;
    1681           0 :         aip->ai_oldblkno = oldblkno;
    1682           0 :         aip->ai_freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize);
    1683           0 :         return (aip);
    1684             : }
    1685             : 
    1686             : /*
    1687             :  * Called just before setting an indirect block pointer
    1688             :  * to a newly allocated file page.
    1689             :  */
    1690             : /* inode for file being extended */
    1691             : /* allocated block number within file */
    1692             : /* buffer with indirect blk referencing page */
    1693             : /* offset of pointer in indirect block */
    1694             : /* disk block number being added */
    1695             : /* previous block number, 0 if none */
    1696             : /* buffer holding allocated page */
    1697             : void
    1698           0 : softdep_setup_allocindir_page(struct inode *ip, daddr_t lbn, struct buf *bp,
    1699             :     int ptrno, daddr_t newblkno, daddr_t oldblkno, struct buf *nbp)
    1700             : {
    1701             :         struct allocindir *aip;
    1702           0 :         struct pagedep *pagedep;
    1703             : 
    1704           0 :         aip = newallocindir(ip, ptrno, newblkno, oldblkno);
    1705           0 :         ACQUIRE_LOCK(&lk);
    1706             :         /*
    1707             :          * If we are allocating a directory page, then we must
    1708             :          * allocate an associated pagedep to track additions and
    1709             :          * deletions.
    1710             :          */
    1711           0 :         if ((DIP(ip, mode) & IFMT) == IFDIR &&
    1712           0 :             pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
    1713           0 :                 WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list);
    1714           0 :         if (nbp == NULL) {
    1715             :                 /*
    1716             :                  * XXXUBC - Yes, I know how to fix this, but not right now.
    1717             :                  */
    1718           0 :                 panic("softdep_setup_allocindir_page: Bonk art in the head");
    1719             :         }
    1720           0 :         WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
    1721           0 :         FREE_LOCK(&lk);
    1722           0 :         setup_allocindir_phase2(bp, ip, aip);
    1723           0 : }
    1724             : 
    1725             : /*
    1726             :  * Called just before setting an indirect block pointer to a
    1727             :  * newly allocated indirect block.
    1728             :  */
    1729             : /* newly allocated indirect block */
    1730             : /* inode for file being extended */
    1731             : /* indirect block referencing allocated block */
    1732             : /* offset of pointer in indirect block */
    1733             : /* disk block number being added */
    1734             : void
    1735           0 : softdep_setup_allocindir_meta(struct buf *nbp, struct inode *ip,
    1736             :     struct buf *bp, int ptrno, daddr_t newblkno)
    1737             : {
    1738             :         struct allocindir *aip;
    1739             : 
    1740           0 :         aip = newallocindir(ip, ptrno, newblkno, 0);
    1741           0 :         ACQUIRE_LOCK(&lk);
    1742           0 :         WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
    1743           0 :         FREE_LOCK(&lk);
    1744           0 :         setup_allocindir_phase2(bp, ip, aip);
    1745           0 : }
    1746             : 
    1747             : /*
    1748             :  * Called to finish the allocation of the "aip" allocated
    1749             :  * by one of the two routines above.
    1750             :  */
    1751             : /* in-memory copy of the indirect block */
    1752             : /* inode for file being extended */
    1753             : /* allocindir allocated by the above routines */
    1754             : STATIC void 
    1755           0 : setup_allocindir_phase2(struct buf *bp, struct inode *ip,
    1756             :     struct allocindir *aip)
    1757             : {
    1758             :         struct worklist *wk;
    1759             :         struct indirdep *indirdep, *newindirdep;
    1760             :         struct bmsafemap *bmsafemap;
    1761             :         struct allocindir *oldaip;
    1762             :         struct freefrag *freefrag;
    1763           0 :         struct newblk *newblk;
    1764             : 
    1765           0 :         if (bp->b_lblkno >= 0)
    1766           0 :                 panic("setup_allocindir_phase2: not indir blk");
    1767           0 :         for (indirdep = NULL, newindirdep = NULL; ; ) {
    1768           0 :                 ACQUIRE_LOCK(&lk);
    1769           0 :                 LIST_FOREACH(wk, &bp->b_dep, wk_list) {
    1770           0 :                         if (wk->wk_type != D_INDIRDEP)
    1771             :                                 continue;
    1772           0 :                         indirdep = WK_INDIRDEP(wk);
    1773           0 :                         break;
    1774             :                 }
    1775           0 :                 if (indirdep == NULL && newindirdep) {
    1776             :                         indirdep = newindirdep;
    1777           0 :                         WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list);
    1778             :                         newindirdep = NULL;
    1779           0 :                 }
    1780           0 :                 FREE_LOCK(&lk);
    1781           0 :                 if (indirdep) {
    1782           0 :                         if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0,
    1783           0 :                             &newblk) == 0)
    1784           0 :                                 panic("setup_allocindir: lost block");
    1785           0 :                         ACQUIRE_LOCK(&lk);
    1786           0 :                         if (newblk->nb_state == DEPCOMPLETE) {
    1787           0 :                                 aip->ai_state |= DEPCOMPLETE;
    1788           0 :                                 aip->ai_buf = NULL;
    1789           0 :                         } else {
    1790           0 :                                 bmsafemap = newblk->nb_bmsafemap;
    1791           0 :                                 aip->ai_buf = bmsafemap->sm_buf;
    1792           0 :                                 LIST_REMOVE(newblk, nb_deps);
    1793           0 :                                 LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd,
    1794             :                                     aip, ai_deps);
    1795             :                         }
    1796           0 :                         LIST_REMOVE(newblk, nb_hash);
    1797           0 :                         pool_put(&newblk_pool, newblk);
    1798           0 :                         aip->ai_indirdep = indirdep;
    1799             :                         /*
    1800             :                          * Check to see if there is an existing dependency
    1801             :                          * for this block. If there is, merge the old
    1802             :                          * dependency into the new one.
    1803             :                          */
    1804           0 :                         if (aip->ai_oldblkno == 0)
    1805           0 :                                 oldaip = NULL;
    1806             :                         else
    1808           0 :                                 LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next)
    1809           0 :                                         if (oldaip->ai_offset == aip->ai_offset)
    1810             :                                                 break;
    1811             :                         freefrag = NULL;
    1812           0 :                         if (oldaip != NULL) {
    1813           0 :                                 if (oldaip->ai_newblkno != aip->ai_oldblkno) {
    1814           0 :                                         FREE_LOCK(&lk);
    1815           0 :                                         panic("setup_allocindir_phase2: blkno");
    1816             :                                 }
    1817           0 :                                 aip->ai_oldblkno = oldaip->ai_oldblkno;
    1818           0 :                                 freefrag = aip->ai_freefrag;
    1819           0 :                                 aip->ai_freefrag = oldaip->ai_freefrag;
    1820           0 :                                 oldaip->ai_freefrag = NULL;
    1821           0 :                                 free_allocindir(oldaip, NULL);
    1822           0 :                         }
    1823           0 :                         LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next);
    1824           0 :                         if (ip->i_ump->um_fstype == UM_UFS1)
    1825           0 :                                 ((int32_t *)indirdep->ir_savebp->b_data)
    1826           0 :                                     [aip->ai_offset] = aip->ai_oldblkno;
    1827             :                         else
    1828           0 :                                 ((int64_t *)indirdep->ir_savebp->b_data)
    1829           0 :                                     [aip->ai_offset] = aip->ai_oldblkno;
    1830           0 :                         FREE_LOCK(&lk);
    1831           0 :                         if (freefrag != NULL)
    1832           0 :                                 handle_workitem_freefrag(freefrag);
    1833             :                 }
    1834           0 :                 if (newindirdep) {
    1835           0 :                         if (indirdep->ir_savebp != NULL)
    1836           0 :                                 brelse(newindirdep->ir_savebp);
    1837           0 :                         WORKITEM_FREE(newindirdep, D_INDIRDEP);
    1838           0 :                 }
    1839           0 :                 if (indirdep)
    1840             :                         break;
    1841           0 :                 newindirdep = pool_get(&indirdep_pool, PR_WAITOK);
    1842           0 :                 newindirdep->ir_list.wk_type = D_INDIRDEP;
    1843           0 :                 newindirdep->ir_state = ATTACHED;
    1844           0 :                 if (ip->i_ump->um_fstype == UM_UFS1)
    1845           0 :                         newindirdep->ir_state |= UFS1FMT;
    1846           0 :                 LIST_INIT(&newindirdep->ir_deplisthd);
    1847           0 :                 LIST_INIT(&newindirdep->ir_donehd);
    1848           0 :                 if (bp->b_blkno == bp->b_lblkno) {
    1849           0 :                         VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno,
    1850             :                                 NULL);
    1851           0 :                 }
    1852           0 :                 newindirdep->ir_savebp =
    1853           0 :                     getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0);
    1854             : #if 0
    1855             :                 BUF_KERNPROC(newindirdep->ir_savebp);
    1856             : #endif
    1857           0 :                 memcpy(newindirdep->ir_savebp->b_data, bp->b_data, bp->b_bcount);
    1858             :         }
    1859           0 : }
    1860             : 
    1861             : /*
    1862             :  * Block de-allocation dependencies.
    1863             :  * 
    1864             :  * When blocks are de-allocated, the on-disk pointers must be nullified before
    1865             :  * the blocks are made available for use by other files.  (The true
    1866             :  * requirement is that old pointers must be nullified before new on-disk
    1867             :  * pointers are set.  We chose this slightly more stringent requirement to
    1868             :  * reduce complexity.) Our implementation handles this dependency by updating
    1869             :  * the inode (or indirect block) appropriately but delaying the actual block
    1870             :  * de-allocation (i.e., freemap and free space count manipulation) until
    1871             :  * after the updated versions reach stable storage.  After the disk is
    1872             :  * updated, the blocks can be safely de-allocated whenever it is convenient.
    1873             :  * This implementation handles only the common case of reducing a file's
    1874             :  * length to zero. Other cases are handled by the conventional synchronous
    1875             :  * write approach.
    1876             :  *
    1877             :  * The ffs implementation with which we worked double-checks
    1878             :  * the state of the block pointers and file size as it reduces
    1879             :  * a file's length.  Some of this code is replicated here in our
    1880             :  * soft updates implementation.  The freeblks->fb_chkcnt field is
    1881             :  * used to transfer a part of this information to the procedure
    1882             :  * that eventually de-allocates the blocks.
    1883             :  *
    1884             :  * This routine should be called from the routine that shortens
    1885             :  * a file's length, before the inode's size or block pointers
    1886             :  * are modified. It will save the block pointer information for
    1887             :  * later release and zero the inode so that the calling routine
    1888             :  * can release it.
    1889             :  */
    1890             : /* The inode whose length is to be reduced */
    1891             : /* The new length for the file */
    1892             : void
    1893           0 : softdep_setup_freeblocks(struct inode *ip, off_t length)
    1894             : {
    1895             :         struct freeblks *freeblks;
    1896           0 :         struct inodedep *inodedep;
    1897             :         struct allocdirect *adp;
    1898             :         struct vnode *vp;
    1899           0 :         struct buf *bp;
    1900             :         struct fs *fs;
    1901             :         int i, delay, error;
    1902             : 
    1903           0 :         fs = ip->i_fs;
    1904           0 :         if (length != 0)
    1905           0 :                 panic("softdep_setup_freeblocks: non-zero length");
    1906           0 :         freeblks = pool_get(&freeblks_pool, PR_WAITOK | PR_ZERO);
    1907           0 :         freeblks->fb_list.wk_type = D_FREEBLKS;
    1908           0 :         freeblks->fb_state = ATTACHED;
    1909           0 :         freeblks->fb_uid = DIP(ip, uid);
    1910           0 :         freeblks->fb_previousinum = ip->i_number;
    1911           0 :         freeblks->fb_devvp = ip->i_devvp;
    1912           0 :         freeblks->fb_mnt = ITOV(ip)->v_mount;
    1913           0 :         freeblks->fb_oldsize = DIP(ip, size);
    1914           0 :         freeblks->fb_newsize = length;
    1915           0 :         freeblks->fb_chkcnt = DIP(ip, blocks);
    1916             : 
    1917           0 :         for (i = 0; i < NDADDR; i++) {
    1918           0 :                 freeblks->fb_dblks[i] = DIP(ip, db[i]);
    1919           0 :                 DIP_ASSIGN(ip, db[i], 0);
    1920             :         }
    1921             : 
    1922           0 :         for (i = 0; i < NIADDR; i++) {
    1923           0 :                 freeblks->fb_iblks[i] = DIP(ip, ib[i]);
    1924           0 :                 DIP_ASSIGN(ip, ib[i], 0);
    1925             :         }
    1926             : 
    1927           0 :         DIP_ASSIGN(ip, blocks, 0);
    1928           0 :         DIP_ASSIGN(ip, size, 0);
    1929             : 
    1930             :         /*
    1931             :          * Push the zero'ed inode to its disk buffer so that we are free
    1932             :          * to delete its dependencies below. Once the dependencies are gone
    1933             :          * the buffer can be safely released.
    1934             :          */
    1935           0 :         if ((error = bread(ip->i_devvp,
    1936           0 :             fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
    1937           0 :             (int)fs->fs_bsize, &bp)) != 0)
    1938           0 :                 softdep_error("softdep_setup_freeblocks", error);
    1939             : 
    1940           0 :         if (ip->i_ump->um_fstype == UM_UFS1)
    1941           0 :                 *((struct ufs1_dinode *) bp->b_data +
    1942           0 :                     ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
    1943             :         else
    1944           0 :                 *((struct ufs2_dinode *) bp->b_data +
    1945           0 :                     ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
    1946             : 
    1947             :         /*
    1948             :          * Find and eliminate any inode dependencies.
    1949             :          */
    1950           0 :         ACQUIRE_LOCK(&lk);
    1951           0 :         (void) inodedep_lookup(fs, ip->i_number, DEPALLOC, &inodedep);
    1952           0 :         if ((inodedep->id_state & IOSTARTED) != 0) {
    1953           0 :                 FREE_LOCK(&lk);
    1954           0 :                 panic("softdep_setup_freeblocks: inode busy");
    1955             :         }
    1956             :         /*
    1957             :          * Add the freeblks structure to the list of operations that
    1958             :          * must await the zero'ed inode being written to disk. If we
    1959             :          * still have a bitmap dependency (delay == 0), then the inode
    1960             :          * has never been written to disk, so we can process the
    1961             :          * freeblks below once we have deleted the dependencies.
    1962             :          */
    1963           0 :         delay = (inodedep->id_state & DEPCOMPLETE);
    1964           0 :         if (delay)
    1965           0 :                 WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
    1966             :         /*
    1967             :          * Because the file length has been truncated to zero, any
    1968             :          * pending block allocation dependency structures associated
    1969             :          * with this inode are obsolete and can simply be de-allocated.
    1970             :          * We must first merge the two dependency lists to get rid of
    1971             :          * any duplicate freefrag structures, then purge the merged list.
    1972             :          * If we still have a bitmap dependency, then the inode has never
    1973             :          * been written to disk, so we can free any fragments without delay.
    1974             :          */
    1975           0 :         merge_inode_lists(inodedep);
    1976           0 :         while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
    1977           0 :                 free_allocdirect(&inodedep->id_inoupdt, adp, delay);
    1978           0 :         FREE_LOCK(&lk);
    1979           0 :         bdwrite(bp);
    1980             :         /*
    1981             :          * We must wait for any I/O in progress to finish so that
    1982             :          * all potential buffers on the dirty list will be visible.
    1983             :          * Once they are all there, walk the list and get rid of
    1984             :          * any dependencies.
    1985             :          */
    1986           0 :         vp = ITOV(ip);
    1987           0 :         ACQUIRE_LOCK(&lk);
    1988           0 :         drain_output(vp, 1);
    1989           0 :         while ((bp = LIST_FIRST(&vp->v_dirtyblkhd))) {
    1990           0 :                 if (getdirtybuf(bp, MNT_WAIT) <= 0)
    1991             :                         break;
    1992           0 :                 (void) inodedep_lookup(fs, ip->i_number, 0, &inodedep);
    1993           0 :                 deallocate_dependencies(bp, inodedep);
    1994           0 :                 bp->b_flags |= B_INVAL | B_NOCACHE;
    1995           0 :                 FREE_LOCK(&lk);
    1996           0 :                 brelse(bp);
    1997           0 :                 ACQUIRE_LOCK(&lk);
    1998             :         }
    1999           0 :         if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0)
    2000           0 :                 (void) free_inodedep(inodedep);
    2001             : 
    2002           0 :         if (delay) {
    2003           0 :                 freeblks->fb_state |= DEPCOMPLETE;
    2004             :                 /*
    2005             :                  * If the inode with zeroed block pointers is now on disk we
    2006             :                  * can start freeing blocks. Add freeblks to the worklist
    2007             :                  * instead of calling handle_workitem_freeblocks() directly as
    2008             :                  * it is more likely that additional IO is needed to complete
    2009             :                  * the request than in the !delay case.
    2010             :                  */
    2011           0 :                 if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE)
    2012           0 :                         add_to_worklist(&freeblks->fb_list);
    2013             :         }
    2014             : 
    2015           0 :         FREE_LOCK(&lk);
    2016             :         /*
    2017             :          * If the inode has never been written to disk (delay == 0),
    2018             :          * then we can process the freeblks now that we have deleted
    2019             :          * the dependencies.
    2020             :          */
    2021           0 :         if (!delay)
    2022           0 :                 handle_workitem_freeblocks(freeblks);
    2023           0 : }
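
/*
 * [Editor's illustration, not part of ffs_softdep.c]  A minimal userland
 * sketch of the ordering that the block de-allocation dependency above
 * enforces: the block pointers are saved aside and nullified in the inode
 * first, the zeroed inode is written, and only then are the blocks handed
 * back to the allocator.  All names here (toy_inode, toy_freeblks,
 * disk_write_inode, allocator_free, TOY_NDADDR) are hypothetical
 * stand-ins, not kernel APIs.
 */
#include <stdio.h>

#define TOY_NDADDR 12			/* direct block pointers per inode */

struct toy_inode    { long db[TOY_NDADDR]; long size; };
struct toy_freeblks { long fb_dblks[TOY_NDADDR]; };

static void disk_write_inode(const struct toy_inode *ip) { (void)ip; puts("zeroed inode on disk"); }
static void allocator_free(long blkno) { printf("freed block %ld\n", blkno); }

/* Analogue of softdep_setup_freeblocks(): remember and nullify the pointers. */
static void
toy_setup_freeblocks(struct toy_inode *ip, struct toy_freeblks *fb)
{
	for (int i = 0; i < TOY_NDADDR; i++) {
		fb->fb_dblks[i] = ip->db[i];
		ip->db[i] = 0;
	}
	ip->size = 0;
}

int
main(void)
{
	struct toy_inode ip = { .db = { 100, 101, 102 }, .size = 3 * 8192 };
	struct toy_freeblks fb;

	toy_setup_freeblocks(&ip, &fb);		/* 1: nullify pointers, save them */
	disk_write_inode(&ip);			/* 2: zeroed inode reaches stable storage */
	for (int i = 0; i < TOY_NDADDR; i++)	/* 3: only now free the blocks */
		if (fb.fb_dblks[i] != 0)
			allocator_free(fb.fb_dblks[i]);
	return 0;
}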
    2024             : 
    2025             : /*
    2026             :  * Reclaim any dependency structures from a buffer that is about to
    2027             :  * be reallocated to a new vnode. The buffer must be locked, thus,
    2028             :  * no I/O completion operations can occur while we are manipulating
    2029             :  * its associated dependencies. The mutex is held so that other I/O's
    2030             :  * its associated dependencies. The mutex is held so that other I/Os
    2031             :  */
    2032             : STATIC void
    2033           0 : deallocate_dependencies(struct buf *bp, struct inodedep *inodedep)
    2034             : {
    2035             :         struct worklist *wk;
    2036             :         struct indirdep *indirdep;
    2037             :         struct allocindir *aip;
    2038             :         struct pagedep *pagedep;
    2039             :         struct dirrem *dirrem;
    2040             :         struct diradd *dap;
    2041             :         int i;
    2042             : 
    2043           0 :         while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
    2044           0 :                 switch (wk->wk_type) {
    2045             : 
    2046             :                 case D_INDIRDEP:
    2047           0 :                         indirdep = WK_INDIRDEP(wk);
    2048             :                         /*
    2049             :                          * None of the indirect pointers will ever be visible,
    2050             :                          * so they can simply be tossed. GOINGAWAY ensures
    2051             :                          * that allocated pointers will be saved in the buffer
    2052             :                          * cache until they are freed. Note that they can only
    2053             :                          * be found by their physical address, since the inode
    2054             :                          * mapping the logical address will be gone. The save
    2055             :                          * buffer used for the safe copy
    2056             :                          * was allocated in setup_allocindir_phase2 using
    2057             :                          * the physical address so it could be used for this
    2058             :                          * purpose. Hence we swap the safe copy with the real
    2059             :                          * copy, allowing the safe copy to be freed and holding
    2060             :                          * on to the real copy for later use in indir_trunc.
    2061             :                          */
    2062           0 :                         if (indirdep->ir_state & GOINGAWAY) {
    2063           0 :                                 FREE_LOCK(&lk);
    2064           0 :                                 panic("deallocate_dependencies: already gone");
    2065             :                         }
    2066           0 :                         indirdep->ir_state |= GOINGAWAY;
    2067           0 :                         while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)))
    2068           0 :                                 free_allocindir(aip, inodedep);
    2069           0 :                         if (bp->b_lblkno >= 0 ||
    2070           0 :                             bp->b_blkno != indirdep->ir_savebp->b_lblkno) {
    2071           0 :                                 FREE_LOCK(&lk);
    2072           0 :                                 panic("deallocate_dependencies: not indir");
    2073             :                         }
    2074           0 :                         memcpy(indirdep->ir_savebp->b_data, bp->b_data,
    2075             :                             bp->b_bcount);
    2076           0 :                         WORKLIST_REMOVE(wk);
    2077           0 :                         WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, wk);
    2078           0 :                         continue;
    2079             : 
    2080             :                 case D_PAGEDEP:
    2081           0 :                         pagedep = WK_PAGEDEP(wk);
    2082             :                         /*
    2083             :                          * None of the directory additions will ever be
    2084             :                          * visible, so they can simply be tossed.
    2085             :                          */
    2086           0 :                         for (i = 0; i < DAHASHSZ; i++)
    2087           0 :                                 while ((dap =
    2088           0 :                                     LIST_FIRST(&pagedep->pd_diraddhd[i])))
    2089           0 :                                         free_diradd(dap);
    2090           0 :                         while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)))
    2091           0 :                                 free_diradd(dap);
    2092             :                         /*
    2093             :                          * Copy any directory remove dependencies to the list
    2094             :                          * to be processed after the zero'ed inode is written.
    2095             :                          * If the inode has already been written, then they 
    2096             :                          * can be dumped directly onto the work list.
    2097             :                          */
    2098           0 :                         while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd))) {
    2099           0 :                                 LIST_REMOVE(dirrem, dm_next);
    2100           0 :                                 dirrem->dm_dirinum = pagedep->pd_ino;
    2101           0 :                                 if (inodedep == NULL ||
    2102           0 :                                     (inodedep->id_state & ALLCOMPLETE) ==
    2103             :                                      ALLCOMPLETE)
    2104           0 :                                         add_to_worklist(&dirrem->dm_list);
    2105             :                                 else
    2106           0 :                                         WORKLIST_INSERT(&inodedep->id_bufwait,
    2107             :                                             &dirrem->dm_list);
    2108             :                         }
    2109           0 :                         if ((pagedep->pd_state & NEWBLOCK) != 0) {
    2110           0 :                                 LIST_FOREACH(wk, &inodedep->id_bufwait, wk_list)
    2111           0 :                                         if (wk->wk_type == D_NEWDIRBLK &&
    2112           0 :                                             WK_NEWDIRBLK(wk)->db_pagedep ==
    2113             :                                             pagedep)
    2114             :                                                 break;
    2115           0 :                                 if (wk != NULL) {
    2116           0 :                                         WORKLIST_REMOVE(wk);
    2117           0 :                                         free_newdirblk(WK_NEWDIRBLK(wk));
    2118             :                                 } else {
    2119           0 :                                         FREE_LOCK(&lk);
    2120           0 :                                         panic("deallocate_dependencies: "
    2121             :                                             "lost pagedep");
    2122             :                                 }
    2123           0 :                         }
    2124           0 :                         WORKLIST_REMOVE(&pagedep->pd_list);
    2125           0 :                         LIST_REMOVE(pagedep, pd_hash);
    2126           0 :                         WORKITEM_FREE(pagedep, D_PAGEDEP);
    2127           0 :                         continue;
    2128             : 
    2129             :                 case D_ALLOCINDIR:
    2130           0 :                         free_allocindir(WK_ALLOCINDIR(wk), inodedep);
    2131           0 :                         continue;
    2132             : 
    2133             :                 case D_ALLOCDIRECT:
    2134             :                 case D_INODEDEP:
    2135           0 :                         FREE_LOCK(&lk);
    2136           0 :                         panic("deallocate_dependencies: Unexpected type %s",
    2137           0 :                             TYPENAME(wk->wk_type));
    2138             :                         /* NOTREACHED */
    2139             : 
    2140             :                 default:
    2141           0 :                         FREE_LOCK(&lk);
    2142           0 :                         panic("deallocate_dependencies: Unknown type %s",
    2143           0 :                             TYPENAME(wk->wk_type));
    2144             :                         /* NOTREACHED */
    2145             :                 }
    2146             :         }
    2147           0 : }
    2148             : 
    2149             : /*
    2150             :  * Free an allocdirect. Generate a new freefrag work request if appropriate.
    2151             :  * This routine must be called with splbio interrupts blocked.
    2152             :  */
    2153             : STATIC void
    2154           0 : free_allocdirect(struct allocdirectlst *adphead, struct allocdirect *adp,
    2155             :     int delay)
    2156             : {
    2157             :         struct newdirblk *newdirblk;
    2158             :         struct worklist *wk;
    2159             : 
    2160           0 :         splassert(IPL_BIO);
    2161             : 
    2162             : #ifdef DEBUG
    2163             :         if (lk.lkt_held == -1)
    2164             :                 panic("free_allocdirect: lock not held");
    2165             : #endif
    2166           0 :         if ((adp->ad_state & DEPCOMPLETE) == 0)
    2167           0 :                 LIST_REMOVE(adp, ad_deps);
    2168           0 :         TAILQ_REMOVE(adphead, adp, ad_next);
    2169           0 :         if ((adp->ad_state & COMPLETE) == 0)
    2170           0 :                 WORKLIST_REMOVE(&adp->ad_list);
    2171           0 :         if (adp->ad_freefrag != NULL) {
    2172           0 :                 if (delay)
    2173           0 :                         WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
    2174             :                             &adp->ad_freefrag->ff_list);
    2175             :                 else
    2176           0 :                         add_to_worklist(&adp->ad_freefrag->ff_list);
    2177             :         }
    2178           0 :         if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) {
    2179           0 :                 newdirblk = WK_NEWDIRBLK(wk);
    2180           0 :                 WORKLIST_REMOVE(&newdirblk->db_list);
    2181           0 :                 if (LIST_FIRST(&adp->ad_newdirblk) != NULL)
    2182           0 :                         panic("free_allocdirect: extra newdirblk");
    2183           0 :                 if (delay)
    2184           0 :                         WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
    2185             :                             &newdirblk->db_list);
    2186             :                 else
    2187           0 :                         free_newdirblk(newdirblk);
    2188             :         }
    2189           0 :         WORKITEM_FREE(adp, D_ALLOCDIRECT);
    2190           0 : }
    2191             : 
    2192             : /*
    2193             :  * Free a newdirblk. Clear the NEWBLOCK flag on its associated pagedep.
    2194             :  * This routine must be called with splbio interrupts blocked.
    2195             :  */
    2196             : void
    2197           0 : free_newdirblk(struct newdirblk *newdirblk)
    2198             : {
    2199             :         struct pagedep *pagedep;
    2200             :         struct diradd *dap;
    2201             :         int i;
    2202             : 
    2203           0 :         splassert(IPL_BIO);
    2204             : 
    2205             : #ifdef DEBUG
    2206             :         if (lk.lkt_held == -1)
    2207             :                 panic("free_newdirblk: lock not held");
    2208             : #endif
    2209             :         /*
    2210             :          * If the pagedep is still linked onto the directory buffer
    2211             :          * dependency chain, then some of the entries on the
    2212             :          * pd_pendinghd list may not be committed to disk yet. In
    2213             :          * this case, we will simply clear the NEWBLOCK flag and
    2214             :          * let the pd_pendinghd list be processed when the pagedep
    2215             :          * is next written. If the pagedep is no longer on the buffer
    2216             :          * dependency chain, then all the entries on the pd_pendinghd
    2217             :          * list are committed to disk and we can free them here.
    2218             :          */
    2219           0 :         pagedep = newdirblk->db_pagedep;
    2220           0 :         pagedep->pd_state &= ~NEWBLOCK;
    2221           0 :         if ((pagedep->pd_state & ONWORKLIST) == 0)
    2222           0 :                 while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
    2223           0 :                         free_diradd(dap);
    2224             :         /*
    2225             :          * If no dependencies remain, the pagedep will be freed.
    2226             :          */
    2227           0 :         for (i = 0; i < DAHASHSZ; i++)
    2228           0 :                 if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL)
    2229             :                         break;
    2230           0 :         if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0) {
    2231           0 :                 LIST_REMOVE(pagedep, pd_hash);
    2232           0 :                 WORKITEM_FREE(pagedep, D_PAGEDEP);
    2233           0 :         }
    2234           0 :         WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
    2235           0 : }
    2236             : 
    2237             : /*
    2238             :  * Prepare an inode to be freed. The actual free operation is not
    2239             :  * done until the zero'ed inode has been written to disk.
    2240             :  */
    2241             : void
    2242           0 : softdep_freefile(struct vnode *pvp, ufsino_t ino, mode_t mode)
    2243             : {
    2244           0 :         struct inode *ip = VTOI(pvp);
    2245           0 :         struct inodedep *inodedep;
    2246             :         struct freefile *freefile;
    2247             : 
    2248             :         /*
    2249             :          * This sets up the inode de-allocation dependency.
    2250             :          */
    2251           0 :         freefile = pool_get(&freefile_pool, PR_WAITOK);
    2252           0 :         freefile->fx_list.wk_type = D_FREEFILE;
    2253           0 :         freefile->fx_list.wk_state = 0;
    2254           0 :         freefile->fx_mode = mode;
    2255           0 :         freefile->fx_oldinum = ino;
    2256           0 :         freefile->fx_devvp = ip->i_devvp;
    2257           0 :         freefile->fx_mnt = ITOV(ip)->v_mount;
    2258             : 
    2259             :         /*
    2260             :          * If the inodedep does not exist, then the zero'ed inode has
    2261             :          * been written to disk. If the allocated inode has never been
    2262             :          * written to disk, then the on-disk inode is zero'ed. In either
    2263             :          * case we can free the file immediately.
    2264             :          */
    2265           0 :         ACQUIRE_LOCK(&lk);
    2266           0 :         if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0 ||
    2267           0 :             check_inode_unwritten(inodedep)) {
    2268           0 :                 FREE_LOCK(&lk);
    2269           0 :                 handle_workitem_freefile(freefile);
    2270           0 :                 return;
    2271             :         }
    2272           0 :         WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
    2273           0 :         FREE_LOCK(&lk);
    2274           0 : }
    2275             : 
    2276             : /*
    2277             :  * Check to see if an inode has never been written to disk. If
    2278             :  * so free the inodedep and return success, otherwise return failure.
    2279             :  * This routine must be called with splbio interrupts blocked.
    2280             :  *
    2281             :  * If we still have a bitmap dependency, then the inode has never
    2282             :  * been written to disk. Drop the dependency as it is no longer
    2283             :  * necessary since the inode is being deallocated. We set the
    2284             :  * ALLCOMPLETE flags since the bitmap now properly shows that the
    2285             :  * inode is not allocated. Even if the inode is actively being
    2286             :  * written, it has been rolled back to its zero'ed state, so we
    2287             :  * are assured that a zero inode is what is on the disk. For short
    2288             :  * lived files, this change will usually result in removing all the
    2289             :  * dependencies from the inode so that it can be freed immediately.
    2290             :  */
    2291             : STATIC int
    2292           0 : check_inode_unwritten(struct inodedep *inodedep)
    2293             : {
    2294           0 :         splassert(IPL_BIO);
    2295             : 
    2296           0 :         if ((inodedep->id_state & DEPCOMPLETE) != 0 ||
    2297           0 :             LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
    2298           0 :             LIST_FIRST(&inodedep->id_bufwait) != NULL ||
    2299           0 :             LIST_FIRST(&inodedep->id_inowait) != NULL ||
    2300           0 :             TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
    2301           0 :             TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
    2302           0 :             inodedep->id_nlinkdelta != 0)
    2303           0 :                 return (0);
    2304           0 :         inodedep->id_state |= ALLCOMPLETE;
    2305           0 :         LIST_REMOVE(inodedep, id_deps);
    2306           0 :         inodedep->id_buf = NULL;
    2307           0 :         if (inodedep->id_state & ONWORKLIST)
    2308           0 :                 WORKLIST_REMOVE(&inodedep->id_list);
    2309           0 :         if (inodedep->id_savedino1 != NULL) {
    2310           0 :                 free(inodedep->id_savedino1, M_INODEDEP, inodedep->id_unsize);
    2311           0 :                 inodedep->id_savedino1 = NULL;
    2312           0 :         }
    2313           0 :         if (free_inodedep(inodedep) == 0) {
    2314           0 :                 FREE_LOCK(&lk);
    2315           0 :                 panic("check_inode_unwritten: busy inode");
    2316             :         }
    2317           0 :         return (1);
    2318           0 : }
    2319             : 
    2320             : /*
    2321             :  * Try to free an inodedep structure. Return 1 if it could be freed.
    2322             :  */
    2323             : STATIC int
    2324           0 : free_inodedep(struct inodedep *inodedep)
    2325             : {
    2326             : 
    2327           0 :         if ((inodedep->id_state & ONWORKLIST) != 0 ||
    2328           0 :             (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
    2329           0 :             LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
    2330           0 :             LIST_FIRST(&inodedep->id_bufwait) != NULL ||
    2331           0 :             LIST_FIRST(&inodedep->id_inowait) != NULL ||
    2332           0 :             TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
    2333           0 :             TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
    2334           0 :             inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL)
    2335           0 :                 return (0);
    2336           0 :         LIST_REMOVE(inodedep, id_hash);
    2337           0 :         WORKITEM_FREE(inodedep, D_INODEDEP);
    2338           0 :         num_inodedep -= 1;
    2339           0 :         return (1);
    2340           0 : }
    2341             : 
    2342             : /*
    2343             :  * This workitem routine performs the block de-allocation.
    2344             :  * The workitem is added to the pending list after the updated
    2345             :  * inode block has been written to disk.  As mentioned above,
    2346             :  * checks regarding the number of blocks de-allocated (compared
    2347             :  * to the number of blocks allocated for the file) are also
    2348             :  * performed in this function.
    2349             :  */
    2350             : STATIC void
    2351           0 : handle_workitem_freeblocks(struct freeblks *freeblks)
    2352             : {
    2353           0 :         struct inode tip;
    2354             :         daddr_t bn;
    2355           0 :         union {
    2356             :                 struct ufs1_dinode di1;
    2357             :                 struct ufs2_dinode di2;
    2358             :         } di;
    2359             :         struct fs *fs;
    2360             :         int i, level, bsize;
    2361           0 :         long nblocks, blocksreleased = 0;
    2362             :         int error, allerror = 0;
    2363           0 :         daddr_t baselbns[NIADDR], tmpval;
    2364             : 
    2365           0 :         if (VFSTOUFS(freeblks->fb_mnt)->um_fstype == UM_UFS1)
    2366           0 :                 tip.i_din1 = &di.di1;
    2367             :         else
    2368           0 :                 tip.i_din2 = &di.di2;
    2369             : 
    2370           0 :         tip.i_fs = fs = VFSTOUFS(freeblks->fb_mnt)->um_fs;
    2371           0 :         tip.i_number = freeblks->fb_previousinum;
    2372           0 :         tip.i_ump = VFSTOUFS(freeblks->fb_mnt);
    2373           0 :         tip.i_dev = freeblks->fb_devvp->v_rdev;
    2374           0 :         DIP_ASSIGN(&tip, size, freeblks->fb_oldsize);
    2375           0 :         DIP_ASSIGN(&tip, uid, freeblks->fb_uid);
    2376           0 :         tip.i_vnode = NULL;
    2377             :         tmpval = 1;
    2378           0 :         baselbns[0] = NDADDR;
    2379           0 :         for (i = 1; i < NIADDR; i++) {
    2380           0 :                 tmpval *= NINDIR(fs);
    2381           0 :                 baselbns[i] = baselbns[i - 1] + tmpval;
    2382             :         }
    2383           0 :         nblocks = btodb(fs->fs_bsize);
    2384           0 :         blocksreleased = 0;
    2385             :         /*
    2386             :          * Indirect blocks first.
    2387             :          */
    2388           0 :         for (level = (NIADDR - 1); level >= 0; level--) {
    2389           0 :                 if ((bn = freeblks->fb_iblks[level]) == 0)
    2390             :                         continue;
    2391           0 :                 if ((error = indir_trunc(&tip, fsbtodb(fs, bn), level,
    2392           0 :                     baselbns[level], &blocksreleased)) != 0)
    2393           0 :                         allerror = error;
    2394           0 :                 ffs_blkfree(&tip, bn, fs->fs_bsize);
    2395           0 :                 blocksreleased += nblocks;
    2396           0 :         }
    2397             :         /*
    2398             :          * All direct blocks or frags.
    2399             :          */
    2400           0 :         for (i = (NDADDR - 1); i >= 0; i--) {
    2401           0 :                 if ((bn = freeblks->fb_dblks[i]) == 0)
    2402             :                         continue;
    2403           0 :                 bsize = blksize(fs, &tip, i);
    2404           0 :                 ffs_blkfree(&tip, bn, bsize);
    2405           0 :                 blocksreleased += btodb(bsize);
    2406           0 :         }
    2407             : 
    2408             : #ifdef DIAGNOSTIC
    2409           0 :         if (freeblks->fb_chkcnt != blocksreleased)
    2410           0 :                 printf("handle_workitem_freeblocks: block count\n");
    2411           0 :         if (allerror)
    2412           0 :                 softdep_error("handle_workitem_freeblks", allerror);
    2413             : #endif /* DIAGNOSTIC */
    2414           0 :         WORKITEM_FREE(freeblks, D_FREEBLKS);
    2415           0 : }
    2416             : 
    2417             : /*
    2418             :  * Release blocks associated with the inode ip and stored in the indirect
    2419             :  * block dbn. If level is greater than SINGLE, the block is an indirect block
    2420             :  * and recursive calls to indirtrunc must be used to cleanse other indirect
    2421             :  * blocks.
    2422             :  */
    2423             : STATIC int
    2424           0 : indir_trunc(struct inode *ip, daddr_t dbn, int level, daddr_t lbn,
    2425             :     long *countp)
    2426             : {
    2427           0 :         struct buf *bp;
    2428             :         int32_t *bap1 = NULL;
    2429             :         int64_t nb, *bap2 = NULL;
    2430             :         struct fs *fs;
    2431             :         struct worklist *wk;
    2432             :         struct indirdep *indirdep;
    2433             :         int i, lbnadd, nblocks, ufs1fmt;
    2434             :         int error, allerror = 0;
    2435             : 
    2436           0 :         fs = ip->i_fs;
    2437             :         lbnadd = 1;
    2438           0 :         for (i = level; i > 0; i--)
    2439           0 :                 lbnadd *= NINDIR(fs);
    2440             :         /*
    2441             :          * Get buffer of block pointers to be freed. This routine is not
    2442             :          * called until the zero'ed inode has been written, so it is safe
    2443             :          * to free blocks as they are encountered. Because the inode has
    2444             :          * been zero'ed, calls to bmap on these blocks will fail. So, we
    2445             :          * have to use the on-disk address and the block device for the
    2446             :          * filesystem to look them up. If the file was deleted before its
    2447             :          * indirect blocks were all written to disk, the routine that set
    2448             :          * us up (deallocate_dependencies) will have arranged to leave
    2449             :          * a complete copy of the indirect block in memory for our use.
    2450             :          * Otherwise we have to read the blocks in from the disk.
    2451             :          */
    2452           0 :         ACQUIRE_LOCK(&lk);
    2453           0 :         if ((bp = incore(ip->i_devvp, dbn)) != NULL &&
    2454           0 :             (wk = LIST_FIRST(&bp->b_dep)) != NULL) {
    2455           0 :                 if (wk->wk_type != D_INDIRDEP ||
    2456           0 :                     (indirdep = WK_INDIRDEP(wk))->ir_savebp != bp ||
    2457           0 :                     (indirdep->ir_state & GOINGAWAY) == 0) {
    2458           0 :                         FREE_LOCK(&lk);
    2459           0 :                         panic("indir_trunc: lost indirdep");
    2460             :                 }
    2461           0 :                 WORKLIST_REMOVE(wk);
    2462           0 :                 WORKITEM_FREE(indirdep, D_INDIRDEP);
    2463           0 :                 if (LIST_FIRST(&bp->b_dep) != NULL) {
    2464             :                         FREE_LOCK(&lk);
    2465           0 :                         panic("indir_trunc: dangling dep");
    2466             :                 }
    2467             :                 FREE_LOCK(&lk);
    2468             :         } else {
    2469           0 :                 FREE_LOCK(&lk);
    2470           0 :                 error = bread(ip->i_devvp, dbn, (int)fs->fs_bsize, &bp);
    2471           0 :                 if (error)
    2472           0 :                         return (error);
    2473             :         }
    2474             :         /*
    2475             :          * Recursively free indirect blocks.
    2476             :          */
    2477           0 :         if (ip->i_ump->um_fstype == UM_UFS1) {
    2478             :                 ufs1fmt = 1;
    2479           0 :                 bap1 = (int32_t *)bp->b_data;
    2480           0 :         } else {
    2481             :                 ufs1fmt = 0;
    2482           0 :                 bap2 = (int64_t *)bp->b_data;
    2483             :         }
    2484           0 :         nblocks = btodb(fs->fs_bsize);
    2485           0 :         for (i = NINDIR(fs) - 1; i >= 0; i--) {
    2486           0 :                 if (ufs1fmt)
    2487           0 :                         nb = bap1[i];
    2488             :                 else
    2489           0 :                         nb = bap2[i];
    2490           0 :                 if (nb == 0)
    2491             :                         continue;
    2492           0 :                 if (level != 0) {
    2493           0 :                         if ((error = indir_trunc(ip, fsbtodb(fs, nb),
    2494           0 :                              level - 1, lbn + (i * lbnadd), countp)) != 0)
    2495           0 :                                 allerror = error;
    2496             :                 }
    2497           0 :                 ffs_blkfree(ip, nb, fs->fs_bsize);
    2498           0 :                 *countp += nblocks;
    2499           0 :         }
    2500           0 :         bp->b_flags |= B_INVAL | B_NOCACHE;
    2501           0 :         brelse(bp);
    2502           0 :         return (allerror);
    2503           0 : }
    2504             : 
    2505             : /*
    2506             :  * Free an allocindir.
    2507             :  * This routine must be called with splbio interrupts blocked.
    2508             :  */
    2509             : STATIC void
    2510           0 : free_allocindir(struct allocindir *aip, struct inodedep *inodedep)
    2511             : {
    2512             :         struct freefrag *freefrag;
    2513             : 
    2514           0 :         splassert(IPL_BIO);
    2515             : 
    2516             : #ifdef DEBUG
    2517             :         if (lk.lkt_held == -1)
    2518             :                 panic("free_allocindir: lock not held");
    2519             : #endif
    2520           0 :         if ((aip->ai_state & DEPCOMPLETE) == 0)
    2521           0 :                 LIST_REMOVE(aip, ai_deps);
    2522           0 :         if (aip->ai_state & ONWORKLIST)
    2523           0 :                 WORKLIST_REMOVE(&aip->ai_list);
    2524           0 :         LIST_REMOVE(aip, ai_next);
    2525           0 :         if ((freefrag = aip->ai_freefrag) != NULL) {
    2526           0 :                 if (inodedep == NULL)
    2527           0 :                         add_to_worklist(&freefrag->ff_list);
    2528             :                 else
    2529           0 :                         WORKLIST_INSERT(&inodedep->id_bufwait,
    2530             :                             &freefrag->ff_list);
    2531             :         }
    2532           0 :         WORKITEM_FREE(aip, D_ALLOCINDIR);
    2533           0 : }
    2534             : 
    2535             : /*
    2536             :  * Directory entry addition dependencies.
    2537             :  * 
    2538             :  * When adding a new directory entry, the inode (with its incremented link
    2539             :  * count) must be written to disk before the directory entry's pointer to it.
    2540             :  * Also, if the inode is newly allocated, the corresponding freemap must be
    2541             :  * updated (on disk) before the directory entry's pointer. These requirements
    2542             :  * are met via undo/redo on the directory entry's pointer, which consists
    2543             :  * simply of the inode number.
    2544             :  * 
    2545             :  * As directory entries are added and deleted, the free space within a
    2546             :  * directory block can become fragmented.  The ufs file system will compact
    2547             :  * a fragmented directory block to make space for a new entry. When this
    2548             :  * occurs, the offsets of previously added entries change. Any "diradd"
    2549             :  * dependency structures corresponding to these entries must be updated with
    2550             :  * the new offsets.
    2551             :  */
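
/*
 * [Editor's illustration, not part of ffs_softdep.c]  A hypothetical sketch
 * of the undo/redo on a new directory entry's inode-number field described
 * above: if the referenced inode has not yet reached disk when the directory
 * block is written, the entry is rolled back to an unused slot (d_ino == 0)
 * for the write and rolled forward again in memory afterwards.  The names
 * toy_direct, undo_entry and redo_entry are invented for this sketch.
 */
#include <stdio.h>

struct toy_direct { unsigned long d_ino; char d_name[16]; };

/* Before writing the directory block: hide the pointer if the inode isn't on disk. */
static unsigned long
undo_entry(struct toy_direct *ep, int inode_on_disk)
{
	unsigned long saved = ep->d_ino;

	if (!inode_on_disk)
		ep->d_ino = 0;
	return saved;
}

/* After the write has been issued: restore the in-memory pointer. */
static void
redo_entry(struct toy_direct *ep, unsigned long saved)
{
	ep->d_ino = saved;
}

int
main(void)
{
	struct toy_direct ep = { .d_ino = 1234, .d_name = "newfile" };
	unsigned long saved;

	saved = undo_entry(&ep, 0);			/* inode not yet written */
	printf("on disk:   d_ino=%lu %s\n", ep.d_ino, ep.d_name);	/* 0: entry invisible */
	redo_entry(&ep, saved);
	printf("in memory: d_ino=%lu %s\n", ep.d_ino, ep.d_name);	/* 1234 again */
	return 0;
}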
    2552             : 
    2553             : /*
    2554             :  * This routine is called after the in-memory inode's link
    2555             :  * count has been incremented, but before the directory entry's
    2556             :  * pointer to the inode has been set.
    2557             :  */
    2558             : /* buffer containing directory block */
    2559             : /* inode for directory */
    2560             : /* offset of new entry in directory */
    2561             : /* inode referenced by new directory entry */
    2562             : /* non-NULL => contents of new mkdir */
    2563             : /* entry is in a newly allocated block */
    2564             : int 
    2565           0 : softdep_setup_directory_add(struct buf *bp, struct inode *dp, off_t diroffset,
    2566             :     long newinum, struct buf *newdirbp, int isnewblk)
    2567             : {
    2568             :         int offset;             /* offset of new entry within directory block */
    2569             :         daddr_t lbn;            /* block in directory containing new entry */
    2570             :         struct fs *fs;
    2571             :         struct diradd *dap;
    2572             :         struct allocdirect *adp;
    2573           0 :         struct pagedep *pagedep;
    2574           0 :         struct inodedep *inodedep;
    2575             :         struct newdirblk *newdirblk = NULL;
    2576             :         struct mkdir *mkdir1, *mkdir2;
    2577             :         
    2578             : 
    2579           0 :         fs = dp->i_fs;
    2580           0 :         lbn = lblkno(fs, diroffset);
    2581           0 :         offset = blkoff(fs, diroffset);
    2582           0 :         dap = pool_get(&diradd_pool, PR_WAITOK | PR_ZERO);
    2583           0 :         dap->da_list.wk_type = D_DIRADD;
    2584           0 :         dap->da_offset = offset;
    2585           0 :         dap->da_newinum = newinum;
    2586           0 :         dap->da_state = ATTACHED;
    2587           0 :         if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) {
    2588           0 :                 newdirblk = pool_get(&newdirblk_pool, PR_WAITOK);
    2589           0 :                 newdirblk->db_list.wk_type = D_NEWDIRBLK;
    2590           0 :                 newdirblk->db_state = 0;
    2591           0 :         }
    2592           0 :         if (newdirbp == NULL) {
    2593           0 :                 dap->da_state |= DEPCOMPLETE;
    2594           0 :                 ACQUIRE_LOCK(&lk);
    2595           0 :         } else {
    2596           0 :                 dap->da_state |= MKDIR_BODY | MKDIR_PARENT;
    2597           0 :                 mkdir1 = pool_get(&mkdir_pool, PR_WAITOK);
    2598           0 :                 mkdir1->md_list.wk_type = D_MKDIR;
    2599           0 :                 mkdir1->md_state = MKDIR_BODY;
    2600           0 :                 mkdir1->md_diradd = dap;
    2601           0 :                 mkdir2 = pool_get(&mkdir_pool, PR_WAITOK);
    2602           0 :                 mkdir2->md_list.wk_type = D_MKDIR;
    2603           0 :                 mkdir2->md_state = MKDIR_PARENT;
    2604           0 :                 mkdir2->md_diradd = dap;
    2605             :                 /*
    2606             :                  * Dependency on "." and ".." being written to disk.
    2607             :                  */
    2608           0 :                 mkdir1->md_buf = newdirbp;
    2609           0 :                 ACQUIRE_LOCK(&lk);
    2610           0 :                 LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs);
    2611           0 :                 WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list);
    2612           0 :                 FREE_LOCK(&lk);
    2613           0 :                 bdwrite(newdirbp);
    2614             :                 /*
    2615             :                  * Dependency on link count increase for parent directory
    2616             :                  */
    2617           0 :                 ACQUIRE_LOCK(&lk);
    2618           0 :                 if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0
    2619           0 :                     || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
    2620           0 :                         dap->da_state &= ~MKDIR_PARENT;
    2621           0 :                         WORKITEM_FREE(mkdir2, D_MKDIR);
    2622           0 :                 } else {
    2623           0 :                         LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs);
    2624           0 :                         WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list);
    2625             :                 }
    2626             :         }
    2627             :         /*
    2628             :          * Link into parent directory pagedep to await its being written.
    2629             :          */
    2630           0 :         if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0)
    2631           0 :                 WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
    2632           0 :         dap->da_pagedep = pagedep;
    2633           0 :         LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap,
    2634             :             da_pdlist);
    2635             :         /*
    2636             :          * Link into its inodedep. Put it on the id_bufwait list if the inode
    2637             :          * is not yet written. If it is written, do the post-inode write
    2638             :          * processing to put it on the id_pendinghd list.
    2639             :          */
    2640           0 :         (void) inodedep_lookup(fs, newinum, DEPALLOC, &inodedep);
    2641           0 :         if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE)
    2642           0 :                 diradd_inode_written(dap, inodedep);
    2643             :         else
    2644           0 :                 WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
    2645           0 :         if (isnewblk) {
    2646             :                 /*
    2647             :                  * Directories growing into indirect blocks are rare
    2648             :                  * enough, and new block allocation in those cases is
    2649             :                  * rarer still, so we choose not to bother tracking
    2650             :                  * them. Rather, we simply force the new directory
    2651             :                  * entry to disk.
    2652             :                  */
    2653           0 :                 if (lbn >= NDADDR) {
    2654           0 :                         FREE_LOCK(&lk);
    2655             :                         /*
    2656             :                          * We only have a new allocation when at the
    2657             :                          * beginning of a new block, not when we are
    2658             :                          * expanding into an existing block.
    2659             :                          */
    2660           0 :                         if (blkoff(fs, diroffset) == 0)
    2661           0 :                                 return (1);
    2662           0 :                         return (0);
    2663             :                 }
    2664             :                 /*
    2665             :                  * We only have a new allocation when at the beginning
    2666             :                  * of a new fragment, not when we are expanding into an
    2667             :                  * existing fragment. Also, there is nothing to do if we
    2668             :                  * are already tracking this block.
    2669             :                  */
    2670           0 :                 if (fragoff(fs, diroffset) != 0) {
    2671           0 :                         FREE_LOCK(&lk);
    2672           0 :                         return (0);
    2673             :                 }
    2674             :                         
    2675           0 :                 if ((pagedep->pd_state & NEWBLOCK) != 0) {
    2676           0 :                         WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
    2677           0 :                         FREE_LOCK(&lk);
    2678           0 :                         return (0);
    2679             :                 }
    2680             :                 /*
    2681             :                  * Find our associated allocdirect and have it track us.
    2682             :                  */
    2683           0 :                 if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0)
    2684           0 :                         panic("softdep_setup_directory_add: lost inodedep");
    2685           0 :                 adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst);
    2686           0 :                 if (adp == NULL || adp->ad_lbn != lbn) {
    2687           0 :                         FREE_LOCK(&lk);
    2688           0 :                         panic("softdep_setup_directory_add: lost entry");
    2689             :                 }
    2690           0 :                 pagedep->pd_state |= NEWBLOCK;
    2691           0 :                 newdirblk->db_pagedep = pagedep;
    2692           0 :                 WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list);
    2693           0 :         }
    2694           0 :         FREE_LOCK(&lk);
    2695           0 :         return (0);
    2696           0 : }
    2697             : 
    2698             : /*
    2699             :  * This procedure is called to change the offset of a directory
    2700             :  * entry when compacting a directory block which must be owned
    2701             :  * exclusively by the caller. Note that the actual entry movement
    2702             :  * must be done in this procedure to ensure that no I/O completions
    2703             :  * occur while the move is in progress.
    2704             :  */
    2705             : /* inode for directory */
    2706             : /* address of dp->i_offset */
    2707             : /* address of old directory location */
    2708             : /* address of new directory location */
    2709             : /* size of directory entry */
    2710             : void 
    2711           0 : softdep_change_directoryentry_offset(struct inode *dp, caddr_t base,
    2712             :     caddr_t oldloc, caddr_t newloc, int entrysize)
    2713             : {
    2714             :         int offset, oldoffset, newoffset;
    2715           0 :         struct pagedep *pagedep;
    2716             :         struct diradd *dap;
    2717             :         daddr_t lbn;
    2718             : 
    2719           0 :         ACQUIRE_LOCK(&lk);
    2720           0 :         lbn = lblkno(dp->i_fs, dp->i_offset);
    2721           0 :         offset = blkoff(dp->i_fs, dp->i_offset);
    2722           0 :         if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0)
    2723             :                 goto done;
    2724           0 :         oldoffset = offset + (oldloc - base);
    2725           0 :         newoffset = offset + (newloc - base);
    2726             : 
    2727           0 :         LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(oldoffset)], da_pdlist) {
    2728           0 :                 if (dap->da_offset != oldoffset)
    2729             :                         continue;
    2730           0 :                 dap->da_offset = newoffset;
    2731           0 :                 if (DIRADDHASH(newoffset) == DIRADDHASH(oldoffset))
    2732             :                         break;
    2733           0 :                 LIST_REMOVE(dap, da_pdlist);
    2734           0 :                 LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(newoffset)],
    2735             :                     dap, da_pdlist);
    2736           0 :                 break;
    2737             :         }
    2738           0 :         if (dap == NULL) {
    2739             : 
    2740           0 :                 LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) {
    2741           0 :                         if (dap->da_offset == oldoffset) {
    2742           0 :                                 dap->da_offset = newoffset;
    2743           0 :                                 break;
    2744             :                         }
    2745             :                 }
    2746             :         }
    2747             : done:
    2748           0 :         memmove(newloc, oldloc, entrysize);
    2749           0 :         FREE_LOCK(&lk);
    2750           0 : }
    2751             : 
    2752             : /*
    2753             :  * Free a diradd dependency structure. This routine must be called
    2754             :  * with splbio interrupts blocked.
    2755             :  */
    2756             : STATIC void
    2757           0 : free_diradd(struct diradd *dap)
    2758             : {
    2759             :         struct dirrem *dirrem;
    2760             :         struct pagedep *pagedep;
    2761           0 :         struct inodedep *inodedep;
    2762             :         struct mkdir *mkdir, *nextmd;
    2763             : 
    2764           0 :         splassert(IPL_BIO);
    2765             : 
    2766             : #ifdef DEBUG
    2767             :         if (lk.lkt_held == -1)
    2768             :                 panic("free_diradd: lock not held");
    2769             : #endif
    2770           0 :         WORKLIST_REMOVE(&dap->da_list);
    2771           0 :         LIST_REMOVE(dap, da_pdlist);
    2772           0 :         if ((dap->da_state & DIRCHG) == 0) {
    2773           0 :                 pagedep = dap->da_pagedep;
    2774           0 :         } else {
    2775           0 :                 dirrem = dap->da_previous;
    2776           0 :                 pagedep = dirrem->dm_pagedep;
    2777           0 :                 dirrem->dm_dirinum = pagedep->pd_ino;
    2778           0 :                 add_to_worklist(&dirrem->dm_list);
    2779             :         }
    2780           0 :         if (inodedep_lookup(VFSTOUFS(pagedep->pd_mnt)->um_fs, dap->da_newinum,
    2781           0 :             0, &inodedep) != 0)
    2782           0 :                 (void) free_inodedep(inodedep);
    2783           0 :         if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
    2784           0 :                 for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) {
    2785           0 :                         nextmd = LIST_NEXT(mkdir, md_mkdirs);
    2786           0 :                         if (mkdir->md_diradd != dap)
    2787             :                                 continue;
    2788           0 :                         dap->da_state &= ~mkdir->md_state;
    2789           0 :                         WORKLIST_REMOVE(&mkdir->md_list);
    2790           0 :                         LIST_REMOVE(mkdir, md_mkdirs);
    2791           0 :                         WORKITEM_FREE(mkdir, D_MKDIR);
    2792           0 :                 }
    2793           0 :                 if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
    2794           0 :                         FREE_LOCK(&lk);
    2795           0 :                         panic("free_diradd: unfound ref");
    2796             :                 }
    2797             :         }
    2798           0 :         WORKITEM_FREE(dap, D_DIRADD);
    2799           0 : }
    2800             : 
    2801             : /*
    2802             :  * Directory entry removal dependencies.
    2803             :  * 
    2804             :  * When removing a directory entry, the entry's inode pointer must be
    2805             :  * zero'ed on disk before the corresponding inode's link count is decremented
    2806             :  * (possibly freeing the inode for re-use). This dependency is handled by
    2807             :  * updating the directory entry but delaying the inode count reduction until
    2808             :  * after the directory block has been written to disk. After this point, the
    2809             :  * inode count can be decremented whenever it is convenient.
    2810             :  */
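
/*
 * A minimal sketch of the ordering described above.  write_dir_block() is
 * a hypothetical stand-in for the write of the directory block; the other
 * names follow this file.
 *
 *      ep->d_ino = 0;                  (zero the entry in the dir block)
 *      write_dir_block(bp);            (zeroed entry must reach disk first)
 *      DIP_ADD(ip, nlink, -1);         (only then drop the on-disk link count)
 */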
    2811             : 
    2812             : /*
    2813             :  * This routine should be called immediately after removing
    2814             :  * a directory entry.  The inode's link count should not be
    2815             :  * decremented by the calling procedure -- the soft updates
    2816             :  * code will do this task when it is safe.
    2817             :  */
    2818             : /* buffer containing directory block */
    2819             : /* inode for the directory being modified */
    2820             : /* inode for directory entry being removed */
    2821             : /* indicates if doing RMDIR */
    2822             : void 
    2823           0 : softdep_setup_remove(struct buf *bp, struct inode *dp, struct inode *ip,
    2824             :     int isrmdir)
    2825             : {
    2826           0 :         struct dirrem *dirrem, *prevdirrem;
    2827             : 
    2828             :         /*
    2829             :          * Allocate a new dirrem if appropriate and ACQUIRE_LOCK.
    2830             :          */
    2831           0 :         dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
    2832             : 
    2833             :         /*
    2834             :          * If the COMPLETE flag is clear, then there were no active
    2835             :          * entries and we want to roll back to a zeroed entry until
    2836             :          * the new inode is committed to disk. If the COMPLETE flag is
    2837             :          * set then we have deleted an entry that never made it to
    2838             :          * disk. If the entry we deleted resulted from a name change,
    2839             :          * then the old name still resides on disk. We cannot delete
    2840             :          * its inode (returned to us in prevdirrem) until the zeroed
    2841             :          * directory entry gets to disk. The new inode has never been
    2842             :          * referenced on the disk, so it can be deleted immediately.
    2843             :          */
    2844           0 :         if ((dirrem->dm_state & COMPLETE) == 0) {
    2845           0 :                 LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
    2846             :                     dm_next);
    2847           0 :                 FREE_LOCK(&lk);
    2848           0 :         } else {
    2849           0 :                 if (prevdirrem != NULL)
    2850           0 :                         LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd,
    2851             :                             prevdirrem, dm_next);
    2852           0 :                 dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
    2853           0 :                 FREE_LOCK(&lk);
    2854           0 :                 handle_workitem_remove(dirrem);
    2855             :         }
    2856           0 : }
    2857             : 
    2858             : STATIC long num_dirrem;         /* number of dirrem allocated */
    2859             : /*
    2860             :  * Allocate a new dirrem if appropriate and return it along with
    2861             :  * its associated pagedep. Called without a lock, returns with lock.
    2862             :  */
    2863             : /* buffer containing directory block */
    2864             : /* inode for the directory being modified */
    2865             : /* inode for directory entry being removed */
    2866             : /* indicates if doing RMDIR */
    2867             : /* previously referenced inode, if any */
    2868             : STATIC struct dirrem *
    2869           0 : newdirrem(struct buf *bp, struct inode *dp, struct inode *ip, int isrmdir,
    2870             :     struct dirrem **prevdirremp)
    2871             : {
    2872             :         int offset;
    2873             :         daddr_t lbn;
    2874             :         struct diradd *dap;
    2875             :         struct dirrem *dirrem;
    2876           0 :         struct pagedep *pagedep;
    2877             : 
    2878             :         /*
    2879             :          * Whiteouts have no deletion dependencies.
    2880             :          */
    2881           0 :         if (ip == NULL)
    2882           0 :                 panic("newdirrem: whiteout");
    2883             :         /*
    2884             :          * If we are over our limit, try to improve the situation.
    2885             :          * Limiting the number of dirrem structures will also limit
    2886             :          * the number of freefile and freeblks structures.
    2887             :          */
    2888           0 :         if (num_dirrem > max_softdeps / 2)
    2889           0 :                 (void) request_cleanup(FLUSH_REMOVE, 0);
    2890           0 :         num_dirrem += 1;
    2891           0 :         dirrem = pool_get(&dirrem_pool, PR_WAITOK | PR_ZERO);
    2892           0 :         dirrem->dm_list.wk_type = D_DIRREM;
    2893           0 :         dirrem->dm_state = isrmdir ? RMDIR : 0;
    2894           0 :         dirrem->dm_mnt = ITOV(ip)->v_mount;
    2895           0 :         dirrem->dm_oldinum = ip->i_number;
    2896           0 :         *prevdirremp = NULL;
    2897             : 
    2898           0 :         ACQUIRE_LOCK(&lk);
    2899           0 :         lbn = lblkno(dp->i_fs, dp->i_offset);
    2900           0 :         offset = blkoff(dp->i_fs, dp->i_offset);
    2901           0 :         if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0)
    2902           0 :                 WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
    2903           0 :         dirrem->dm_pagedep = pagedep;
    2904             :         /*
    2905             :          * Check for a diradd dependency for the same directory entry.
    2906             :          * If present, then both dependencies become obsolete and can
    2907             :          * be de-allocated. Check for an entry on both the pd_diraddhd
    2908             :          * list and the pd_pendinghd list.
    2909             :          */
    2910             : 
    2911           0 :         LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist)
    2912           0 :                 if (dap->da_offset == offset)
    2913             :                         break;
    2914           0 :         if (dap == NULL) {
    2915             : 
    2916           0 :                 LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist)
    2917           0 :                         if (dap->da_offset == offset)
    2918             :                                 break;
    2919           0 :                 if (dap == NULL)
    2920           0 :                         return (dirrem);
    2921             :         }
    2922             :         /*
    2923             :          * Must be ATTACHED at this point.
    2924             :          */
    2925           0 :         if ((dap->da_state & ATTACHED) == 0) {
    2926           0 :                 FREE_LOCK(&lk);
    2927           0 :                 panic("newdirrem: not ATTACHED");
    2928             :         }
    2929           0 :         if (dap->da_newinum != ip->i_number) {
    2930           0 :                 FREE_LOCK(&lk);
    2931           0 :                 panic("newdirrem: inum %u should be %u",
    2932           0 :                     ip->i_number, dap->da_newinum);
    2933             :         }
    2934             :         /*
    2935             :          * If we are deleting a changed name that never made it to disk,
    2936             :          * then return the dirrem describing the previous inode (which
    2937             :          * represents the inode currently referenced from this entry on disk).
    2938             :          */
    2939           0 :         if ((dap->da_state & DIRCHG) != 0) {
    2940           0 :                 *prevdirremp = dap->da_previous;
    2941           0 :                 dap->da_state &= ~DIRCHG;
    2942           0 :                 dap->da_pagedep = pagedep;
    2943           0 :         }
    2944             :         /*
    2945             :          * We are deleting an entry that never made it to disk.
    2946             :          * Mark it COMPLETE so we can delete its inode immediately.
    2947             :          */
    2948           0 :         dirrem->dm_state |= COMPLETE;
    2949           0 :         free_diradd(dap);
    2950           0 :         return (dirrem);
    2951           0 : }
    2952             : 
    2953             : /*
    2954             :  * Directory entry change dependencies.
    2955             :  * 
    2956             :  * Changing an existing directory entry requires that an add operation
    2957             :  * be completed first followed by a deletion. The semantics for the addition
    2958             :  * are identical to the description of adding a new entry above except
    2959             :  * that the rollback is to the old inode number rather than zero. Once
    2960             :  * the addition dependency is completed, the removal is done as described
    2961             :  * in the removal routine above.
    2962             :  */
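
/*
 * A minimal sketch of the rollback difference: while the directory block
 * is being written, a changed entry rolls back to the previous inode
 * number instead of zero.  This is the substitution performed in
 * initiate_write_filepage() below.
 *
 *      if (dap->da_state & DIRCHG)
 *              ep->d_ino = dap->da_previous->dm_oldinum;
 *      else
 *              ep->d_ino = 0;
 */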
    2963             : 
    2964             : /*
    2965             :  * This routine should be called immediately after changing
    2966             :  * a directory entry.  The inode's link count should not be
    2967             :  * decremented by the calling procedure -- the soft updates
    2968             :  * code will perform this task when it is safe.
    2969             :  */
    2970             : /* buffer containing directory block */
    2971             : /* inode for the directory being modified */
    2972             : /* inode for directory entry being removed */
    2973             : /* new inode number for changed entry */
    2974             : /* indicates if doing RMDIR */
    2975             : void 
    2976           0 : softdep_setup_directory_change(struct buf *bp, struct inode *dp,
    2977             :     struct inode *ip, long newinum, int isrmdir)
    2978             : {
    2979             :         int offset;
    2980             :         struct diradd *dap;
    2981           0 :         struct dirrem *dirrem, *prevdirrem;
    2982             :         struct pagedep *pagedep;
    2983           0 :         struct inodedep *inodedep;
    2984             : 
    2985           0 :         offset = blkoff(dp->i_fs, dp->i_offset);
    2986           0 :         dap = pool_get(&diradd_pool, PR_WAITOK | PR_ZERO);
    2987           0 :         dap->da_list.wk_type = D_DIRADD;
    2988           0 :         dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE;
    2989           0 :         dap->da_offset = offset;
    2990           0 :         dap->da_newinum = newinum;
    2991             : 
    2992             :         /*
    2993             :          * Allocate a new dirrem and ACQUIRE_LOCK.
    2994             :          */
    2995           0 :         dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
    2996           0 :         pagedep = dirrem->dm_pagedep;
    2997             :         /*
    2998             :          * The possible values for isrmdir:
    2999             :          *      0 - non-directory file rename
    3000             :          *      1 - directory rename within same directory
    3001             :          *   inum - directory rename to new directory of given inode number
    3002             :          * When renaming to a new directory, we are both deleting and
    3003             :          * creating a new directory entry, so the link count on the new
    3004             :          * directory should not change. Thus we do not need the followup
    3005             :          * dirrem which is usually done in handle_workitem_remove. We set
    3006             :          * the DIRCHG flag to tell handle_workitem_remove to skip the 
    3007             :          * followup dirrem.
    3008             :          */
    3009           0 :         if (isrmdir > 1)
    3010           0 :                 dirrem->dm_state |= DIRCHG;
    3011             : 
    3012             :         /*
    3013             :          * If the COMPLETE flag is clear, then there were no active
    3014             :          * entries and we want to roll back to the previous inode until
    3015             :          * the new inode is committed to disk. If the COMPLETE flag is
    3016             :          * set, then we have deleted an entry that never made it to disk.
    3017             :          * If the entry we deleted resulted from a name change, then the old
    3018             :          * inode reference still resides on disk. Any rollback that we do
    3019             :          * needs to be to that old inode (returned to us in prevdirrem). If
    3020             :          * the entry we deleted resulted from a create, then there is
    3021             :          * no entry on the disk, so we want to roll back to zero rather
    3022             :          * than the uncommitted inode. In either of the COMPLETE cases we
    3023             :          * want to immediately free the unwritten and unreferenced inode.
    3024             :          */
    3025           0 :         if ((dirrem->dm_state & COMPLETE) == 0) {
    3026           0 :                 dap->da_previous = dirrem;
    3027           0 :         } else {
    3028           0 :                 if (prevdirrem != NULL) {
    3029           0 :                         dap->da_previous = prevdirrem;
    3030           0 :                 } else {
    3031           0 :                         dap->da_state &= ~DIRCHG;
    3032           0 :                         dap->da_pagedep = pagedep;
    3033             :                 }
    3034           0 :                 dirrem->dm_dirinum = pagedep->pd_ino;
    3035           0 :                 add_to_worklist(&dirrem->dm_list);
    3036             :         }
    3037             :         /*
    3038             :          * Link into its inodedep. Put it on the id_bufwait list if the inode
    3039             :          * is not yet written. If it is written, do the post-inode write
    3040             :          * processing to put it on the id_pendinghd list.
    3041             :          */
    3042           0 :         if (inodedep_lookup(dp->i_fs, newinum, DEPALLOC, &inodedep) == 0 ||
    3043           0 :             (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
    3044           0 :                 dap->da_state |= COMPLETE;
    3045           0 :                 LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
    3046           0 :                 WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
    3047           0 :         } else {
    3048           0 :                 LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)],
    3049             :                     dap, da_pdlist);
    3050           0 :                 WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
    3051             :         }
    3052           0 :         FREE_LOCK(&lk);
    3053           0 : }
    3054             : 
    3055             : /*
    3056             :  * Called whenever the link count on an inode is changed.
    3057             :  * It creates an inode dependency so that the new reference(s)
    3058             :  * to the inode cannot be committed to disk until the updated
    3059             :  * inode has been written.
    3060             :  */
    3061             : /* the inode with the increased link count */
    3062             : /* do background work or not */
    3063             : void
    3064           0 : softdep_change_linkcnt(struct inode *ip, int nodelay)
    3065             : {
    3066           0 :         struct inodedep *inodedep;
    3067             :         int flags;
    3068             : 
    3069             :         /*
    3070             :          * If requested, do not allow background work to happen.
    3071             :          */
    3072             :         flags = DEPALLOC;
    3073           0 :         if (nodelay)
    3074           0 :                 flags |= NODELAY;
    3075             : 
    3076           0 :         ACQUIRE_LOCK(&lk);
    3077             : 
    3078           0 :         (void) inodedep_lookup(ip->i_fs, ip->i_number, flags, &inodedep);
    3079           0 :         if (DIP(ip, nlink) < ip->i_effnlink) {
    3080           0 :                 FREE_LOCK(&lk);
    3081           0 :                 panic("softdep_change_linkcnt: bad delta");
    3082             :         }
    3083             : 
    3084           0 :         inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;
    3085             : 
    3086           0 :         FREE_LOCK(&lk);
    3087           0 : }
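
/*
 * A rough worked example of id_nlinkdelta under this scheme: unlinking
 * one name of a file with two links drops i_effnlink from 2 to 1
 * immediately, while the on-disk link count stays at 2 until the zeroed
 * directory entry has been written.  During that window
 * id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink = 1, i.e. one pending
 * decrement, which handle_workitem_remove() later applies.
 */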
    3088             : 
    3089             : /*
    3090             :  * This workitem decrements the inode's link count.
    3091             :  * If the link count reaches zero, the file is removed.
    3092             :  */
    3093             : STATIC void 
    3094           0 : handle_workitem_remove(struct dirrem *dirrem)
    3095             : {
    3096           0 :         struct proc *p = CURPROC;       /* XXX */
    3097           0 :         struct inodedep *inodedep;
    3098           0 :         struct vnode *vp;
    3099             :         struct inode *ip;
    3100             :         ufsino_t oldinum;
    3101             :         int error;
    3102             : 
    3103           0 :         if ((error = VFS_VGET(dirrem->dm_mnt, dirrem->dm_oldinum, &vp)) != 0) {
    3104           0 :                 softdep_error("handle_workitem_remove: vget", error);
    3105           0 :                 return;
    3106             :         }
    3107           0 :         ip = VTOI(vp);
    3108           0 :         ACQUIRE_LOCK(&lk);
    3109           0 :         if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) 
    3110           0 :             == 0) {
    3111           0 :                 FREE_LOCK(&lk);
    3112           0 :                 panic("handle_workitem_remove: lost inodedep");
    3113             :         }
    3114             :         /*
    3115             :          * Normal file deletion.
    3116             :          */
    3117           0 :         if ((dirrem->dm_state & RMDIR) == 0) {
    3118           0 :                 DIP_ADD(ip, nlink, -1);
    3119           0 :                 ip->i_flag |= IN_CHANGE;
    3120           0 :                 if (DIP(ip, nlink) < ip->i_effnlink) {
    3121           0 :                         FREE_LOCK(&lk);
    3122           0 :                         panic("handle_workitem_remove: bad file delta");
    3123             :                 }
    3124           0 :                 inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;
    3125           0 :                 FREE_LOCK(&lk);
    3126           0 :                 vput(vp);
    3127           0 :                 num_dirrem -= 1;
    3128           0 :                 WORKITEM_FREE(dirrem, D_DIRREM);
    3129           0 :                 return;
    3130             :         }
    3131             :         /*
    3132             :          * Directory deletion. Decrement reference count for both the
    3133             :          * just deleted parent directory entry and the reference for ".".
    3134             :          * Next truncate the directory to length zero. When the
    3135             :          * truncation completes, arrange to have the reference count on
    3136             :          * the parent decremented to account for the loss of "..".
    3137             :          */
    3138           0 :         DIP_ADD(ip, nlink, -2);
    3139           0 :         ip->i_flag |= IN_CHANGE;
    3140           0 :         if (DIP(ip, nlink) < ip->i_effnlink)
    3141           0 :                 panic("handle_workitem_remove: bad dir delta");
    3142           0 :         inodedep->id_nlinkdelta = DIP(ip, nlink) - ip->i_effnlink;
    3143           0 :         FREE_LOCK(&lk);
    3144           0 :         if ((error = UFS_TRUNCATE(ip, (off_t)0, 0, p->p_ucred)) != 0)
    3145           0 :                 softdep_error("handle_workitem_remove: truncate", error);
    3146             :         /*
    3147             :          * Rename a directory to a new parent. Since we are both deleting
    3148             :          * and creating a new directory entry, the link count on the new
    3149             :          * directory should not change. Thus we skip the followup dirrem.
    3150             :          */
    3151           0 :         if (dirrem->dm_state & DIRCHG) {
    3152           0 :                 vput(vp);
    3153           0 :                 num_dirrem -= 1;
    3154           0 :                 WORKITEM_FREE(dirrem, D_DIRREM);
    3155           0 :                 return;
    3156             :         }
    3157             :         /*
    3158             :          * If the inodedep does not exist, then the zero'ed inode has
    3159             :          * been written to disk. If the allocated inode has never been
    3160             :          * written to disk, then the on-disk inode is zero'ed. In either
    3161             :          * case we can remove the file immediately.
    3162             :          */
    3163           0 :         ACQUIRE_LOCK(&lk);
    3164           0 :         dirrem->dm_state = 0;
    3165           0 :         oldinum = dirrem->dm_oldinum;
    3166           0 :         dirrem->dm_oldinum = dirrem->dm_dirinum;
    3167           0 :         if (inodedep_lookup(ip->i_fs, oldinum, 0, &inodedep) == 0 ||
    3168           0 :             check_inode_unwritten(inodedep)) {
    3169           0 :                 FREE_LOCK(&lk);
    3170           0 :                 vput(vp);
    3171           0 :                 handle_workitem_remove(dirrem);
    3172           0 :                 return;
    3173             :         }
    3174           0 :         WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
    3175           0 :         FREE_LOCK(&lk);
    3176           0 :         ip->i_flag |= IN_CHANGE;
    3177           0 :         UFS_UPDATE(VTOI(vp), 0);
    3178           0 :         vput(vp);
    3179           0 : }
    3180             : 
    3181             : /*
    3182             :  * Inode de-allocation dependencies.
    3183             :  * 
    3184             :  * When an inode's link count is reduced to zero, it can be de-allocated. We
    3185             :  * found it convenient to postpone de-allocation until after the inode is
    3186             :  * written to disk with its new link count (zero).  At this point, all of the
    3187             :  * on-disk inode's block pointers are nullified and, with careful dependency
    3188             :  * list ordering, all dependencies related to the inode will be satisfied and
    3189             :  * the corresponding dependency structures de-allocated.  So, if/when the
    3190             :  * inode is reused, there will be no mixing of old dependencies with new
    3191             :  * ones.  This artificial dependency is set up by the block de-allocation
    3192             :  * procedure above (softdep_setup_freeblocks) and completed by the
    3193             :  * following procedure.
    3194             :  */
    3195             : STATIC void 
    3196           0 : handle_workitem_freefile(struct freefile *freefile)
    3197             : {
    3198             :         struct fs *fs;
    3199           0 :         struct vnode vp;
    3200           0 :         struct inode tip;
    3201             : #ifdef DEBUG
    3202             :         struct inodedep *idp;
    3203             : #endif
    3204             :         int error;
    3205             : 
    3206           0 :         fs = VFSTOUFS(freefile->fx_mnt)->um_fs;
    3207             : #ifdef DEBUG
    3208             :         ACQUIRE_LOCK(&lk);
    3209             :         error = inodedep_lookup(fs, freefile->fx_oldinum, 0, &idp);
    3210             :         FREE_LOCK(&lk);
    3211             :         if (error)
    3212             :                 panic("handle_workitem_freefile: inodedep survived");
    3213             : #endif
    3214           0 :         tip.i_ump = VFSTOUFS(freefile->fx_mnt);
    3215           0 :         tip.i_dev = freefile->fx_devvp->v_rdev;
    3216           0 :         tip.i_fs = fs;
    3217           0 :         tip.i_vnode = &vp;
    3218           0 :         vp.v_data = &tip;
    3219             : 
    3220           0 :         if ((error = ffs_freefile(&tip, freefile->fx_oldinum, 
    3221           0 :                  freefile->fx_mode)) != 0) {
    3222           0 :                 softdep_error("handle_workitem_freefile", error);
    3223           0 :         }
    3224           0 :         WORKITEM_FREE(freefile, D_FREEFILE);
    3225           0 : }
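
/*
 * A minimal sketch of the ordering enforced above.  write_inode_block()
 * is a hypothetical stand-in for the inode-block write; ffs_freefile()
 * is the real call made by handle_workitem_freefile().
 *
 *      (on-disk link count reaches zero)
 *      write_inode_block(ip);          (zeroed inode must reach disk first)
 *      ffs_freefile(ip, ino, mode);    (only then release the inode)
 */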
    3226             : 
    3227             : /*
    3228             :  * Disk writes.
    3229             :  * 
    3230             :  * The dependency structures constructed above are most actively used when file
    3231             :  * system blocks are written to disk.  No constraints are placed on when a
    3232             :  * block can be written, but unsatisfied update dependencies are made safe by
    3233             :  * modifying (or replacing) the source memory for the duration of the disk
    3234             :  * write.  When the disk write completes, the memory block is again brought
    3235             :  * up-to-date.
    3236             :  *
    3237             :  * In-core inode structure reclamation.
    3238             :  * 
    3239             :  * Because there are a finite number of "in-core" inode structures, they are
    3240             :  * reused regularly.  By transferring all inode-related dependencies to the
    3241             :  * in-memory inode block and indexing them separately (via "inodedep"s), we
    3242             :  * can allow "in-core" inode structures to be reused at any time and avoid
    3243             :  * any increase in contention.
    3244             :  *
    3245             :  * Called just before entering the device driver to initiate a new disk I/O.
    3246             :  * The buffer must be locked, thus, no I/O completion operations can occur
    3247             :  * while we are manipulating its associated dependencies.
    3248             :  */
    3249             : /* structure describing disk write to occur */
    3250             : void 
    3251           0 : softdep_disk_io_initiation(struct buf *bp)
    3252             : {
    3253             :         struct worklist *wk, *nextwk;
    3254             :         struct indirdep *indirdep;
    3255             :         struct inodedep *inodedep;
    3256             :         struct buf *sbp;
    3257             : 
    3258             :         /*
    3259             :          * We only care about write operations. There should never
    3260             :          * be dependencies for reads.
    3261             :          */
    3262           0 :         if (bp->b_flags & B_READ)
    3263           0 :                 panic("softdep_disk_io_initiation: read");
    3264             : 
    3265           0 :         ACQUIRE_LOCK(&lk);
    3266             : 
    3267             :         /*
    3268             :          * Do any necessary pre-I/O processing.
    3269             :          */
    3270           0 :         for (wk = LIST_FIRST(&bp->b_dep); wk; wk = nextwk) {
    3271           0 :                 nextwk = LIST_NEXT(wk, wk_list);
    3272           0 :                 switch (wk->wk_type) {
    3273             : 
    3274             :                 case D_PAGEDEP:
    3275           0 :                         initiate_write_filepage(WK_PAGEDEP(wk), bp);
    3276           0 :                         continue;
    3277             : 
    3278             :                 case D_INODEDEP:
    3279           0 :                         inodedep = WK_INODEDEP(wk);
    3280           0 :                         if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC)
    3281           0 :                                 initiate_write_inodeblock_ufs1(inodedep, bp);
    3282             : #ifdef FFS2
    3283             :                         else
    3284           0 :                                 initiate_write_inodeblock_ufs2(inodedep, bp);
    3285             : #endif
    3286             :                         continue;
    3287             : 
    3288             :                 case D_INDIRDEP:
    3289           0 :                         indirdep = WK_INDIRDEP(wk);
    3290           0 :                         if (indirdep->ir_state & GOINGAWAY)
    3291           0 :                                 panic("disk_io_initiation: indirdep gone");
    3292             :                         /*
    3293             :                          * If there are no remaining dependencies, this
    3294             :                          * will be writing the real pointers, so the
    3295             :                          * dependency can be freed.
    3296             :                          */
    3297           0 :                         if (LIST_FIRST(&indirdep->ir_deplisthd) == NULL) {
    3298           0 :                                 sbp = indirdep->ir_savebp;
    3299           0 :                                 sbp->b_flags |= B_INVAL | B_NOCACHE;
    3300             :                                 /* inline expand WORKLIST_REMOVE(wk); */
    3301           0 :                                 wk->wk_state &= ~ONWORKLIST;
    3302           0 :                                 LIST_REMOVE(wk, wk_list);
    3303           0 :                                 WORKITEM_FREE(indirdep, D_INDIRDEP);
    3304           0 :                                 FREE_LOCK(&lk);
    3305           0 :                                 brelse(sbp);
    3306           0 :                                 ACQUIRE_LOCK(&lk);
    3307           0 :                                 continue;
    3308             :                         }
    3309             :                         /*
    3310             :                          * Replace up-to-date version with safe version.
    3311             :                          */
    3312           0 :                         FREE_LOCK(&lk);
    3313           0 :                         indirdep->ir_saveddata = malloc(bp->b_bcount,
    3314             :                             M_INDIRDEP, M_WAITOK);
    3315           0 :                         ACQUIRE_LOCK(&lk);
    3316           0 :                         indirdep->ir_state &= ~ATTACHED;
    3317           0 :                         indirdep->ir_state |= UNDONE;
    3318           0 :                         memcpy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
    3319           0 :                         memcpy(bp->b_data, indirdep->ir_savebp->b_data,
    3320             :                             bp->b_bcount);
    3321           0 :                         continue;
    3322             : 
    3323             :                 case D_MKDIR:
    3324             :                 case D_BMSAFEMAP:
    3325             :                 case D_ALLOCDIRECT:
    3326             :                 case D_ALLOCINDIR:
    3327             :                         continue;
    3328             : 
    3329             :                 default:
    3330           0 :                         FREE_LOCK(&lk);
    3331           0 :                         panic("handle_disk_io_initiation: Unexpected type %s",
    3332           0 :                             TYPENAME(wk->wk_type));
    3333             :                         /* NOTREACHED */
    3334             :                 }
    3335             :         }
    3336             : 
    3337           0 :         FREE_LOCK(&lk);
    3338           0 : }
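
/*
 * A minimal sketch of the "safe version" swap done for indirect blocks in
 * the routine above: the up-to-date pointers are saved aside, the buffer
 * is written from the last-known-safe copy, and the real data is restored
 * once the write completes (per the "Disk writes" comment earlier).
 *
 *      indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP, M_WAITOK);
 *      memcpy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
 *      memcpy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount);
 *      ... disk write runs against the safe copy ...
 *      memcpy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount);
 */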
    3339             : 
    3340             : /*
    3341             :  * Called from within the procedure above to deal with unsatisfied
    3342             :  * allocation dependencies in a directory. The buffer must be locked,
    3343             :  * thus, no I/O completion operations can occur while we are
    3344             :  * manipulating its associated dependencies.
    3345             :  */
    3346             : STATIC void
    3347           0 : initiate_write_filepage(struct pagedep *pagedep, struct buf *bp)
    3348             : {
    3349             :         struct diradd *dap;
    3350             :         struct direct *ep;
    3351             :         int i;
    3352             : 
    3353           0 :         if (pagedep->pd_state & IOSTARTED) {
    3354             :                 /*
    3355             :                  * This can only happen if there is a driver that does not
    3356             :                  * understand chaining. Here biodone will reissue the call
    3357             :                  * to strategy for the incomplete buffers.
    3358             :                  */
    3359           0 :                 printf("initiate_write_filepage: already started\n");
    3360           0 :                 return;
    3361             :         }
    3362           0 :         pagedep->pd_state |= IOSTARTED;
    3363           0 :         for (i = 0; i < DAHASHSZ; i++) {
    3364           0 :                 LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
    3365           0 :                         ep = (struct direct *)
    3366           0 :                             ((char *)bp->b_data + dap->da_offset);
    3367           0 :                         if (ep->d_ino != dap->da_newinum) {
    3368           0 :                                 FREE_LOCK(&lk);
    3369           0 :                                 panic("%s: dir inum %u != new %u",
    3370             :                                     "initiate_write_filepage",
    3371           0 :                                     ep->d_ino, dap->da_newinum);
    3372             :                         }
    3373           0 :                         if (dap->da_state & DIRCHG)
    3374           0 :                                 ep->d_ino = dap->da_previous->dm_oldinum;
    3375             :                         else
    3376           0 :                                 ep->d_ino = 0;
    3377           0 :                         dap->da_state &= ~ATTACHED;
    3378           0 :                         dap->da_state |= UNDONE;
    3379             :                 }
    3380             :         }
    3381           0 : }
    3382             : 
    3383             : /*
    3384             :  * Called from within the procedure above to deal with unsatisfied
    3385             :  * allocation dependencies in an inodeblock. The buffer must be
    3386             :  * locked, thus, no I/O completion operations can occur while we
    3387             :  * are manipulating its associated dependencies.
    3388             :  */
    3389             : /* The inode block */
    3390             : STATIC void 
    3391           0 : initiate_write_inodeblock_ufs1(struct inodedep *inodedep, struct buf *bp)
    3392             : {
    3393             :         struct allocdirect *adp, *lastadp;
    3394             :         struct ufs1_dinode *dp;
    3395             :         struct fs *fs;
    3396             : #ifdef DIAGNOSTIC
    3397             :         daddr_t prevlbn = 0;
    3398             :         int32_t d1, d2;
    3399             : #endif
    3400             :         int i, deplist;
    3401             : 
    3402           0 :         if (inodedep->id_state & IOSTARTED) {
    3403           0 :                 FREE_LOCK(&lk);
    3404           0 :                 panic("initiate_write_inodeblock: already started");
    3405             :         }
    3406           0 :         inodedep->id_state |= IOSTARTED;
    3407           0 :         fs = inodedep->id_fs;
    3408           0 :         dp = (struct ufs1_dinode *)bp->b_data +
    3409           0 :             ino_to_fsbo(fs, inodedep->id_ino);
    3410             :         /*
    3411             :          * If the bitmap is not yet written, then the allocated
    3412             :          * inode cannot be written to disk.
    3413             :          */
    3414           0 :         if ((inodedep->id_state & DEPCOMPLETE) == 0) {
    3415           0 :                 if (inodedep->id_savedino1 != NULL) {
    3416             :                         FREE_LOCK(&lk);
    3417           0 :                         panic("initiate_write_inodeblock: already doing I/O");
    3418             :                 }
    3419             :                 FREE_LOCK(&lk);
    3420           0 :                 inodedep->id_savedino1 = malloc(sizeof(struct ufs1_dinode),
    3421             :                     M_INODEDEP, M_WAITOK);
    3422           0 :                 inodedep->id_unsize = sizeof(struct ufs1_dinode);
    3423           0 :                 ACQUIRE_LOCK(&lk);
    3424           0 :                 *inodedep->id_savedino1 = *dp;
    3425           0 :                 memset(dp, 0, sizeof(struct ufs1_dinode));
    3426           0 :                 return;
    3427             :         }
    3428             :         /*
    3429             :          * If no dependencies, then there is nothing to roll back.
    3430             :          */
    3431           0 :         inodedep->id_savedsize = dp->di_size;
    3432           0 :         if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL)
    3433           0 :                 return;
    3434             :         /*
    3435             :          * Set the dependencies to busy.
    3436             :          */
    3437           0 :         for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
    3438           0 :              adp = TAILQ_NEXT(adp, ad_next)) {
    3439             : #ifdef DIAGNOSTIC
    3440           0 :                 if (deplist != 0 && prevlbn >= adp->ad_lbn) {
    3441           0 :                         FREE_LOCK(&lk);
    3442           0 :                         panic("softdep_write_inodeblock: lbn order");
    3443             :                 }
    3444           0 :                 prevlbn = adp->ad_lbn;
    3445           0 :                 if (adp->ad_lbn < NDADDR &&
    3446           0 :                     (d1 = dp->di_db[adp->ad_lbn]) != (d2 = adp->ad_newblkno)) {
    3447           0 :                         FREE_LOCK(&lk);
    3448           0 :                         panic("%s: direct pointer #%lld mismatch %d != %d",
    3449           0 :                             "softdep_write_inodeblock", (long long)adp->ad_lbn,
    3450             :                             d1, d2);
    3451             :                 }
    3452           0 :                 if (adp->ad_lbn >= NDADDR &&
    3453           0 :                     (d1 = dp->di_ib[adp->ad_lbn - NDADDR]) !=
    3454           0 :                     (d2 = adp->ad_newblkno)) {
    3455           0 :                         FREE_LOCK(&lk);
    3456           0 :                         panic("%s: indirect pointer #%lld mismatch %d != %d",
    3457           0 :                             "softdep_write_inodeblock", (long long)(adp->ad_lbn -
    3458             :                             NDADDR), d1, d2);
    3459             :                 }
    3460           0 :                 deplist |= 1 << adp->ad_lbn;
    3461           0 :                 if ((adp->ad_state & ATTACHED) == 0) {
    3462           0 :                         FREE_LOCK(&lk);
    3463           0 :                         panic("softdep_write_inodeblock: Unknown state 0x%x",
    3464           0 :                             adp->ad_state);
    3465             :                 }
    3466             : #endif /* DIAGNOSTIC */
    3467           0 :                 adp->ad_state &= ~ATTACHED;
    3468           0 :                 adp->ad_state |= UNDONE;
    3469             :         }
    3470             :         /*
    3471             :          * The on-disk inode cannot claim to be any larger than the last
    3472             :          * fragment that has been written. Otherwise, the on-disk inode
    3473             :          * might have fragments that were not the last block in the file
    3474             :          * which would corrupt the filesystem.
    3475             :          */
    3476           0 :         for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
    3477           0 :              lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
    3478           0 :                 if (adp->ad_lbn >= NDADDR)
    3479             :                         break;
    3480           0 :                 dp->di_db[adp->ad_lbn] = adp->ad_oldblkno;
    3481             :                 /* keep going until hitting a rollback to a frag */
    3482           0 :                 if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
    3483             :                         continue;
    3484           0 :                 dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
    3485           0 :                 for (i = adp->ad_lbn + 1; i < NDADDR; i++) {
    3486             : #ifdef DIAGNOSTIC
    3487           0 :                         if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) {
    3488           0 :                                 FREE_LOCK(&lk);
    3489           0 :                                 panic("softdep_write_inodeblock: lost dep1");
    3490             :                         }
    3491             : #endif /* DIAGNOSTIC */
    3492           0 :                         dp->di_db[i] = 0;
    3493             :                 }
    3494           0 :                 for (i = 0; i < NIADDR; i++) {
    3495             : #ifdef DIAGNOSTIC
    3496           0 :                         if (dp->di_ib[i] != 0 &&
    3497           0 :                             (deplist & ((1 << NDADDR) << i)) == 0) {
    3498           0 :                                 FREE_LOCK(&lk);
    3499           0 :                                 panic("softdep_write_inodeblock: lost dep2");
    3500             :                         }
    3501             : #endif /* DIAGNOSTIC */
    3502           0 :                         dp->di_ib[i] = 0;
    3503             :                 }
    3504           0 :                 return;
    3505             :         }
    3506             :         /*
    3507             :          * If we have zero'ed out the last allocated block of the file,
    3508             :          * roll back the size to the last currently allocated block.
    3509             :          * We know that this last allocated block is full-sized, as
    3510             :          * we already checked for fragments in the loop above.
    3511             :          */
    3512           0 :         if (lastadp != NULL &&
    3513           0 :             dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
    3514           0 :                 for (i = lastadp->ad_lbn; i >= 0; i--)
    3515           0 :                         if (dp->di_db[i] != 0)
    3516             :                                 break;
    3517           0 :                 dp->di_size = (i + 1) * fs->fs_bsize;
    3518           0 :         }
    3519             :         /*
    3520             :          * The only dependencies are for indirect blocks.
    3521             :          *
    3522             :          * The file size for indirect block additions is not guaranteed.
    3523             :          * Such a guarantee would be non-trivial to achieve. The conventional
    3524             :          * synchronous write implementation also does not make this guarantee.
    3525             :          * Fsck should catch and fix discrepancies. Arguably, the file size
    3526             :          * can be over-estimated without destroying integrity when the file
    3527             :          * moves into the indirect blocks (i.e., is large). If we want to
    3528             :          * postpone fsck, we are stuck with this argument.
    3529             :          */
    3530           0 :         for (; adp; adp = TAILQ_NEXT(adp, ad_next))
    3531           0 :                 dp->di_ib[adp->ad_lbn - NDADDR] = 0;
    3532           0 : }
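
/*
 * A rough worked example of the size rollback at the end of the routine
 * above, assuming fs_bsize is 16384: if direct blocks 0-3 were allocated
 * but block 3 rolls back to an unallocated old block (di_db[3] == 0), the
 * downward scan stops at di_db[2] != 0 and di_size becomes
 * (2 + 1) * 16384 = 49152, the end of the last block actually claimed on
 * disk.
 */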
    3533             : 
    3534             : #ifdef FFS2
    3535             : /*
    3536             :  * Version of initiate_write_inodeblock that handles FFS2 dinodes.
    3537             :  */
    3538             : /* The inode block */
    3539             : STATIC void
    3540           0 : initiate_write_inodeblock_ufs2(struct inodedep *inodedep, struct buf *bp)
    3541             : {
    3542             :         struct allocdirect *adp, *lastadp;
    3543             :         struct ufs2_dinode *dp;
    3544           0 :         struct fs *fs = inodedep->id_fs;
    3545             : #ifdef DIAGNOSTIC
    3546             :         daddr_t prevlbn = -1, d1, d2;
    3547             : #endif
    3548             :         int deplist, i;
    3549             : 
    3550           0 :         if (inodedep->id_state & IOSTARTED)
    3551           0 :                 panic("initiate_write_inodeblock_ufs2: already started");
    3552           0 :         inodedep->id_state |= IOSTARTED;
    3553           0 :         fs = inodedep->id_fs;
    3554           0 :         dp = (struct ufs2_dinode *)bp->b_data +
    3555           0 :             ino_to_fsbo(fs, inodedep->id_ino);
    3556             :         /*
    3557             :          * If the bitmap is not yet written, then the allocated
    3558             :          * inode cannot be written to disk.
    3559             :          */
    3560           0 :         if ((inodedep->id_state & DEPCOMPLETE) == 0) {
    3561           0 :                 if (inodedep->id_savedino2 != NULL)
    3562           0 :                         panic("initiate_write_inodeblock_ufs2: I/O underway");
    3563           0 :                 inodedep->id_savedino2 = malloc(sizeof(struct ufs2_dinode),
    3564             :                     M_INODEDEP, M_WAITOK);
    3565           0 :                 inodedep->id_unsize = sizeof(struct ufs2_dinode);
    3566           0 :                 *inodedep->id_savedino2 = *dp;
    3567           0 :                 memset(dp, 0, sizeof(struct ufs2_dinode));
    3568           0 :                 return;
    3569             :         }
    3570             :         /*
    3571             :          * If no dependencies, then there is nothing to roll back.
    3572             :          */
    3573           0 :         inodedep->id_savedsize = dp->di_size;
    3574           0 :         if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL)
    3575           0 :                 return;
    3576             : 
    3577             : #ifdef notyet
    3578             :         inodedep->id_savedextsize = dp->di_extsize;
    3579             :         if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL &&
    3580             :             TAILQ_FIRST(&inodedep->id_extupdt) == NULL)
    3581             :                 return;
    3582             :         /*
    3583             :          * Set the ext data dependencies to busy.
    3584             :          */
    3585             :         for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp;
    3586             :              adp = TAILQ_NEXT(adp, ad_next)) {
    3587             : #ifdef DIAGNOSTIC
    3588             :                 if (deplist != 0 && prevlbn >= adp->ad_lbn) {
    3589             :                         FREE_LOCK(&lk);
    3590             :                         panic("softdep_write_inodeblock: lbn order");
    3591             :                 }
    3592             :                 prevlbn = adp->ad_lbn;
    3593             :                 if ((d1 = dp->di_extb[adp->ad_lbn]) !=
    3594             :                     (d2 = adp->ad_newblkno)) {
    3595             :                         FREE_LOCK(&lk);
    3596             :                         panic("%s: direct pointer #%lld mismatch %lld != %lld",
    3597             :                             "softdep_write_inodeblock", (long long)adp->ad_lbn,
    3598             :                             d1, d2);
    3599             :                 }
    3600             :                 deplist |= 1 << adp->ad_lbn;
    3601             :                 if ((adp->ad_state & ATTACHED) == 0) {
    3602             :                         FREE_LOCK(&lk);
    3603             :                         panic("softdep_write_inodeblock: Unknown state 0x%x",
    3604             :                             adp->ad_state);
    3605             :                 }
    3606             : #endif /* DIAGNOSTIC */
    3607             :                 adp->ad_state &= ~ATTACHED;
    3608             :                 adp->ad_state |= UNDONE;
    3609             :         }
    3610             :         /*
    3611             :          * The on-disk inode cannot claim to be any larger than the last
    3612             :          * fragment that has been written. Otherwise, the on-disk inode
    3613             :          * might have fragments that were not the last block in the ext
    3614             :          * data which would corrupt the filesystem.
    3615             :          */
    3616             :         for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp;
    3617             :              lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
    3618             :                 dp->di_extb[adp->ad_lbn] = adp->ad_oldblkno;
    3619             :                 /* keep going until hitting a rollback to a frag */
    3620             :                 if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
    3621             :                         continue;
    3622             :                 dp->di_extsize = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
    3623             :                 for (i = adp->ad_lbn + 1; i < NXADDR; i++) {
    3624             : #ifdef DIAGNOSTIC
    3625             :                         if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) {
    3626             :                                 FREE_LOCK(&lk);
    3627             :                                 panic("softdep_write_inodeblock: lost dep1");
    3628             :                         }
    3629             : #endif /* DIAGNOSTIC */
    3630             :                         dp->di_extb[i] = 0;
    3631             :                 }
    3632             :                 lastadp = NULL;
    3633             :                 break;
    3634             :         }
    3635             :         /*
    3636             :          * If we have zero'ed out the last allocated block of the ext
    3637             :          * data, roll back the size to the last currently allocated block.
    3638             :          * We know that this last allocated block is full-sized, as
    3639             :          * we already checked for fragments in the loop above.
    3640             :          */
    3641             :         if (lastadp != NULL &&
    3642             :             dp->di_extsize <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
    3643             :                 for (i = lastadp->ad_lbn; i >= 0; i--)
    3644             :                         if (dp->di_extb[i] != 0)
    3645             :                                 break;
    3646             :                 dp->di_extsize = (i + 1) * fs->fs_bsize;
    3647             :         }
    3648             : #endif /* notyet */
    3649             : 
    3650             :         /*
    3651             :          * Set the file data dependencies to busy.
    3652             :          */
    3653           0 :         for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
    3654           0 :              adp = TAILQ_NEXT(adp, ad_next)) {
    3655             : #ifdef DIAGNOSTIC
    3656           0 :                 if (deplist != 0 && prevlbn >= adp->ad_lbn) {
    3657           0 :                         FREE_LOCK(&lk);
    3658           0 :                         panic("softdep_write_inodeblock: lbn order");
    3659             :                 }
    3660           0 :                 prevlbn = adp->ad_lbn;
    3661           0 :                 if (adp->ad_lbn < NDADDR &&
    3662           0 :                     (d1 = dp->di_db[adp->ad_lbn]) != (d2 = adp->ad_newblkno)) {
    3663           0 :                         FREE_LOCK(&lk);
    3664           0 :                         panic("%s: direct pointer #%lld mismatch %lld != %lld",
    3665           0 :                             "softdep_write_inodeblock", (long long)adp->ad_lbn,
    3666             :                             d1, d2);
    3667             :                 }
    3668           0 :                 if (adp->ad_lbn >= NDADDR &&
    3669           0 :                     (d1 = dp->di_ib[adp->ad_lbn - NDADDR]) !=
    3670           0 :                     (d2 = adp->ad_newblkno)) {
    3671           0 :                         FREE_LOCK(&lk);
    3672           0 :                         panic("%s: indirect pointer #%lld mismatch %lld != %lld",
    3673           0 :                             "softdep_write_inodeblock", (long long)(adp->ad_lbn -
    3674             :                             NDADDR), d1, d2);
    3675             :                 }
    3676           0 :                 deplist |= 1 << adp->ad_lbn;
    3677           0 :                 if ((adp->ad_state & ATTACHED) == 0) {
    3678           0 :                         FREE_LOCK(&lk);
    3679           0 :                         panic("softdep_write_inodeblock: Unknown state 0x%x",
    3680           0 :                             adp->ad_state);
    3681             :                 }
    3682             : #endif /* DIAGNOSTIC */
    3683           0 :                 adp->ad_state &= ~ATTACHED;
    3684           0 :                 adp->ad_state |= UNDONE;
    3685             :         }
    3686             :         /*
    3687             :          * The on-disk inode cannot claim to be any larger than the last
    3688             :          * fragment that has been written. Otherwise, the on-disk inode
    3689             :          * might have fragments that were not the last block in the file
    3690             :          * which would corrupt the filesystem.
    3691             :          */
    3692           0 :         for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
    3693           0 :              lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
    3694           0 :                 if (adp->ad_lbn >= NDADDR)
    3695             :                         break;
    3696           0 :                 dp->di_db[adp->ad_lbn] = adp->ad_oldblkno;
    3697             :                 /* keep going until hitting a rollback to a frag */
    3698           0 :                 if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
    3699             :                         continue;
    3700           0 :                 dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
    3701           0 :                 for (i = adp->ad_lbn + 1; i < NDADDR; i++) {
    3702             : #ifdef DIAGNOSTIC
    3703           0 :                         if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) {
    3704           0 :                                 FREE_LOCK(&lk);
    3705           0 :                                 panic("softdep_write_inodeblock: lost dep2");
    3706             :                         }
    3707             : #endif /* DIAGNOSTIC */
    3708           0 :                         dp->di_db[i] = 0;
    3709             :                 }
    3710           0 :                 for (i = 0; i < NIADDR; i++) {
    3711             : #ifdef DIAGNOSTIC
    3712           0 :                         if (dp->di_ib[i] != 0 &&
    3713           0 :                             (deplist & ((1 << NDADDR) << i)) == 0) {
    3714           0 :                                 FREE_LOCK(&lk);
    3715           0 :                                 panic("softdep_write_inodeblock: lost dep3");
    3716             :                         }
    3717             : #endif /* DIAGNOSTIC */
    3718           0 :                         dp->di_ib[i] = 0;
    3719             :                 }
    3720           0 :                 return;
    3721             :         }
    3722             :         /*
    3723             :          * If we have zero'ed out the last allocated block of the file,
    3724             :          * roll back the size to the last currently allocated block.
    3725             :          * We know that this last allocated block is full-sized, as
    3726             :          * we already checked for fragments in the loop above.
    3727             :          */
    3728           0 :         if (lastadp != NULL &&
    3729           0 :             dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
    3730           0 :                 for (i = lastadp->ad_lbn; i >= 0; i--)
    3731           0 :                         if (dp->di_db[i] != 0)
    3732             :                                 break;
    3733           0 :                 dp->di_size = (i + 1) * fs->fs_bsize;
    3734           0 :         }
    3735             :         /*
    3736             :          * The only dependencies are for indirect blocks.
    3737             :          *
    3738             :          * The file size for indirect block additions is not guaranteed.
    3739             :          * Such a guarantee would be non-trivial to achieve. The conventional
    3740             :          * synchronous write implementation also does not make this guarantee.
    3741             :          * Fsck should catch and fix discrepancies. Arguably, the file size
    3742             :          * can be over-estimated without destroying integrity when the file
    3743             :          * moves into the indirect blocks (i.e., is large). If we want to
    3744             :          * postpone fsck, we are stuck with this argument.
    3745             :          */
    3746           0 :         for (; adp; adp = TAILQ_NEXT(adp, ad_next))
    3747           0 :                 dp->di_ib[adp->ad_lbn - NDADDR] = 0;
    3748           0 : }
    3749             : #endif /* FFS2 */
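/*
 * A minimal sketch (hypothetical, not part of ffs_softdep.c): the size
 * rollback arithmetic used by softdep_write_inodeblock() above, pulled
 * out into a standalone helper.  While an allocdirect still claims a
 * fragment, the on-disk size must end exactly at that fragment;
 * otherwise it is rounded down to the last block whose pointer is
 * still non-zero.  The names rollback_size, lastlbn, oldsize and db
 * are invented for the sketch.
 */
static long long
rollback_size(long long bsize, long long lastlbn, long long oldsize,
    const long long *db)
{
	long long i;

	if (oldsize != 0 && oldsize != bsize)
		/* a fragment: lastlbn full blocks plus the fragment */
		return (bsize * lastlbn + oldsize);
	for (i = lastlbn; i >= 0; i--)
		if (db[i] != 0)
			break;
	/* last fully allocated block; i == -1 (empty file) yields size 0 */
	return ((i + 1) * bsize);
}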
    3750             : 
    3751             : /*
    3752             :  * This routine is called during the completion interrupt
    3753             :  * service routine for a disk write (from the procedure called
    3754             :  * by the device driver to inform the file system caches of
    3755             :  * a request completion).  It should be called early in this
    3756             :  * procedure, before the block is made available to other
    3757             :  * processes or other routines are called.
    3758             :  */
    3759             : /* describes the completed disk write */
    3760             : void 
    3761           0 : softdep_disk_write_complete(struct buf *bp)
    3762             : {
    3763             :         struct worklist *wk;
    3764           0 :         struct workhead reattach;
    3765             :         struct newblk *newblk;
    3766             :         struct allocindir *aip;
    3767             :         struct allocdirect *adp;
    3768             :         struct indirdep *indirdep;
    3769             :         struct inodedep *inodedep;
    3770             :         struct bmsafemap *bmsafemap;
    3771             : 
    3772             :         /*
    3773             :          * If an error occurred while doing the write, then the data
    3774             :          * has not hit the disk and the dependencies cannot be unrolled.
    3775             :          */
    3776           0 :         if ((bp->b_flags & B_ERROR) && !(bp->b_flags & B_INVAL))
    3777           0 :                 return;
    3778             : 
    3779             : #ifdef DEBUG
    3780             :         if (lk.lkt_held != -1)
    3781             :                 panic("softdep_disk_write_complete: lock is held");
    3782             :         lk.lkt_held = -2;
    3783             : #endif
    3784           0 :         LIST_INIT(&reattach);
    3785           0 :         while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
    3786           0 :                 WORKLIST_REMOVE(wk);
    3787           0 :                 switch (wk->wk_type) {
    3788             : 
    3789             :                 case D_PAGEDEP:
    3790           0 :                         if (handle_written_filepage(WK_PAGEDEP(wk), bp))
    3791           0 :                                 WORKLIST_INSERT(&reattach, wk);
    3792           0 :                         continue;
    3793             : 
    3794             :                 case D_INODEDEP:
    3795           0 :                         if (handle_written_inodeblock(WK_INODEDEP(wk), bp))
    3796           0 :                                 WORKLIST_INSERT(&reattach, wk);
    3797           0 :                         continue;
    3798             : 
    3799             :                 case D_BMSAFEMAP:
    3800           0 :                         bmsafemap = WK_BMSAFEMAP(wk);
    3801           0 :                         while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkhd))) {
    3802           0 :                                 newblk->nb_state |= DEPCOMPLETE;
    3803           0 :                                 newblk->nb_bmsafemap = NULL;
    3804           0 :                                 LIST_REMOVE(newblk, nb_deps);
    3805             :                         }
    3806           0 :                         while ((adp =
    3807           0 :                            LIST_FIRST(&bmsafemap->sm_allocdirecthd))) {
    3808           0 :                                 adp->ad_state |= DEPCOMPLETE;
    3809           0 :                                 adp->ad_buf = NULL;
    3810           0 :                                 LIST_REMOVE(adp, ad_deps);
    3811           0 :                                 handle_allocdirect_partdone(adp);
    3812             :                         }
    3813           0 :                         while ((aip =
    3814           0 :                             LIST_FIRST(&bmsafemap->sm_allocindirhd))) {
    3815           0 :                                 aip->ai_state |= DEPCOMPLETE;
    3816           0 :                                 aip->ai_buf = NULL;
    3817           0 :                                 LIST_REMOVE(aip, ai_deps);
    3818           0 :                                 handle_allocindir_partdone(aip);
    3819             :                         }
    3820           0 :                         while ((inodedep =
    3821           0 :                              LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) {
    3822           0 :                                 inodedep->id_state |= DEPCOMPLETE;
    3823           0 :                                 LIST_REMOVE(inodedep, id_deps);
    3824           0 :                                 inodedep->id_buf = NULL;
    3825             :                         }
    3826           0 :                         WORKITEM_FREE(bmsafemap, D_BMSAFEMAP);
    3827           0 :                         continue;
    3828             : 
    3829             :                 case D_MKDIR:
    3830           0 :                         handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY);
    3831           0 :                         continue;
    3832             : 
    3833             :                 case D_ALLOCDIRECT:
    3834           0 :                         adp = WK_ALLOCDIRECT(wk);
    3835           0 :                         adp->ad_state |= COMPLETE;
    3836           0 :                         handle_allocdirect_partdone(adp);
    3837           0 :                         continue;
    3838             : 
    3839             :                 case D_ALLOCINDIR:
    3840           0 :                         aip = WK_ALLOCINDIR(wk);
    3841           0 :                         aip->ai_state |= COMPLETE;
    3842           0 :                         handle_allocindir_partdone(aip);
    3843           0 :                         continue;
    3844             : 
    3845             :                 case D_INDIRDEP:
    3846           0 :                         indirdep = WK_INDIRDEP(wk);
    3847           0 :                         if (indirdep->ir_state & GOINGAWAY)
    3848           0 :                                 panic("disk_write_complete: indirdep gone");
    3849           0 :                         memcpy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount);
    3850           0 :                         free(indirdep->ir_saveddata, M_INDIRDEP, bp->b_bcount);
    3851           0 :                         indirdep->ir_saveddata = NULL;
    3852           0 :                         indirdep->ir_state &= ~UNDONE;
    3853           0 :                         indirdep->ir_state |= ATTACHED;
    3854           0 :                         while ((aip = LIST_FIRST(&indirdep->ir_donehd))) {
    3855           0 :                                 handle_allocindir_partdone(aip);
    3856           0 :                                 if (aip == LIST_FIRST(&indirdep->ir_donehd))
    3857           0 :                                         panic("disk_write_complete: not gone");
    3858             :                         }
    3859           0 :                         WORKLIST_INSERT(&reattach, wk);
    3860           0 :                         if ((bp->b_flags & B_DELWRI) == 0)
    3861           0 :                                 stat_indir_blk_ptrs++;
    3862           0 :                         buf_dirty(bp);
    3863           0 :                         continue;
    3864             : 
    3865             :                 default:
    3866           0 :                         panic("handle_disk_write_complete: Unknown type %s",
    3867           0 :                             TYPENAME(wk->wk_type));
    3868             :                         /* NOTREACHED */
    3869             :                 }
    3870             :         }
    3871             :         /*
    3872             :          * Reattach any requests that must be redone.
    3873             :          */
    3874           0 :         while ((wk = LIST_FIRST(&reattach)) != NULL) {
    3875           0 :                 WORKLIST_REMOVE(wk);
    3876           0 :                 WORKLIST_INSERT(&bp->b_dep, wk);
    3877             :         }
    3878             : #ifdef DEBUG
    3879             :         if (lk.lkt_held != -2)
    3880             :                 panic("softdep_disk_write_complete: lock lost");
    3881             :         lk.lkt_held = -1;
    3882             : #endif
    3883           0 : }
    3884             : 
    3885             : /*
    3886             :  * Called from within softdep_disk_write_complete above. Note that
    3887             :  * this routine is always called from interrupt level with further
    3888             :  * splbio interrupts blocked.
    3889             :  */
    3890             : /* the completed allocdirect */
    3891             : STATIC void 
    3892           0 : handle_allocdirect_partdone(struct allocdirect *adp)
    3893             : {
    3894             :         struct allocdirect *listadp;
    3895             :         struct inodedep *inodedep;
    3896             :         long bsize, delay;
    3897             : 
    3898           0 :         splassert(IPL_BIO);
    3899             : 
    3900           0 :         if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
    3901           0 :                 return;
    3902           0 :         if (adp->ad_buf != NULL)
    3903           0 :                 panic("handle_allocdirect_partdone: dangling dep");
    3904             : 
    3905             :         /*
    3906             :          * The on-disk inode cannot claim to be any larger than the last
    3907             :          * fragment that has been written. Otherwise, the on-disk inode
    3908             :          * might have fragments that were not the last block in the file
    3909             :          * which would corrupt the filesystem. Thus, we cannot free any
    3910             :          * allocdirects after one whose ad_oldblkno claims a fragment as
    3911             :          * these blocks must be rolled back to zero before writing the inode.
    3912             :          * We check the currently active set of allocdirects in id_inoupdt.
    3913             :          */
    3914           0 :         inodedep = adp->ad_inodedep;
    3915           0 :         bsize = inodedep->id_fs->fs_bsize;
    3916           0 :         TAILQ_FOREACH(listadp, &inodedep->id_inoupdt, ad_next) {
    3917             :                 /* found our block */
    3918           0 :                 if (listadp == adp)
    3919             :                         break;
    3920             :                 /* continue if the old block is not a fragment */
    3921           0 :                 if (listadp->ad_oldsize == 0 ||
    3922           0 :                     listadp->ad_oldsize == bsize)
    3923             :                         continue;
    3924             :                 /* hit a fragment */
    3925           0 :                 return;
    3926             :         }
    3927             :         /*
    3928             :          * If we have reached the end of the current list without
    3929             :          * finding the just finished dependency, then it must be
    3930             :          * on the future dependency list. Future dependencies cannot
    3931             :          * be freed until they are moved to the current list.
    3932             :          */
    3933           0 :         if (listadp == NULL) {
    3934             : #ifdef DEBUG
    3935             :                 TAILQ_FOREACH(listadp, &inodedep->id_newinoupdt, ad_next)
    3936             :                         /* found our block */
    3937             :                         if (listadp == adp)
    3938             :                                 break;
    3939             :                 if (listadp == NULL)
    3940             :                         panic("handle_allocdirect_partdone: lost dep");
    3941             : #endif /* DEBUG */
    3942           0 :                 return;
    3943             :         }
    3944             :         /*
    3945             :          * If we have found the just finished dependency, then free
    3946             :          * it along with anything that follows it that is complete.
    3947             :          * If the inode still has a bitmap dependency, then it has
    3948             :          * never been written to disk, hence the on-disk inode cannot
    3949             :          * reference the old fragment so we can free it without delay.
    3950             :          */
    3951           0 :         delay = (inodedep->id_state & DEPCOMPLETE);
    3952           0 :         for (; adp; adp = listadp) {
    3953           0 :                 listadp = TAILQ_NEXT(adp, ad_next);
    3954           0 :                 if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
    3955           0 :                         return;
    3956           0 :                 free_allocdirect(&inodedep->id_inoupdt, adp, delay);
    3957             :         }
    3958           0 : }
    3959             : 
    3960             : /*
    3961             :  * Called from within softdep_disk_write_complete above. Note that
    3962             :  * this routine is always called from interrupt level with further
    3963             :  * splbio interrupts blocked.
    3964             :  */
    3965             : /* the completed allocindir */
    3966             : STATIC void
    3967           0 : handle_allocindir_partdone(struct allocindir *aip)
    3968             : {
    3969             :         struct indirdep *indirdep;
    3970             : 
    3971           0 :         splassert(IPL_BIO);
    3972             : 
    3973           0 :         if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE)
    3974           0 :                 return;
    3975           0 :         if (aip->ai_buf != NULL)
    3976           0 :                 panic("handle_allocindir_partdone: dangling dependency");
    3977           0 :         indirdep = aip->ai_indirdep;
    3978           0 :         if (indirdep->ir_state & UNDONE) {
    3979           0 :                 LIST_REMOVE(aip, ai_next);
    3980           0 :                 LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next);
    3981           0 :                 return;
    3982             :         }
    3983           0 :         if (indirdep->ir_state & UFS1FMT)
    3984           0 :                 ((int32_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
    3985           0 :                     aip->ai_newblkno;
    3986             :         else
    3987           0 :                 ((int64_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
    3988             :                     aip->ai_newblkno;
    3989           0 :         LIST_REMOVE(aip, ai_next);
    3990           0 :         if (aip->ai_freefrag != NULL)
    3991           0 :                 add_to_worklist(&aip->ai_freefrag->ff_list);
    3992           0 :         WORKITEM_FREE(aip, D_ALLOCINDIR);
    3993           0 : }
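/*
 * A minimal sketch under assumptions (hypothetical helper, not in
 * ffs_softdep.c): committing a block pointer into an in-memory image
 * of an indirect block, as handle_allocindir_partdone() does above.
 * UFS1 indirect blocks hold 32-bit block numbers and UFS2 indirect
 * blocks hold 64-bit ones, so the slot width depends on the
 * filesystem format.  store_indir_ptr and its arguments are invented.
 */
#include <stdint.h>

static void
store_indir_ptr(void *blkdata, int is_ufs1, int offset, int64_t blkno)
{
	if (is_ufs1)
		((int32_t *)blkdata)[offset] = (int32_t)blkno;
	else
		((int64_t *)blkdata)[offset] = blkno;
}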
    3994             : 
    3995             : /*
    3996             :  * Called from within softdep_disk_write_complete above to restore
    3997             :  * in-memory inode block contents to their most up-to-date state. Note
    3998             :  * that this routine is always called from interrupt level with further
    3999             :  * splbio interrupts blocked.
    4000             :  */
    4001             : /* buffer containing the inode block */
    4002             : STATIC int 
    4003           0 : handle_written_inodeblock(struct inodedep *inodedep, struct buf *bp)
    4004             : {
    4005             :         struct worklist *wk, *filefree;
    4006             :         struct allocdirect *adp, *nextadp;
    4007             :         struct ufs1_dinode *dp1 = NULL;
    4008             :         struct ufs2_dinode *dp2 = NULL;
    4009             :         int hadchanges, fstype;
    4010             : 
    4011           0 :         splassert(IPL_BIO);
    4012             : 
    4013           0 :         if ((inodedep->id_state & IOSTARTED) == 0)
    4014           0 :                 panic("handle_written_inodeblock: not started");
    4015           0 :         inodedep->id_state &= ~IOSTARTED;
    4016             : 
    4017           0 :         if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC) {
    4018             :                 fstype = UM_UFS1;
    4019           0 :                 dp1 = (struct ufs1_dinode *) bp->b_data +
    4020           0 :                     ino_to_fsbo(inodedep->id_fs, inodedep->id_ino);
    4021           0 :         } else {
    4022             :                 fstype = UM_UFS2;
    4023           0 :                 dp2 = (struct ufs2_dinode *) bp->b_data +
    4024           0 :                     ino_to_fsbo(inodedep->id_fs, inodedep->id_ino);
    4025             :         }
    4026             : 
    4027             :         /*
    4028             :          * If we had to rollback the inode allocation because of
    4029             :          * bitmaps being incomplete, then simply restore it.
    4030             :          * Keep the block dirty so that it will not be reclaimed until
    4031             :          * all associated dependencies have been cleared and the
    4032             :          * corresponding updates written to disk.
    4033             :          */
    4034           0 :         if (inodedep->id_savedino1 != NULL) {
    4035           0 :                 if (fstype == UM_UFS1)
    4036           0 :                         *dp1 = *inodedep->id_savedino1;
    4037             :                 else
    4038           0 :                         *dp2 = *inodedep->id_savedino2;
    4039           0 :                 free(inodedep->id_savedino1, M_INODEDEP, inodedep->id_unsize);
    4040           0 :                 inodedep->id_savedino1 = NULL;
    4041           0 :                 if ((bp->b_flags & B_DELWRI) == 0)
    4042           0 :                         stat_inode_bitmap++;
    4043           0 :                 buf_dirty(bp);
    4044           0 :                 return (1);
    4045             :         }
    4046           0 :         inodedep->id_state |= COMPLETE;
    4047             :         /*
    4048             :          * Roll forward anything that had to be rolled back before 
    4049             :          * the inode could be updated.
    4050             :          */
    4051             :         hadchanges = 0;
    4052           0 :         for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = nextadp) {
    4053           0 :                 nextadp = TAILQ_NEXT(adp, ad_next);
    4054           0 :                 if (adp->ad_state & ATTACHED)
    4055           0 :                         panic("handle_written_inodeblock: new entry");
    4056           0 :                 if (fstype == UM_UFS1) {
    4057           0 :                         if (adp->ad_lbn < NDADDR) {
    4058           0 :                                 if (dp1->di_db[adp->ad_lbn] != adp->ad_oldblkno)
    4059           0 :                                          panic("%s: %s #%lld mismatch %d != "
    4060             :                                              "%lld",
    4061             :                                              "handle_written_inodeblock",
    4062             :                                              "direct pointer",
    4063             :                                              (long long)adp->ad_lbn,
    4064             :                                              dp1->di_db[adp->ad_lbn],
    4065             :                                              (long long)adp->ad_oldblkno);
    4066           0 :                                 dp1->di_db[adp->ad_lbn] = adp->ad_newblkno;
    4067           0 :                         } else {
    4068           0 :                                 if (dp1->di_ib[adp->ad_lbn - NDADDR] != 0)
    4069           0 :                                         panic("%s: %s #%lld allocated as %d",
    4070             :                                             "handle_written_inodeblock",
    4071             :                                             "indirect pointer",
    4072             :                                             (long long)(adp->ad_lbn - NDADDR),
    4073             :                                             dp1->di_ib[adp->ad_lbn - NDADDR]);
    4074           0 :                                 dp1->di_ib[adp->ad_lbn - NDADDR] =
    4075           0 :                                    adp->ad_newblkno;
    4076             :                         }
    4077             :                 } else {
    4078           0 :                         if (adp->ad_lbn < NDADDR) {
    4079           0 :                                 if (dp2->di_db[adp->ad_lbn] != adp->ad_oldblkno)
    4080           0 :                                         panic("%s: %s #%lld mismatch %lld != "
    4081             :                                             "%lld", "handle_written_inodeblock",
    4082             :                                             "direct pointer",
    4083             :                                             (long long)adp->ad_lbn,
    4084             :                                             dp2->di_db[adp->ad_lbn],
    4085             :                                             (long long)adp->ad_oldblkno);
    4086           0 :                                 dp2->di_db[adp->ad_lbn] = adp->ad_newblkno;
    4087           0 :                         } else {
    4088           0 :                                 if (dp2->di_ib[adp->ad_lbn - NDADDR] != 0)
    4089           0 :                                         panic("%s: %s #%lld allocated as %lld",
    4090             :                                             "handle_written_inodeblock",
    4091             :                                             "indirect pointer",
    4092             :                                             (long long)(adp->ad_lbn - NDADDR),
    4093             :                                             dp2->di_ib[adp->ad_lbn - NDADDR]);
    4094           0 :                                 dp2->di_ib[adp->ad_lbn - NDADDR] =
    4095           0 :                                     adp->ad_newblkno;
    4096             :                         }
    4097             :                 }
    4098           0 :                 adp->ad_state &= ~UNDONE;
    4099           0 :                 adp->ad_state |= ATTACHED;
    4100             :                 hadchanges = 1;
    4101             :         }
    4102           0 :         if (hadchanges && (bp->b_flags & B_DELWRI) == 0)
    4103           0 :                 stat_direct_blk_ptrs++;
    4104             :         /*
    4105             :          * Reset the file size to its most up-to-date value.
    4106             :          */
    4107           0 :         if (inodedep->id_savedsize == -1)
    4108           0 :                 panic("handle_written_inodeblock: bad size");
    4109             :         
    4110           0 :         if (fstype == UM_UFS1) {
    4111           0 :                 if (dp1->di_size != inodedep->id_savedsize) {
    4112           0 :                         dp1->di_size = inodedep->id_savedsize;
    4113             :                         hadchanges = 1;
    4114           0 :                 }
    4115             :         } else {
    4116           0 :                 if (dp2->di_size != inodedep->id_savedsize) {
    4117           0 :                         dp2->di_size = inodedep->id_savedsize;
    4118             :                         hadchanges = 1;
    4119           0 :                 }
    4120             :         }
    4121           0 :         inodedep->id_savedsize = -1;
    4122             :         /*
    4123             :          * If there were any rollbacks in the inode block, then it must be
    4124             :          * marked dirty so that it will eventually get written back in
    4125             :          * its correct form.
    4126             :          */
    4127           0 :         if (hadchanges)
    4128           0 :                 buf_dirty(bp);
    4129             :         /*
    4130             :          * Process any allocdirects that completed during the update.
    4131             :          */
    4132           0 :         if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
    4133           0 :                 handle_allocdirect_partdone(adp);
    4134             :         /*
    4135             :          * Process deallocations that were held pending until the
    4136             :          * inode had been written to disk. Freeing of the inode
    4137             :          * is delayed until after all blocks have been freed to
    4138             :          * avoid creation of new <vfsid, inum, lbn> triples
    4139             :          * before the old ones have been deleted.
    4140             :          */
    4141             :         filefree = NULL;
    4142           0 :         while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) {
    4143           0 :                 WORKLIST_REMOVE(wk);
    4144           0 :                 switch (wk->wk_type) {
    4145             : 
    4146             :                 case D_FREEFILE:
    4147             :                         /*
    4148             :                          * We defer adding filefree to the worklist until
    4149             :                          * all other additions have been made to ensure
    4150             :                          * that it will be done after all the old blocks
    4151             :                          * have been freed.
    4152             :                          */
    4153           0 :                         if (filefree != NULL)
    4154           0 :                                 panic("handle_written_inodeblock: filefree");
    4155             :                         filefree = wk;
    4156           0 :                         continue;
    4157             : 
    4158             :                 case D_MKDIR:
    4159           0 :                         handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT);
    4160           0 :                         continue;
    4161             : 
    4162             :                 case D_DIRADD:
    4163           0 :                         diradd_inode_written(WK_DIRADD(wk), inodedep);
    4164           0 :                         continue;
    4165             : 
    4166             :                 case D_FREEBLKS:
    4167           0 :                         wk->wk_state |= COMPLETE;
    4168           0 :                         if ((wk->wk_state & ALLCOMPLETE) != ALLCOMPLETE)
    4169           0 :                                 continue;
    4170             :                         /* FALLTHROUGH */
    4171             :                 case D_FREEFRAG:
    4172             :                 case D_DIRREM:
    4173           0 :                         add_to_worklist(wk);
    4174           0 :                         continue;
    4175             : 
    4176             :                 case D_NEWDIRBLK:
    4177           0 :                         free_newdirblk(WK_NEWDIRBLK(wk));
    4178           0 :                         continue;
    4179             : 
    4180             :                 default:
    4181           0 :                         panic("handle_written_inodeblock: Unknown type %s",
    4182           0 :                             TYPENAME(wk->wk_type));
    4183             :                         /* NOTREACHED */
    4184             :                 }
    4185             :         }
    4186           0 :         if (filefree != NULL) {
    4187           0 :                 if (free_inodedep(inodedep) == 0)
    4188           0 :                         panic("handle_written_inodeblock: live inodedep");
    4189           0 :                 add_to_worklist(filefree);
    4190           0 :                 return (0);
    4191             :         }
    4192             : 
    4193             :         /*
    4194             :          * If no outstanding dependencies, free it.
    4195             :          */
    4196           0 :         if (free_inodedep(inodedep) ||
    4197           0 :             TAILQ_FIRST(&inodedep->id_inoupdt) == NULL)
    4198           0 :                 return (0);
    4199           0 :         return (hadchanges);
    4200           0 : }
    4201             : 
    4202             : /*
    4203             :  * Process a diradd entry after its dependent inode has been written.
    4204             :  * This routine must be called with splbio interrupts blocked.
    4205             :  */
    4206             : STATIC void
    4207           0 : diradd_inode_written(struct diradd *dap, struct inodedep *inodedep)
    4208             : {
    4209             :         struct pagedep *pagedep;
    4210             : 
    4211           0 :         splassert(IPL_BIO);
    4212             : 
    4213           0 :         dap->da_state |= COMPLETE;
    4214           0 :         if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
    4215           0 :                 if (dap->da_state & DIRCHG)
    4216           0 :                         pagedep = dap->da_previous->dm_pagedep;
    4217             :                 else
    4218           0 :                         pagedep = dap->da_pagedep;
    4219           0 :                 LIST_REMOVE(dap, da_pdlist);
    4220           0 :                 LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
    4221           0 :         }
    4222           0 :         WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
    4223           0 : }
    4224             : 
    4225             : /*
    4226             :  * Handle the completion of a mkdir dependency.
    4227             :  */
    4228             : STATIC void
    4229           0 : handle_written_mkdir(struct mkdir *mkdir, int type)
    4230             : {
    4231             :         struct diradd *dap;
    4232             :         struct pagedep *pagedep;
    4233             : 
    4234           0 :         splassert(IPL_BIO);
    4235             : 
    4236           0 :         if (mkdir->md_state != type)
    4237           0 :                 panic("handle_written_mkdir: bad type");
    4238           0 :         dap = mkdir->md_diradd;
    4239           0 :         dap->da_state &= ~type;
    4240           0 :         if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0)
    4241           0 :                 dap->da_state |= DEPCOMPLETE;
    4242           0 :         if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
    4243           0 :                 if (dap->da_state & DIRCHG)
    4244           0 :                         pagedep = dap->da_previous->dm_pagedep;
    4245             :                 else
    4246           0 :                         pagedep = dap->da_pagedep;
    4247           0 :                 LIST_REMOVE(dap, da_pdlist);
    4248           0 :                 LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
    4249           0 :         }
    4250           0 :         LIST_REMOVE(mkdir, md_mkdirs);
    4251           0 :         WORKITEM_FREE(mkdir, D_MKDIR);
    4252           0 : }
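/*
 * A small illustrative sketch (hypothetical, not in ffs_softdep.c) of
 * the flag handling in handle_written_mkdir() above: a new directory
 * is only DEPCOMPLETE once both the directory body (the block holding
 * "." and "..") and the parent's updated link count have been written,
 * i.e. once both MKDIR_* bits have been cleared.  The macro and
 * function names below are stand-ins, not the kernel's definitions.
 */
#define SK_MKDIR_BODY	0x01
#define SK_MKDIR_PARENT	0x02
#define SK_DEPCOMPLETE	0x04

static int
sketch_mkdir_written(int state, int whichdep)
{
	state &= ~whichdep;		/* this half of the mkdir is done */
	if ((state & (SK_MKDIR_PARENT | SK_MKDIR_BODY)) == 0)
		state |= SK_DEPCOMPLETE;	/* both halves on disk */
	return (state);
}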
    4253             : 
    4254             : /*
    4255             :  * Called from within softdep_disk_write_complete above.
    4256             :  * A write operation was just completed. Removed inodes can
    4257             :  * now be freed and associated block pointers may be committed.
    4258             :  * Note that this routine is always called from interrupt level
    4259             :  * with further splbio interrupts blocked.
    4260             :  */
    4261             : /* buffer containing the written page */
    4262             : STATIC int 
    4263           0 : handle_written_filepage(struct pagedep *pagedep, struct buf *bp)
    4264             : {
    4265             :         struct dirrem *dirrem;
    4266             :         struct diradd *dap, *nextdap;
    4267             :         struct direct *ep;
    4268             :         int i, chgs;
    4269             : 
    4270           0 :         splassert(IPL_BIO);
    4271             : 
    4272           0 :         if ((pagedep->pd_state & IOSTARTED) == 0)
    4273           0 :                 panic("handle_written_filepage: not started");
    4274           0 :         pagedep->pd_state &= ~IOSTARTED;
    4275             :         /*
    4276             :          * Process any directory removals that have been committed.
    4277             :          */
    4278           0 :         while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)) != NULL) {
    4279           0 :                 LIST_REMOVE(dirrem, dm_next);
    4280           0 :                 dirrem->dm_dirinum = pagedep->pd_ino;
    4281           0 :                 add_to_worklist(&dirrem->dm_list);
    4282             :         }
    4283             :         /*
    4284             :          * Free any directory additions that have been committed.
    4285             :          * If it is a newly allocated block, we have to wait until
    4286             :          * the on-disk directory inode claims the new block.
    4287             :          */
    4288           0 :         if ((pagedep->pd_state & NEWBLOCK) == 0)
    4289           0 :                 while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
    4290           0 :                         free_diradd(dap);
    4291             :         /*
    4292             :          * Uncommitted directory entries must be restored.
    4293             :          */
    4294           0 :         for (chgs = 0, i = 0; i < DAHASHSZ; i++) {
    4295           0 :                 for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap;
    4296             :                      dap = nextdap) {
    4297           0 :                         nextdap = LIST_NEXT(dap, da_pdlist);
    4298           0 :                         if (dap->da_state & ATTACHED)
    4299           0 :                                 panic("handle_written_filepage: attached");
    4300           0 :                         ep = (struct direct *)
    4301           0 :                             ((char *)bp->b_data + dap->da_offset);
    4302           0 :                         ep->d_ino = dap->da_newinum;
    4303           0 :                         dap->da_state &= ~UNDONE;
    4304           0 :                         dap->da_state |= ATTACHED;
    4305             :                         chgs = 1;
    4306             :                         /*
    4307             :                          * If the inode referenced by the directory has
    4308             :                          * been written out, then the dependency can be
    4309             :                          * moved to the pending list.
    4310             :                          */
    4311           0 :                         if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
    4312           0 :                                 LIST_REMOVE(dap, da_pdlist);
    4313           0 :                                 LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap,
    4314             :                                     da_pdlist);
    4315           0 :                         }
    4316             :                 }
    4317             :         }
    4318             :         /*
    4319             :          * If there were any rollbacks in the directory, then it must be
    4320             :          * marked dirty so that it will eventually get written back in
    4321             :          * its correct form.
    4322             :          */
    4323           0 :         if (chgs) {
    4324           0 :                 if ((bp->b_flags & B_DELWRI) == 0)
    4325           0 :                         stat_dir_entry++;
    4326           0 :                 buf_dirty(bp);
    4327           0 :                 return (1);
    4328             :         }
    4329             :         /*
    4330             :          * If we are not waiting for a new directory block to be
    4331             :          * claimed by its inode, then the pagedep will be freed.
    4332             :          * Otherwise it will remain to track any new entries on
    4333             :          * the page in case they are fsync'ed.
    4334             :          */
    4335           0 :         if ((pagedep->pd_state & NEWBLOCK) == 0) {
    4336           0 :                 LIST_REMOVE(pagedep, pd_hash);
    4337           0 :                 WORKITEM_FREE(pagedep, D_PAGEDEP);
    4338           0 :         }
    4339           0 :         return (0);
    4340           0 : }
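/*
 * A minimal sketch (hypothetical, not in ffs_softdep.c) of the
 * roll-forward performed by handle_written_filepage() above: the inode
 * number that had been zeroed out for the on-disk copy is put back
 * into the directory entry at its saved byte offset.  d_ino is the
 * leading 32-bit field of a UFS struct direct; the helper name and
 * its arguments are invented.
 */
#include <stdint.h>
#include <string.h>

static void
rollforward_direntry(char *pagedata, int offset, uint32_t newinum)
{
	/* write d_ino, the first member of the entry, back in place */
	memcpy(pagedata + offset, &newinum, sizeof(newinum));
}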
    4341             : 
    4342             : /*
    4343             :  * Writing back in-core inode structures.
    4344             :  * 
    4345             :  * The file system only accesses an inode's contents when it occupies an
    4346             :  * "in-core" inode structure.  These "in-core" structures are separate from
    4347             :  * the page frames used to cache inode blocks.  Only the latter are
    4348             :  * transferred to/from the disk.  So, when the updated contents of the
    4349             :  * "in-core" inode structure are copied to the corresponding in-memory inode
    4350             :  * block, the dependencies are also transferred.  The following procedure is
    4351             :  * called when copying a dirty "in-core" inode to a cached inode block.
    4352             :  */
    4353             : 
    4354             : /*
    4355             :  * Called when an inode is loaded from disk. If the effective link count
    4356             :  * differed from the actual link count when it was last flushed, then we
    4357             :  * need to ensure that the correct effective link count is put back.
    4358             :  */
    4359             : /* the "in_core" copy of the inode */
    4360             : void 
    4361           0 : softdep_load_inodeblock(struct inode *ip)
    4362             : {
    4363           0 :         struct inodedep *inodedep;
    4364             : 
    4365             :         /*
    4366             :          * Check for alternate nlink count.
    4367             :          */
    4368           0 :         ip->i_effnlink = DIP(ip, nlink);
    4369           0 :         ACQUIRE_LOCK(&lk);
    4370           0 :         if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) {
    4371           0 :                 FREE_LOCK(&lk);
    4372           0 :                 return;
    4373             :         }
    4374           0 :         ip->i_effnlink -= inodedep->id_nlinkdelta;
    4375           0 :         FREE_LOCK(&lk);
    4376           0 : }
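/*
 * A one-line sketch (hypothetical, not in ffs_softdep.c) of the
 * invariant restored by softdep_load_inodeblock() above: the link
 * count the rest of the kernel sees is the on-disk count minus the
 * removals whose directory writes have not yet reached the disk.
 */
static int
sketch_effective_nlink(int ondisk_nlink, int pending_removals)
{
	return (ondisk_nlink - pending_removals);
}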
    4377             : 
    4378             : /*
    4379             :  * This routine is called just before the "in-core" inode
    4380             :  * information is to be copied to the in-memory inode block.
    4381             :  * Recall that an inode block contains several inodes. If
    4382             :  * the force flag is set, then the dependencies will be
    4383             :  * cleared so that the update can always be made. Note that
    4384             :  * the buffer is locked when this routine is called, so we
    4385             :  * will never be in the middle of writing the inode block 
    4386             :  * to disk.
    4387             :  */
    4388             : /* the "in_core" copy of the inode */
    4389             : /* the buffer containing the inode block */
    4390             : /* nonzero => update must be allowed */
    4391             : void 
    4392           0 : softdep_update_inodeblock(struct inode *ip, struct buf *bp, int waitfor)
    4393             : {
    4394           0 :         struct inodedep *inodedep;
    4395             :         struct worklist *wk;
    4396             :         int error, gotit;
    4397             : 
    4398             :         /*
    4399             :          * If the effective link count is not equal to the actual link
    4400             :          * count, then we must track the difference in an inodedep while
    4401             :          * the inode is (potentially) tossed out of the cache. Otherwise,
    4402             :          * if there is no existing inodedep, then there are no dependencies
    4403             :          * to track.
    4404             :          */
    4405           0 :         ACQUIRE_LOCK(&lk);
    4406           0 :         if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) {
    4407           0 :                 FREE_LOCK(&lk);
    4408           0 :                 if (ip->i_effnlink != DIP(ip, nlink))
    4409           0 :                         panic("softdep_update_inodeblock: bad link count");
    4410           0 :                 return;
    4411             :         }
    4412           0 :         if (inodedep->id_nlinkdelta != DIP(ip, nlink) - ip->i_effnlink) {
    4413           0 :                 FREE_LOCK(&lk);
    4414           0 :                 panic("softdep_update_inodeblock: bad delta");
    4415             :         }
    4416             :         /*
    4417             :          * Changes have been initiated. Anything depending on these
    4418             :          * changes cannot occur until this inode has been written.
    4419             :          */
    4420           0 :         inodedep->id_state &= ~COMPLETE;
    4421           0 :         if ((inodedep->id_state & ONWORKLIST) == 0)
    4422           0 :                 WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list);
    4423             :         /*
    4424             :          * Any new dependencies associated with the incore inode must 
    4425             :          * now be moved to the list associated with the buffer holding
    4426             :          * the in-memory copy of the inode. Once merged, process any
    4427             :          * allocdirects that are completed by the merger.
    4428             :          */
    4429           0 :         merge_inode_lists(inodedep);
    4430           0 :         if (TAILQ_FIRST(&inodedep->id_inoupdt) != NULL)
    4431           0 :                 handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt));
    4432             :         /*
    4433             :          * Now that the inode has been pushed into the buffer, the
    4434             :          * operations dependent on the inode being written to disk
    4435             :          * can be moved to the id_bufwait so that they will be
    4436             :          * processed when the buffer I/O completes.
    4437             :          */
    4438           0 :         while ((wk = LIST_FIRST(&inodedep->id_inowait)) != NULL) {
    4439           0 :                 WORKLIST_REMOVE(wk);
    4440           0 :                 WORKLIST_INSERT(&inodedep->id_bufwait, wk);
    4441             :         }
    4442             :         /*
    4443             :          * Newly allocated inodes cannot be written until the bitmap
    4444             :          * that allocates them has been written (indicated by
    4445             :          * DEPCOMPLETE being set in id_state). If we are doing a
    4446             :          * forced sync (e.g., an fsync on a file), we force the bitmap
    4447             :          * to be written so that the update can be done.
    4448             :          */
    4449           0 :         do {
    4450           0 :                 if ((inodedep->id_state & DEPCOMPLETE) != 0 || waitfor == 0) {
    4451           0 :                         FREE_LOCK(&lk);
    4452           0 :                         return;
    4453             :                 }
    4454           0 :                 bp = inodedep->id_buf;
    4455           0 :                 gotit = getdirtybuf(bp, MNT_WAIT);
    4456           0 :         } while (gotit == -1);
    4457           0 :         FREE_LOCK(&lk);
    4458           0 :         if (gotit && (error = bwrite(bp)) != 0)
    4459           0 :                 softdep_error("softdep_update_inodeblock: bwrite", error);
    4460           0 :         if ((inodedep->id_state & DEPCOMPLETE) == 0)
    4461           0 :                 panic("softdep_update_inodeblock: update failed");
    4462           0 : }
    4463             : 
    4464             : /*
    4465             :  * Merge the new inode dependency list (id_newinoupdt) into the old
    4466             :  * inode dependency list (id_inoupdt). This routine must be called
    4467             :  * with splbio interrupts blocked.
    4468             :  */
    4469             : STATIC void
    4470           0 : merge_inode_lists(struct inodedep *inodedep)
    4471             : {
    4472             :         struct allocdirect *listadp, *newadp;
    4473             : 
    4474           0 :         splassert(IPL_BIO);
    4475             : 
    4476           0 :         newadp = TAILQ_FIRST(&inodedep->id_newinoupdt);
    4477           0 :         for (listadp = TAILQ_FIRST(&inodedep->id_inoupdt); listadp && newadp;) {
    4478           0 :                 if (listadp->ad_lbn < newadp->ad_lbn) {
    4479           0 :                         listadp = TAILQ_NEXT(listadp, ad_next);
    4480           0 :                         continue;
    4481             :                 }
    4482           0 :                 TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next);
    4483           0 :                 TAILQ_INSERT_BEFORE(listadp, newadp, ad_next);
    4484           0 :                 if (listadp->ad_lbn == newadp->ad_lbn) {
    4485           0 :                         allocdirect_merge(&inodedep->id_inoupdt, newadp,
    4486             :                             listadp);
    4487             :                         listadp = newadp;
    4488           0 :                 }
    4489           0 :                 newadp = TAILQ_FIRST(&inodedep->id_newinoupdt);
    4490             :         }
    4491           0 :         while ((newadp = TAILQ_FIRST(&inodedep->id_newinoupdt)) != NULL) {
    4492           0 :                 TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next);
    4493           0 :                 TAILQ_INSERT_TAIL(&inodedep->id_inoupdt, newadp, ad_next);
    4494             :         }
    4495           0 : }
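/*
 * A minimal sketch (hypothetical, not in ffs_softdep.c) of the
 * ordered-merge idea behind merge_inode_lists() above, shown with two
 * arrays of logical block numbers sorted in ascending order: entries
 * unique to the "new" list are inserted in order, and an entry present
 * in both lists collapses to a single merged entry.  Names are
 * invented.
 */
static int
sketch_merge_lbns(const long *oldl, int nold, const long *newl, int nnew,
    long *out)
{
	int i = 0, j = 0, n = 0;

	while (i < nold && j < nnew) {
		if (oldl[i] < newl[j])
			out[n++] = oldl[i++];
		else if (newl[j] < oldl[i])
			out[n++] = newl[j++];
		else {
			/* same lbn: the two dependencies merge into one */
			out[n++] = oldl[i++];
			j++;
		}
	}
	while (i < nold)
		out[n++] = oldl[i++];
	while (j < nnew)
		out[n++] = newl[j++];
	return (n);
}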
    4496             : 
    4497             : /*
    4498             :  * If we are doing an fsync, then we must ensure that any directory
    4499             :  * entries for the inode have been written after the inode gets to disk.
    4500             :  */
    4501             : /* the "in_core" copy of the inode */
    4502             : int
    4503           0 : softdep_fsync(struct vnode *vp)
    4504             : {
    4505           0 :         struct inodedep *inodedep;
    4506             :         struct pagedep *pagedep;
    4507             :         struct worklist *wk;
    4508             :         struct diradd *dap;
    4509             :         struct mount *mnt;
    4510           0 :         struct vnode *pvp;
    4511             :         struct inode *ip;
    4512             :         struct inode *pip;
    4513           0 :         struct buf *bp;
    4514             :         struct fs *fs;
    4515           0 :         struct proc *p = CURPROC;               /* XXX */
    4516             :         int error, flushparent;
    4517             :         ufsino_t parentino;
    4518             :         daddr_t lbn;
    4519             : 
    4520           0 :         ip = VTOI(vp);
    4521           0 :         fs = ip->i_fs;
    4522           0 :         ACQUIRE_LOCK(&lk);
    4523           0 :         if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) {
    4524           0 :                 FREE_LOCK(&lk);
    4525           0 :                 return (0);
    4526             :         }
    4527           0 :         if (LIST_FIRST(&inodedep->id_inowait) != NULL ||
    4528           0 :             LIST_FIRST(&inodedep->id_bufwait) != NULL ||
    4529           0 :             TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
    4530           0 :             TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL) {
    4531           0 :                 FREE_LOCK(&lk);
    4532           0 :                 panic("softdep_fsync: pending ops");
    4533             :         }
    4534           0 :         for (error = 0, flushparent = 0; ; ) {
    4535           0 :                 if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL)
    4536             :                         break;
    4537           0 :                 if (wk->wk_type != D_DIRADD) {
    4538           0 :                         FREE_LOCK(&lk);
    4539           0 :                         panic("softdep_fsync: Unexpected type %s",
    4540           0 :                             TYPENAME(wk->wk_type));
    4541             :                 }
    4542           0 :                 dap = WK_DIRADD(wk);
    4543             :                 /*
    4544             :                  * Flush our parent if this directory entry has a MKDIR_PARENT
    4545             :                  * dependency or is contained in a newly allocated block.
    4546             :                  */
    4547           0 :                 if (dap->da_state & DIRCHG)
    4548           0 :                         pagedep = dap->da_previous->dm_pagedep;
    4549             :                 else
    4550           0 :                         pagedep = dap->da_pagedep;
    4551           0 :                 mnt = pagedep->pd_mnt;
    4552           0 :                 parentino = pagedep->pd_ino;
    4553           0 :                 lbn = pagedep->pd_lbn;
    4554           0 :                 if ((dap->da_state & (MKDIR_BODY | COMPLETE)) != COMPLETE) {
    4555           0 :                         FREE_LOCK(&lk);
    4556           0 :                         panic("softdep_fsync: dirty");
    4557             :                 }
    4558           0 :                 if ((dap->da_state & MKDIR_PARENT) ||
    4559           0 :                     (pagedep->pd_state & NEWBLOCK))
    4560           0 :                         flushparent = 1;
    4561             :                 else
    4562             :                         flushparent = 0;
    4563             :                 /*
    4564             :                  * If we are being fsync'ed as part of vgone'ing this vnode,
    4565             :                  * then we will not be able to release and recover the
    4566             :                  * vnode below, so we just have to give up on writing its
    4567             :                  * directory entry out. It will eventually be written, just
    4568             :                  * not now, but then the user was not asking to have it
    4569             :                  * written, so we are not breaking any promises.
    4570             :                  */
    4571           0 :                 if (vp->v_flag & VXLOCK)
    4572             :                         break;
    4573             :                 /*
    4574             :                  * We prevent deadlock by always fetching inodes from the
    4575             :                  * root, moving down the directory tree. Thus, when fetching
    4576             :                  * our parent directory, we must unlock ourselves before
    4577             :                  * requesting the lock on our parent. See the comment in
    4578             :                  * ufs_lookup for details on possible races.
    4579             :                  */
    4580           0 :                 FREE_LOCK(&lk);
    4581           0 :                 VOP_UNLOCK(vp);
    4582           0 :                 error = VFS_VGET(mnt, parentino, &pvp);
    4583           0 :                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    4584           0 :                 if (error != 0)
    4585           0 :                         return (error);
    4586             :                 /*
    4587             :                  * All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps
    4588             :                  * that are contained in direct blocks will be resolved by 
    4589             :                  * doing a UFS_UPDATE. Pagedeps contained in indirect blocks
    4590             :                  * may require a complete sync'ing of the directory. So, we
    4591             :                  * try the cheap and fast UFS_UPDATE first, and if that fails,
    4592             :                  * then we do the slower VOP_FSYNC of the directory.
    4593             :                  */
    4594           0 :                 pip = VTOI(pvp);
    4595           0 :                 if (flushparent) {
    4596           0 :                         error = UFS_UPDATE(pip, 1);
    4597           0 :                         if (error) {
    4598           0 :                                 vput(pvp);
    4599           0 :                                 return (error);
    4600             :                         }
    4601           0 :                         if (pagedep->pd_state & NEWBLOCK) {
    4602           0 :                                 error = VOP_FSYNC(pvp, p->p_ucred, MNT_WAIT, p);
    4603           0 :                                 if (error) {
    4604           0 :                                         vput(pvp);
    4605           0 :                                         return (error);
    4606             :                                 }
    4607             :                         }
    4608             :                 }
    4609             :                 /*
    4610             :                  * Flush directory page containing the inode's name.
    4611             :                  */
    4612           0 :                 error = bread(pvp, lbn, fs->fs_bsize, &bp);
    4613           0 :                 if (error == 0) {
    4614           0 :                         bp->b_bcount = blksize(fs, pip, lbn);
    4615           0 :                         error = bwrite(bp);
    4616           0 :                 } else
    4617           0 :                         brelse(bp);
    4618           0 :                 vput(pvp);
    4619           0 :                 if (error != 0)
    4620           0 :                         return (error);
    4621           0 :                 ACQUIRE_LOCK(&lk);
    4622           0 :                 if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0)
    4623             :                         break;
    4624             :         }
    4625           0 :         FREE_LOCK(&lk);
    4626           0 :         return (0);
    4627           0 : }
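                     : 
                     : /*
                     :  * A condensed sketch of what the loop above decides for each pending
                     :  * diradd; "parent" is shorthand for the parent directory fetched via
                     :  * VFS_VGET(mnt, parentino, &pvp):
                     :  *
                     :  *      flushparent = (dap->da_state & MKDIR_PARENT) ||
                     :  *          (pagedep->pd_state & NEWBLOCK);
                     :  *      if (flushparent) {
                     :  *              UFS_UPDATE(parent, 1);          cheap: parent inode only
                     :  *              if (pagedep->pd_state & NEWBLOCK)
                     :  *                      VOP_FSYNC(parent, ...); slow: whole directory
                     :  *      }
                     :  *      bread() + bwrite() of the parent's directory block at pd_lbn
                     :  *      then pushes our own name to disk.
                     :  */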
    4628             : 
    4629             : /*
    4630             :  * Flush all the dirty bitmaps associated with the block device
    4631             :  * before flushing the rest of the dirty blocks so as to reduce
    4632             :  * the number of dependencies that will have to be rolled back.
    4633             :  */
    4634             : void
    4635           0 : softdep_fsync_mountdev(struct vnode *vp, int waitfor)
    4636             : {
    4637             :         struct buf *bp, *nbp;
    4638             :         struct worklist *wk;
    4639             : 
    4640           0 :         if (!vn_isdisk(vp, NULL))
    4641           0 :                 panic("softdep_fsync_mountdev: vnode not a disk");
    4642           0 :         ACQUIRE_LOCK(&lk);
    4643           0 :         LIST_FOREACH_SAFE(bp, &vp->v_dirtyblkhd, b_vnbufs, nbp) {
    4644             :                 /* 
    4645             :                  * If it is already scheduled, skip to the next buffer.
    4646             :                  */
    4647           0 :                 splassert(IPL_BIO);
    4648           0 :                 if (bp->b_flags & B_BUSY)
    4649             :                         continue;
    4650             : 
    4651           0 :                 if ((bp->b_flags & B_DELWRI) == 0) {
    4652           0 :                         FREE_LOCK(&lk);
    4653           0 :                         panic("softdep_fsync_mountdev: not dirty");
    4654             :                 }
    4655             :                 /*
    4656             :                  * We are only interested in bitmaps with outstanding
    4657             :                  * dependencies.
    4658             :                  */
    4659           0 :                 if ((wk = LIST_FIRST(&bp->b_dep)) == NULL ||
    4660           0 :                     wk->wk_type != D_BMSAFEMAP) {
    4661             :                         continue;
    4662             :                 }
    4663           0 :                 bremfree(bp);
    4664           0 :                 buf_acquire(bp);
    4665           0 :                 FREE_LOCK(&lk);
    4666           0 :                 (void) bawrite(bp);
    4667           0 :                 ACQUIRE_LOCK(&lk);
    4668             :                 /*
    4669             :                  * Since we may have slept during the I/O, we need 
    4670             :                  * to start from a known point.
    4671             :                  */
    4672           0 :                 nbp = LIST_FIRST(&vp->v_dirtyblkhd);
    4673           0 :         }
    4674           0 :         if (waitfor == MNT_WAIT)
    4675           0 :                 drain_output(vp, 1);
    4676           0 :         FREE_LOCK(&lk);
    4677           0 : }
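                     : 
                     : /*
                     :  * The routines below all drive getdirtybuf() with the same retry
                     :  * convention; this is a minimal sketch of that convention as it can
                     :  * be inferred from the callers in this file:
                     :  *
                     :  *      gotit = getdirtybuf(nbp, waitfor);
                     :  *      if (gotit == 0)         not dirty or unavailable: skip it
                     :  *      else if (gotit == -1)   we slept and the list may have
                     :  *                              changed: rescan from a known point
                     :  *      else                    buffer is held: write it out with
                     :  *                              bawrite() or VOP_BWRITE()
                     :  */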
    4678             : 
    4679             : /*
    4680             :  * This routine is called when we are trying to synchronously flush a
    4681             :  * file. This routine must eliminate any filesystem metadata dependencies
    4682             :  * so that the syncing routine can succeed by pushing the dirty blocks
    4683             :  * associated with the file. If any I/O errors occur, they are returned.
    4684             :  */
    4685             : int
    4686           0 : softdep_sync_metadata(struct vop_fsync_args *ap)
    4687             : {
    4688           0 :         struct vnode *vp = ap->a_vp;
    4689             :         struct pagedep *pagedep;
    4690             :         struct allocdirect *adp;
    4691             :         struct allocindir *aip;
    4692             :         struct buf *bp, *nbp;
    4693             :         struct worklist *wk;
    4694             :         int i, gotit, error, waitfor;
    4695             : 
    4696             :         /*
    4697             :          * Check whether this vnode is involved in a filesystem
    4698             :          * that is doing soft dependency processing.
    4699             :          */
    4700           0 :         if (!vn_isdisk(vp, NULL)) {
    4701           0 :                 if (!DOINGSOFTDEP(vp))
    4702           0 :                         return (0);
    4703             :         } else
    4704           0 :                 if (vp->v_specmountpoint == NULL ||
    4705           0 :                     (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP) == 0)
    4706           0 :                         return (0);
    4707             :         /*
    4708             :          * Ensure that any direct block dependencies have been cleared.
    4709             :          */
    4710           0 :         ACQUIRE_LOCK(&lk);
    4711           0 :         if ((error = flush_inodedep_deps(VTOI(vp)->i_fs, VTOI(vp)->i_number))) {
    4712           0 :                 FREE_LOCK(&lk);
    4713           0 :                 return (error);
    4714             :         }
    4715             :         /*
    4716             :          * For most files, the only metadata dependencies are the
    4717             :          * cylinder group maps that allocate their inode or blocks.
    4718             :          * The block allocation dependencies can be found by traversing
    4719             :          * the dependency lists for any buffers that remain on their
    4720             :          * dirty buffer list. The inode allocation dependency will
    4721             :          * be resolved when the inode is updated with MNT_WAIT.
    4722             :          * This work is done in two passes. The first pass grabs most
    4723             :          * of the buffers and begins asynchronously writing them. The
    4724             :          * only way to wait for these asynchronous writes is to sleep
    4725             :          * on the filesystem vnode which may stay busy for a long time
    4726             :          * if the filesystem is active. So, instead, we make a second
    4727             :          * pass over the dependencies blocking on each write. In the
    4728             :          * usual case we will be blocking against a write that we
    4729             :          * initiated, so when it is done the dependency will have been
    4730             :          * resolved. Thus the second pass is expected to end quickly.
    4731             :          */
    4732           0 :         waitfor = MNT_NOWAIT;
    4733             : top:
    4734             :         /*
    4735             :          * We must wait for any I/O in progress to finish so that
    4736             :          * all potential buffers on the dirty list will be visible.
    4737             :          */
    4738           0 :         drain_output(vp, 1);
    4739           0 :         bp = LIST_FIRST(&vp->v_dirtyblkhd);
    4740           0 :         gotit = getdirtybuf(bp, MNT_WAIT);
    4741           0 :         if (gotit == 0) {
    4742           0 :                 FREE_LOCK(&lk);
    4743           0 :                 return (0);
    4744           0 :         } else if (gotit == -1)
    4745           0 :                 goto top;
    4746             : loop:
    4747             :         /*
    4748             :          * As we hold the buffer locked, none of its dependencies
    4749             :          * will disappear.
    4750             :          */
    4751           0 :         LIST_FOREACH(wk, &bp->b_dep, wk_list) {
    4752           0 :                 switch (wk->wk_type) {
    4753             : 
    4754             :                 case D_ALLOCDIRECT:
    4755           0 :                         adp = WK_ALLOCDIRECT(wk);
    4756           0 :                         if (adp->ad_state & DEPCOMPLETE)
    4757             :                                 break;
    4758           0 :                         nbp = adp->ad_buf;
    4759           0 :                         gotit = getdirtybuf(nbp, waitfor);
    4760           0 :                         if (gotit == 0)
    4761             :                                 break;
    4762           0 :                         else if (gotit == -1)
    4763           0 :                                 goto loop;
    4764           0 :                         FREE_LOCK(&lk);
    4765           0 :                         if (waitfor == MNT_NOWAIT) {
    4766           0 :                                 bawrite(nbp);
    4767           0 :                         } else if ((error = VOP_BWRITE(nbp)) != 0) {
    4768           0 :                                 bawrite(bp);
    4769           0 :                                 return (error);
    4770             :                         }
    4771           0 :                         ACQUIRE_LOCK(&lk);
    4772           0 :                         break;
    4773             : 
    4774             :                 case D_ALLOCINDIR:
    4775           0 :                         aip = WK_ALLOCINDIR(wk);
    4776           0 :                         if (aip->ai_state & DEPCOMPLETE)
    4777             :                                 break;
    4778           0 :                         nbp = aip->ai_buf;
    4779           0 :                         gotit = getdirtybuf(nbp, waitfor);
    4780           0 :                         if (gotit == 0)
    4781             :                                 break;
    4782           0 :                         else if (gotit == -1)
    4783           0 :                                 goto loop;
    4784           0 :                         FREE_LOCK(&lk);
    4785           0 :                         if (waitfor == MNT_NOWAIT) {
    4786           0 :                                 bawrite(nbp);
    4787           0 :                         } else if ((error = VOP_BWRITE(nbp)) != 0) {
    4788           0 :                                 bawrite(bp);
    4789           0 :                                 return (error);
    4790             :                         }
    4791           0 :                         ACQUIRE_LOCK(&lk);
    4792           0 :                         break;
    4793             : 
    4794             :                 case D_INDIRDEP:
    4795             :                 restart:
    4796             : 
    4797           0 :                         LIST_FOREACH(aip, &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) {
    4798           0 :                                 if (aip->ai_state & DEPCOMPLETE)
    4799             :                                         continue;
    4800           0 :                                 nbp = aip->ai_buf;
    4801           0 :                                 if (getdirtybuf(nbp, MNT_WAIT) <= 0)
    4802           0 :                                         goto restart;
    4803           0 :                                 FREE_LOCK(&lk);
    4804           0 :                                 if ((error = VOP_BWRITE(nbp)) != 0) {
    4805           0 :                                         bawrite(bp);
    4806           0 :                                         return (error);
    4807             :                                 }
    4808           0 :                                 ACQUIRE_LOCK(&lk);
    4809           0 :                                 goto restart;
    4810             :                         }
    4811             :                         break;
    4812             : 
    4813             :                 case D_INODEDEP:
    4814           0 :                         if ((error = flush_inodedep_deps(WK_INODEDEP(wk)->id_fs,
    4815           0 :                             WK_INODEDEP(wk)->id_ino)) != 0) {
    4816           0 :                                 FREE_LOCK(&lk);
    4817           0 :                                 bawrite(bp);
    4818           0 :                                 return (error);
    4819             :                         }
    4820             :                         break;
    4821             : 
    4822             :                 case D_PAGEDEP:
    4823             :                         /*
    4824             :                          * We are trying to sync a directory that may
    4825             :                          * have dependencies both on its own metadata
    4826             :                          * and on the inodes of any recently allocated
    4827             :                          * files. We walk its diradd lists, pushing out
    4828             :                          * the associated inodes.
    4829             :                          */
    4830           0 :                         pagedep = WK_PAGEDEP(wk);
    4831           0 :                         for (i = 0; i < DAHASHSZ; i++) {
    4832           0 :                                 if (LIST_FIRST(&pagedep->pd_diraddhd[i]) ==
    4833             :                                     NULL)
    4834             :                                         continue;
    4835           0 :                                 if ((error =
    4836           0 :                                     flush_pagedep_deps(vp, pagedep->pd_mnt,
    4837             :                                                 &pagedep->pd_diraddhd[i]))) {
    4838           0 :                                         FREE_LOCK(&lk);
    4839           0 :                                         bawrite(bp);
    4840           0 :                                         return (error);
    4841             :                                 }
    4842             :                         }
    4843             :                         break;
    4844             : 
    4845             :                 case D_MKDIR:
    4846             :                         /*
    4847             :                          * This case should never happen if the vnode has
    4848             :                          * been properly sync'ed. However, if this function
    4849             :                          * is used at a place where the vnode has not yet
    4850             :                          * been sync'ed, this dependency can show up. So,
    4851             :                          * rather than panic, just flush it.
    4852             :                          */
    4853           0 :                         nbp = WK_MKDIR(wk)->md_buf;
    4854           0 :                         KASSERT(bp != nbp);
    4855           0 :                         gotit = getdirtybuf(nbp, waitfor);
    4856           0 :                         if (gotit == 0)
    4857             :                                 break;
    4858           0 :                         else if (gotit == -1)
    4859           0 :                                 goto loop;
    4860           0 :                         FREE_LOCK(&lk);
    4861           0 :                         if (waitfor == MNT_NOWAIT) {
    4862           0 :                                 bawrite(nbp);
    4863           0 :                         } else if ((error = VOP_BWRITE(nbp)) != 0) {
    4864           0 :                                 bawrite(bp);
    4865           0 :                                 return (error);
    4866             :                         }
    4867           0 :                         ACQUIRE_LOCK(&lk);
    4868           0 :                         break;
    4869             : 
    4870             :                 case D_BMSAFEMAP:
    4871             :                         /*
    4872             :                          * This case should never happen if the vnode has
    4873             :                          * been properly sync'ed. However, if this function
    4874             :                          * is used at a place where the vnode has not yet
    4875             :                          * been sync'ed, this dependency can show up. So,
    4876             :                          * rather than panic, just flush it.
    4877             :                          */
    4878           0 :                         nbp = WK_BMSAFEMAP(wk)->sm_buf;
    4879           0 :                         if (bp == nbp)
    4880             :                                 break;
    4881           0 :                         gotit = getdirtybuf(nbp, waitfor);
    4882           0 :                         if (gotit == 0)
    4883             :                                 break;
    4884           0 :                         else if (gotit == -1)
    4885           0 :                                 goto loop;
    4886           0 :                         FREE_LOCK(&lk);
    4887           0 :                         if (waitfor == MNT_NOWAIT) {
    4888           0 :                                 bawrite(nbp);
    4889           0 :                         } else if ((error = VOP_BWRITE(nbp)) != 0) {
    4890           0 :                                 bawrite(bp);
    4891           0 :                                 return (error);
    4892             :                         }
    4893           0 :                         ACQUIRE_LOCK(&lk);
    4894           0 :                         break;
    4895             : 
    4896             :                 default:
    4897           0 :                         FREE_LOCK(&lk);
    4898           0 :                         panic("softdep_sync_metadata: Unknown type %s",
    4899           0 :                             TYPENAME(wk->wk_type));
    4900             :                         /* NOTREACHED */
    4901             :                 }
    4902             :         }
    4903           0 :         do {
    4904           0 :                 nbp = LIST_NEXT(bp, b_vnbufs);
    4905           0 :                 gotit = getdirtybuf(nbp, MNT_WAIT);
    4906           0 :         } while (gotit == -1);
    4907           0 :         FREE_LOCK(&lk);
    4908           0 :         bawrite(bp);
    4909           0 :         ACQUIRE_LOCK(&lk);
    4910           0 :         if (nbp != NULL) {
    4911             :                 bp = nbp;
    4912           0 :                 goto loop;
    4913             :         }
    4914             :         /*
    4915             :          * The brief unlock is to allow any pent up dependency
    4916             :          * processing to be done. Then proceed with the second pass.
    4917             :          */
    4918           0 :         if (waitfor == MNT_NOWAIT) {
    4919             :                 waitfor = MNT_WAIT;
    4920           0 :                 FREE_LOCK(&lk);
    4921           0 :                 ACQUIRE_LOCK(&lk);
    4922           0 :                 goto top;
    4923             :         }
    4924             : 
    4925             :         /*
    4926             :          * If we have managed to get rid of all the dirty buffers,
    4927             :          * then we are done. For certain directories and block
    4928             :          * devices, we may need to do further work.
    4929             :          *
    4930             :          * We must wait for any I/O in progress to finish so that
    4931             :          * all potential buffers on the dirty list will be visible.
    4932             :          */
    4933           0 :         drain_output(vp, 1);
    4934           0 :         if (LIST_EMPTY(&vp->v_dirtyblkhd)) {
    4935             :                 FREE_LOCK(&lk);
    4936           0 :                 return (0);
    4937             :         }
    4938             : 
    4939             :         FREE_LOCK(&lk);
    4940             :         /*
    4941             :          * If we are trying to sync a block device, some of its buffers may
    4942             :          * contain metadata that cannot be written until the contents of some
    4943             :          * partially written files have been written to disk. The only easy
    4944             :          * way to accomplish this is to sync the entire filesystem (luckily
    4945             :          * this happens rarely).
    4946             :          */
    4947           0 :         if (vn_isdisk(vp, NULL) &&
    4948           0 :             vp->v_specmountpoint && !VOP_ISLOCKED(vp) &&
    4949           0 :             (error = VFS_SYNC(vp->v_specmountpoint, MNT_WAIT, 0, ap->a_cred,
    4950           0 :              ap->a_p)) != 0)
    4951           0 :                 return (error);
    4952           0 :         return (0);
    4953           0 : }
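                     : 
                     : /*
                     :  * The control flow above, condensed (a sketch only):
                     :  *
                     :  *      waitfor = MNT_NOWAIT;
                     :  * top: drain_output(vp, 1);
                     :  *      for each dirty buffer bp on vp->v_dirtyblkhd
                     :  *              flush every dependency on bp->b_dep, using bawrite()
                     :  *              in the MNT_NOWAIT pass and VOP_BWRITE() in the
                     :  *              MNT_WAIT pass, then bawrite(bp) itself;
                     :  *      if (waitfor == MNT_NOWAIT) { waitfor = MNT_WAIT; goto top; }
                     :  *      drain_output(vp, 1);
                     :  *      if v_dirtyblkhd is now empty we are done; a block device may
                     :  *      still need a whole-filesystem VFS_SYNC().
                     :  */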
    4954             : 
    4955             : /*
    4956             :  * Flush the dependencies associated with an inodedep.
    4957             :  * Called with splbio blocked.
    4958             :  */
    4959             : STATIC int
    4960           0 : flush_inodedep_deps(struct fs *fs, ufsino_t ino)
    4961             : {
    4962           0 :         struct inodedep *inodedep;
    4963             :         struct allocdirect *adp;
    4964             :         int gotit, error, waitfor;
    4965             :         struct buf *bp;
    4966             : 
    4967           0 :         splassert(IPL_BIO);
    4968             : 
    4969             :         /*
    4970             :          * This work is done in two passes. The first pass grabs most
    4971             :          * of the buffers and begins asynchronously writing them. The
    4972             :          * only way to wait for these asynchronous writes is to sleep
    4973             :          * on the filesystem vnode which may stay busy for a long time
    4974             :          * if the filesystem is active. So, instead, we make a second
    4975             :          * pass over the dependencies blocking on each write. In the
    4976             :          * usual case we will be blocking against a write that we
    4977             :          * initiated, so when it is done the dependency will have been
    4978             :          * resolved. Thus the second pass is expected to end quickly.
    4979             :          * We give a brief window at the top of the loop to allow
    4980             :          * any pending I/O to complete.
    4981             :          */
    4982           0 :         for (waitfor = MNT_NOWAIT; ; ) {
    4983             :         retry_ino:
    4984           0 :                 FREE_LOCK(&lk);
    4985           0 :                 ACQUIRE_LOCK(&lk);
    4986           0 :                 if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
    4987           0 :                         return (0);
    4988           0 :                 TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) {
    4989           0 :                         if (adp->ad_state & DEPCOMPLETE)
    4990             :                                 continue;
    4991           0 :                         bp = adp->ad_buf;
    4992           0 :                         gotit = getdirtybuf(bp, waitfor);
    4993           0 :                         if (gotit == 0) {
    4994           0 :                                 if (waitfor == MNT_NOWAIT)
    4995             :                                         continue;
    4996             :                                 break;
    4997           0 :                         } else if (gotit == -1)
    4998           0 :                                 goto retry_ino;
    4999           0 :                         FREE_LOCK(&lk);
    5000           0 :                         if (waitfor == MNT_NOWAIT) {
    5001           0 :                                 bawrite(bp);
    5002           0 :                         } else if ((error = VOP_BWRITE(bp)) != 0) {
    5003           0 :                                 ACQUIRE_LOCK(&lk);
    5004           0 :                                 return (error);
    5005             :                         }
    5006           0 :                         ACQUIRE_LOCK(&lk);
    5007           0 :                         break;
    5008             :                 }
    5009           0 :                 if (adp != NULL)
    5010           0 :                         continue;
    5011             :         retry_newino:
    5012           0 :                 TAILQ_FOREACH(adp, &inodedep->id_newinoupdt, ad_next) {
    5013           0 :                         if (adp->ad_state & DEPCOMPLETE)
    5014             :                                 continue;
    5015           0 :                         bp = adp->ad_buf;
    5016           0 :                         gotit = getdirtybuf(bp, waitfor);
    5017           0 :                         if (gotit == 0) {
    5018           0 :                                 if (waitfor == MNT_NOWAIT)
    5019             :                                         continue;
    5020             :                                 break;
    5021           0 :                         } else if (gotit == -1)
    5022           0 :                                 goto retry_newino;
    5023           0 :                         FREE_LOCK(&lk);
    5024           0 :                         if (waitfor == MNT_NOWAIT) {
    5025           0 :                                 bawrite(bp);
    5026           0 :                         } else if ((error = VOP_BWRITE(bp)) != 0) {
    5027           0 :                                 ACQUIRE_LOCK(&lk);
    5028           0 :                                 return (error);
    5029             :                         }
    5030           0 :                         ACQUIRE_LOCK(&lk);
    5031           0 :                         break;
    5032             :                 }
    5033           0 :                 if (adp != NULL)
    5034           0 :                         continue;
    5035             :                 /*
    5036             :                  * If this was the second pass, we are done; otherwise do pass 2.
    5037             :                  */
    5038           0 :                 if (waitfor == MNT_WAIT)
    5039             :                         break;
    5040             :                 waitfor = MNT_WAIT;
    5041             :         }
    5042             :         /*
    5043             :          * Try freeing inodedep in case all dependencies have been removed.
    5044             :          */
    5045           0 :         if (inodedep_lookup(fs, ino, 0, &inodedep) != 0)
    5046           0 :                 (void) free_inodedep(inodedep);
    5047           0 :         return (0);
    5048           0 : }
    5049             : 
    5050             : /*
    5051             :  * Eliminate a pagedep dependency by flushing out all its diradd dependencies.
    5052             :  * Called with splbio blocked.
    5053             :  */
    5054             : STATIC int
    5055           0 : flush_pagedep_deps(struct vnode *pvp, struct mount *mp,
    5056             :     struct diraddhd *diraddhdp)
    5057             : {
    5058           0 :         struct proc *p = CURPROC;       /* XXX */
    5059             :         struct worklist *wk;
    5060           0 :         struct inodedep *inodedep;
    5061             :         struct ufsmount *ump;
    5062             :         struct diradd *dap;
    5063           0 :         struct vnode *vp;
    5064             :         int gotit, error = 0;
    5065           0 :         struct buf *bp;
    5066             :         ufsino_t inum;
    5067             : 
    5068           0 :         splassert(IPL_BIO);
    5069             : 
    5070           0 :         ump = VFSTOUFS(mp);
    5071           0 :         while ((dap = LIST_FIRST(diraddhdp)) != NULL) {
    5072             :                 /*
    5073             :                  * Flush ourselves if this directory entry
    5074             :                  * has a MKDIR_PARENT dependency.
    5075             :                  */
    5076           0 :                 if (dap->da_state & MKDIR_PARENT) {
    5077           0 :                         FREE_LOCK(&lk);
    5078           0 :                         if ((error = UFS_UPDATE(VTOI(pvp), 1)))
    5079             :                                 break;
    5080           0 :                         ACQUIRE_LOCK(&lk);
    5081             :                         /*
    5082             :                          * If that cleared dependencies, go on to next.
    5083             :                          */
    5084           0 :                         if (dap != LIST_FIRST(diraddhdp))
    5085           0 :                                 continue;
    5086           0 :                         if (dap->da_state & MKDIR_PARENT) {
    5087           0 :                                 FREE_LOCK(&lk);
    5088           0 :                                 panic("flush_pagedep_deps: MKDIR_PARENT");
    5089             :                         }
    5090             :                 }
    5091             :                 /*
    5092             :                  * A newly allocated directory must have its "." and
    5093             :                  * ".." entries written out before its name can be
    5094             :                  * committed in its parent. We do not want or need
    5095             :                  * the full semantics of a synchronous VOP_FSYNC as
    5096             :                  * that may end up here again, once for each directory
    5097             :                  * level in the filesystem. Instead, we push the blocks
    5098             :                  * and wait for them to clear. We have to fsync twice
    5099             :                  * because the first call may choose to defer blocks
    5100             :                  * that still have dependencies, but deferral will
    5101             :                  * happen at most once.
    5102             :                  */
    5103           0 :                 inum = dap->da_newinum;
    5104           0 :                 if (dap->da_state & MKDIR_BODY) {
    5105           0 :                         FREE_LOCK(&lk);
    5106           0 :                         if ((error = VFS_VGET(mp, inum, &vp)) != 0)
    5107             :                                 break;
    5108           0 :                         if ((error=VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)) ||
    5109           0 :                             (error=VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) {
    5110           0 :                                 vput(vp);
    5111           0 :                                 break;
    5112             :                         }
    5113           0 :                         drain_output(vp, 0);
    5114             :                         /*
    5115             :                          * If first block is still dirty with a D_MKDIR
    5116             :                          * dependency then it needs to be written now.
    5117             :                          */
    5118           0 :                         for (;;) {
    5119             :                                 error = 0;
    5120           0 :                                 ACQUIRE_LOCK(&lk);
    5121           0 :                                 bp = incore(vp, 0);
    5122           0 :                                 if (bp == NULL) {
    5123           0 :                                         FREE_LOCK(&lk);
    5124           0 :                                         break;
    5125             :                                 }
    5126           0 :                                 LIST_FOREACH(wk, &bp->b_dep, wk_list)
    5127           0 :                                         if (wk->wk_type == D_MKDIR)
    5128             :                                                 break;
    5129           0 :                                 if (wk) {
    5130           0 :                                         gotit = getdirtybuf(bp, MNT_WAIT);
    5131           0 :                                         FREE_LOCK(&lk);
    5132           0 :                                         if (gotit == -1)
    5133           0 :                                                 continue;
    5134           0 :                                         if (gotit && (error = bwrite(bp)) != 0)
    5135             :                                                 break;
    5136             :                                 } else
    5137           0 :                                         FREE_LOCK(&lk);
    5138             :                                 break;
    5139             :                         }
    5140           0 :                         vput(vp);
    5141             :                         /* Flushing of first block failed */
    5142           0 :                         if (error)
    5143             :                                 break;
    5144           0 :                         ACQUIRE_LOCK(&lk);
    5145             :                         /*
    5146             :                          * If that cleared dependencies, go on to next.
    5147             :                          */
    5148           0 :                         if (dap != LIST_FIRST(diraddhdp))
    5149           0 :                                 continue;
    5150           0 :                         if (dap->da_state & MKDIR_BODY) {
    5151           0 :                                 FREE_LOCK(&lk);
    5152           0 :                                 panic("flush_pagedep_deps: MKDIR_BODY");
    5153             :                         }
    5154             :                 }
    5155             :                 /*
    5156             :                  * Flush the inode on which the directory entry depends.
    5157             :                  * Having accounted for MKDIR_PARENT and MKDIR_BODY above,
    5158             :                  * the only remaining dependency is that the updated inode
    5159             :                  * count must get pushed to disk. The inode has already
    5160             :                  * been pushed into its inode buffer (via VOP_UPDATE) at
    5161             :                  * the time of the reference count change. So we need only
    5162             :                  * locate that buffer, ensure that there will be no rollback
    5163             :                  * caused by a bitmap dependency, then write the inode buffer.
    5164             :                  */
    5165           0 :                 if (inodedep_lookup(ump->um_fs, inum, 0, &inodedep) == 0) {
    5166           0 :                         FREE_LOCK(&lk);
    5167           0 :                         panic("flush_pagedep_deps: lost inode");
    5168             :                 }
    5169             :                 /*
    5170             :                  * If the inode still has bitmap dependencies,
    5171             :                  * push them to disk.
    5172             :                  */
    5173             :         retry:
    5174           0 :                 if ((inodedep->id_state & DEPCOMPLETE) == 0) {
    5175           0 :                         bp = inodedep->id_buf;
    5176           0 :                         gotit = getdirtybuf(bp, MNT_WAIT);
    5177           0 :                         if (gotit == -1)
    5178           0 :                                 goto retry;
    5179           0 :                         FREE_LOCK(&lk);
    5180           0 :                         if (gotit && (error = bwrite(bp)) != 0)
    5181             :                                 break;
    5182           0 :                         ACQUIRE_LOCK(&lk);
    5183           0 :                         if (dap != LIST_FIRST(diraddhdp))
    5184           0 :                                 continue;
    5185             :                 }
    5186             :                 /*
    5187             :                  * If the inode is still sitting in a buffer waiting
    5188             :                  * to be written, push it to disk.
    5189             :                  */
    5190           0 :                 FREE_LOCK(&lk);
    5191           0 :                 if ((error = bread(ump->um_devvp,
    5192           0 :                     fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)),
    5193           0 :                     (int)ump->um_fs->fs_bsize, &bp)) != 0) {
    5194           0 :                         brelse(bp);
    5195           0 :                         break;
    5196             :                 }
    5197           0 :                 if ((error = bwrite(bp)) != 0)
    5198             :                         break;
    5199           0 :                 ACQUIRE_LOCK(&lk);
    5200             :                 /*
    5201             :                  * If we have failed to get rid of all the dependencies
    5202             :                  * then something is seriously wrong.
    5203             :                  */
    5204           0 :                 if (dap == LIST_FIRST(diraddhdp)) {
    5205           0 :                         FREE_LOCK(&lk);
    5206           0 :                         panic("flush_pagedep_deps: flush failed");
    5207             :                 }
    5208             :         }
    5209           0 :         if (error)
    5210           0 :                 ACQUIRE_LOCK(&lk);
    5211           0 :         return (error);
    5212           0 : }
    5213             : 
    5214             : /*
    5215             :  * A large burst of file addition or deletion activity can drive the
    5216             :  * memory load excessively high. First attempt to slow things down
    5217             :  * using the techniques below. If that fails, this routine requests
    5218             :  * the offending operations to fall back to running synchronously
    5219             :  * until the memory load returns to a reasonable level.
    5220             :  */
    5221             : int
    5222           0 : softdep_slowdown(struct vnode *vp)
    5223             : {
    5224             :         int max_softdeps_hard;
    5225             : 
    5226           0 :         max_softdeps_hard = max_softdeps * 11 / 10;
    5227           0 :         if (num_dirrem < max_softdeps_hard / 2 &&
    5228           0 :             num_inodedep < max_softdeps_hard)
    5229           0 :                 return (0);
    5230           0 :         stat_sync_limit_hit += 1;
    5231           0 :         return (1);
    5232           0 : }
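                     : 
                     : /*
                     :  * Worked example of the threshold above, assuming a hypothetical
                     :  * max_softdeps of 10000: max_softdeps_hard = 10000 * 11 / 10 = 11000,
                     :  * so callers are asked to slow down once num_dirrem reaches
                     :  * 11000 / 2 = 5500 or num_inodedep reaches 11000.
                     :  */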
    5233             : 
    5234             : /*
    5235             :  * If memory utilization has gotten too high, deliberately slow things
    5236             :  * down and speed up the I/O processing.
    5237             :  */
    5238             : STATIC int
    5239           0 : request_cleanup(int resource, int islocked)
    5240             : {
    5241           0 :         struct proc *p = CURPROC;
    5242             :         int s;
    5243             : 
    5244             :         /*
    5245             :          * We never hold up the filesystem syncer process.
    5246             :          */
    5247           0 :         if (p == filesys_syncer || (p->p_flag & P_SOFTDEP))
    5248           0 :                 return (0);
    5249             :         /*
    5250             :          * First check to see if the work list has gotten backlogged.
    5251             :          * If it has, co-opt this process to help clean up two entries.
    5252             :          * Because this process may hold inodes locked, we cannot
    5253             :          * handle any remove requests that might block on a locked
    5254             :          * inode as that could lead to deadlock. We set P_SOFTDEP
    5255             :          * to avoid recursively processing the worklist.
    5256             :          */
    5257           0 :         if (num_on_worklist > max_softdeps / 10) {
    5258           0 :                 atomic_setbits_int(&p->p_flag, P_SOFTDEP);
    5259           0 :                 if (islocked)
    5260           0 :                         FREE_LOCK(&lk);
    5261           0 :                 process_worklist_item(NULL, LK_NOWAIT);
    5262           0 :                 process_worklist_item(NULL, LK_NOWAIT);
    5263           0 :                 atomic_clearbits_int(&p->p_flag, P_SOFTDEP);
    5264           0 :                 stat_worklist_push += 2;
    5265           0 :                 if (islocked)
    5266           0 :                         ACQUIRE_LOCK(&lk);
    5267           0 :                 return(1);
    5268             :         }
    5269             :         /*
    5270             :          * Next, we attempt to speed up the syncer process. If that
    5271             :          * is successful, then we allow the process to continue.
    5272             :          */
    5273           0 :         if (speedup_syncer())
    5274           0 :                 return(0);
    5275             :         /*
    5276             :          * If we are resource constrained on inode dependencies, try
    5277             :          * flushing some dirty inodes. Otherwise, we are constrained
    5278             :          * by file deletions, so try accelerating flushes of directories
    5279             :          * with removal dependencies. We would like to do the cleanup
    5280             :          * here, but we probably hold an inode locked at this point and 
    5281             :          * that might deadlock against one that we try to clean. So,
    5282             :          * the best that we can do is request the syncer daemon to do
    5283             :          * the cleanup for us.
    5284             :          */
    5285           0 :         switch (resource) {
    5286             : 
    5287             :         case FLUSH_INODES:
    5288           0 :                 stat_ino_limit_push += 1;
    5289           0 :                 req_clear_inodedeps += 1;
    5290           0 :                 stat_countp = &stat_ino_limit_hit;
    5291           0 :                 break;
    5292             : 
    5293             :         case FLUSH_REMOVE:
    5294           0 :                 stat_blk_limit_push += 1;
    5295           0 :                 req_clear_remove += 1;
    5296           0 :                 stat_countp = &stat_blk_limit_hit;
    5297           0 :                 break;
    5298             : 
    5299             :         default:
    5300           0 :                 if (islocked)
    5301           0 :                         FREE_LOCK(&lk);
    5302           0 :                 panic("request_cleanup: unknown type");
    5303             :         }
    5304             :         /*
    5305             :          * Hopefully the syncer daemon will catch up and awaken us.
    5306             :          * We wait at most tickdelay before proceeding in any case.
    5307             :          */
    5308           0 :         if (islocked == 0)
    5309           0 :                 ACQUIRE_LOCK(&lk);
    5310           0 :         proc_waiting += 1;
    5311           0 :         if (!timeout_pending(&proc_waiting_timeout))
    5312           0 :                 timeout_add(&proc_waiting_timeout, tickdelay > 2 ? tickdelay : 2);
    5313             : 
    5314           0 :         s = FREE_LOCK_INTERLOCKED(&lk);
    5315           0 :         (void) tsleep((caddr_t)&proc_waiting, PPAUSE, "softupdate", 0);
    5316           0 :         ACQUIRE_LOCK_INTERLOCKED(&lk, s);
    5317           0 :         proc_waiting -= 1;
    5318           0 :         if (islocked == 0)
    5319           0 :                 FREE_LOCK(&lk);
    5320           0 :         return (1);
    5321           0 : }
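                     : 
                     : /*
                     :  * For illustration, again assuming max_softdeps = 10000: the calling
                     :  * process is co-opted to process two worklist items once
                     :  * num_on_worklist exceeds 10000 / 10 = 1000; below that it either
                     :  * relies on a successful speedup_syncer() or sleeps on proc_waiting
                     :  * until pause_timer fires, at most about tickdelay ticks later.
                     :  */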
    5322             : 
    5323             : /*
    5324             :  * Awaken a process pausing in request_cleanup and, if processes are
    5325             :  * still waiting, re-arm the timer so they will be awakened again.
    5326             :  */
    5327             : void
    5328           0 : pause_timer(void *arg)
    5329             : {
    5330             : 
    5331           0 :         *stat_countp += 1;
    5332           0 :         wakeup_one(&proc_waiting);
    5333           0 :         if (proc_waiting > 0)
    5334           0 :                 timeout_add(&proc_waiting_timeout, tickdelay > 2 ? tickdelay : 2);
    5335           0 : }
    5336             : 
    5337             : /*
    5338             :  * Flush out a directory with at least one removal dependency in an effort to
    5339             :  * reduce the number of dirrem, freefile, and freeblks dependency structures.
    5340             :  */
    5341             : STATIC void
    5342           0 : clear_remove(struct proc *p)
    5343             : {
    5344             :         struct pagedep_hashhead *pagedephd;
    5345             :         struct pagedep *pagedep;
    5346             :         static int next = 0;
    5347             :         struct mount *mp;
    5348           0 :         struct vnode *vp;
    5349             :         int error, cnt;
    5350             :         ufsino_t ino;
    5351             : 
    5352           0 :         ACQUIRE_LOCK(&lk);
    5353           0 :         for (cnt = 0; cnt <= pagedep_hash; cnt++) {
    5354           0 :                 pagedephd = &pagedep_hashtbl[next++];
    5355           0 :                 if (next > pagedep_hash)
    5356           0 :                         next = 0;
    5357           0 :                 LIST_FOREACH(pagedep, pagedephd, pd_hash) {
    5358           0 :                         if (LIST_FIRST(&pagedep->pd_dirremhd) == NULL)
    5359             :                                 continue;
    5360           0 :                         mp = pagedep->pd_mnt;
    5361           0 :                         ino = pagedep->pd_ino;
    5362             : #if 0
    5363             :                         if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
    5364             :                                 continue;
    5365             : #endif
    5366           0 :                         FREE_LOCK(&lk);
    5367           0 :                         if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
    5368           0 :                                 softdep_error("clear_remove: vget", error);
    5369             : #if 0
    5370             :                                 vn_finished_write(mp);
    5371             : #endif
    5372           0 :                                 return;
    5373             :                         }
    5374           0 :                         if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)))
    5375           0 :                                 softdep_error("clear_remove: fsync", error);
    5376           0 :                         drain_output(vp, 0);
    5377           0 :                         vput(vp);
    5378             : #if 0
    5379             :                         vn_finished_write(mp);
    5380             : #endif
    5381           0 :                         return;
    5382             :                 }
    5383             :         }
    5384           0 :         FREE_LOCK(&lk);
    5385           0 : }
    5386             : 
    5387             : /*
    5388             :  * Clear out a block of dirty inodes in an effort to reduce
    5389             :  * the number of inodedep dependency structures.
    5390             :  */
    5391             : STATIC void
    5392           0 : clear_inodedeps(struct proc *p)
    5393             : {
    5394             :         struct inodedep_hashhead *inodedephd;
    5395           0 :         struct inodedep *inodedep = NULL;
    5396             :         static int next = 0;
    5397             :         struct mount *mp;
    5398           0 :         struct vnode *vp;
    5399             :         struct fs *fs;
    5400             :         int error, cnt;
    5401             :         ufsino_t firstino, lastino, ino;
    5402             : 
    5403           0 :         ACQUIRE_LOCK(&lk);
    5404             :         /*
    5405             :          * Pick the next inode dependency (round-robin) to be cleared.
    5406             :          * We will then gather up all the inodes in its block 
    5407             :          * that have dependencies and flush them out.
    5408             :          */
    5409           0 :         for (cnt = 0; cnt <= inodedep_hash; cnt++) {
    5410           0 :                 inodedephd = &inodedep_hashtbl[next++];
    5411           0 :                 if (next > inodedep_hash)
    5412           0 :                         next = 0;
    5413           0 :                 if ((inodedep = LIST_FIRST(inodedephd)) != NULL)
    5414             :                         break;
    5415             :         }
    5416           0 :         if (inodedep == NULL) {
    5417           0 :                 FREE_LOCK(&lk);
    5418           0 :                 return;
    5419             :         }
    5420             :         /*
    5421             :          * Ugly code to find mount point given pointer to superblock.
    5422             :          */
    5423           0 :         fs = inodedep->id_fs;
    5424           0 :         TAILQ_FOREACH(mp, &mountlist, mnt_list)
    5425           0 :                 if ((mp->mnt_flag & MNT_SOFTDEP) && fs == VFSTOUFS(mp)->um_fs)
    5426             :                         break;
    5427             :         /*
    5428             :          * Find the last inode in the block with dependencies.
    5429             :          */
    5430           0 :         firstino = inodedep->id_ino & ~(INOPB(fs) - 1);
    5431           0 :         for (lastino = firstino + INOPB(fs) - 1; lastino > firstino; lastino--)
    5432           0 :                 if (inodedep_lookup(fs, lastino, 0, &inodedep) != 0)
    5433             :                         break;
    5434             :         /*
    5435             :          * Asynchronously push all but the last inode with dependencies.
    5436             :          * Synchronously push the last inode with dependencies to ensure
    5437             :          * that the inode block gets written to free up the inodedeps.
    5438             :          */
    5439           0 :         for (ino = firstino; ino <= lastino; ino++) {
    5440           0 :                 if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
    5441             :                         continue;
    5442           0 :                 FREE_LOCK(&lk);
    5443             : #if 0
    5444             :                 if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
    5445             :                         continue;
    5446             : #endif
    5447           0 :                 if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
    5448           0 :                         softdep_error("clear_inodedeps: vget", error);
    5449             : #if 0
    5450             :                         vn_finished_write(mp);
    5451             : #endif
    5452           0 :                         return;
    5453             :                 }
    5454           0 :                 if (ino == lastino) {
    5455           0 :                         if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p)))
    5456           0 :                                 softdep_error("clear_inodedeps: fsync1", error);
    5457             :                 } else {
    5458           0 :                         if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)))
    5459           0 :                                 softdep_error("clear_inodedeps: fsync2", error);
    5460           0 :                         drain_output(vp, 0);
    5461             :                 }
    5462           0 :                 vput(vp);
    5463             : #if 0
    5464             :                 vn_finished_write(mp);
    5465             : #endif
    5466           0 :                 ACQUIRE_LOCK(&lk);
    5467           0 :         }
    5468           0 :         FREE_LOCK(&lk);
    5469           0 : }
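                     : 
                     : /*
                     :  * Example of the inode-block arithmetic above, with a hypothetical
                     :  * INOPB(fs) of 64 and id_ino = 1000: firstino = 1000 & ~63 = 960,
                     :  * and lastino scans downward from 960 + 64 - 1 = 1023 for the last
                     :  * inode in that block that still has dependencies.
                     :  */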
    5470             : 
    5471             : /*
    5472             :  * Function to determine if the buffer has outstanding dependencies
    5473             :  * that will cause a roll-back if the buffer is written. If wantcount
    5474             :  * is set, return number of dependencies, otherwise just yes or no.
    5475             :  */
    5476             : int
    5477           0 : softdep_count_dependencies(struct buf *bp, int wantcount, int islocked)
    5478             : {
    5479             :         struct worklist *wk;
    5480             :         struct inodedep *inodedep;
    5481             :         struct indirdep *indirdep;
    5482             :         struct allocindir *aip;
    5483             :         struct pagedep *pagedep;
    5484             :         struct diradd *dap;
    5485             :         int i, retval;
    5486             : 
    5487             :         retval = 0;
    5488           0 :         if (!islocked)
    5489           0 :                 ACQUIRE_LOCK(&lk);
    5490           0 :         LIST_FOREACH(wk, &bp->b_dep, wk_list) {
    5491           0 :                 switch (wk->wk_type) {
    5492             : 
    5493             :                 case D_INODEDEP:
    5494           0 :                         inodedep = WK_INODEDEP(wk);
    5495           0 :                         if ((inodedep->id_state & DEPCOMPLETE) == 0) {
    5496             :                                 /* bitmap allocation dependency */
    5497           0 :                                 retval += 1;
    5498           0 :                                 if (!wantcount)
    5499             :                                         goto out;
    5500             :                         }
    5501           0 :                         if (TAILQ_FIRST(&inodedep->id_inoupdt)) {
    5502             :                                 /* direct block pointer dependency */
    5503           0 :                                 retval += 1;
    5504           0 :                                 if (!wantcount)
    5505             :                                         goto out;
    5506             :                         }
    5507             :                         continue;
    5508             : 
    5509             :                 case D_INDIRDEP:
    5510           0 :                         indirdep = WK_INDIRDEP(wk);
    5511             : 
    5512           0 :                         LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) {
    5513             :                                 /* indirect block pointer dependency */
    5514           0 :                                 retval += 1;
    5515           0 :                                 if (!wantcount)
    5516             :                                         goto out;
    5517             :                         }
    5518             :                         continue;
    5519             : 
    5520             :                 case D_PAGEDEP:
    5521           0 :                         pagedep = WK_PAGEDEP(wk);
    5522           0 :                         for (i = 0; i < DAHASHSZ; i++) {
    5523             : 
    5524           0 :                                 LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
    5525             :                                         /* directory entry dependency */
    5526           0 :                                         retval += 1;
    5527           0 :                                         if (!wantcount)
    5528             :                                                 goto out;
    5529             :                                 }
    5530             :                         }
    5531             :                         continue;
    5532             : 
    5533             :                 case D_BMSAFEMAP:
    5534             :                 case D_ALLOCDIRECT:
    5535             :                 case D_ALLOCINDIR:
    5536             :                 case D_MKDIR:
    5537             :                         /* never a dependency on these blocks */
    5538             :                         continue;
    5539             : 
    5540             :                 default:
    5541           0 :                         if (!islocked)
    5542           0 :                                 FREE_LOCK(&lk);
    5543           0 :                         panic("softdep_count_dependencies: Unexpected type %s",
    5544           0 :                             TYPENAME(wk->wk_type));
    5545             :                         /* NOTREACHED */
    5546             :                 }
    5547             :         }
    5548             : out:
    5549           0 :         if (!islocked)
    5550           0 :                 FREE_LOCK(&lk);
    5551           0 :         return retval;
    5552             : }
    5553             : 
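softdep_count_dependencies() above uses a single wantcount flag to serve two kinds of callers: one that needs an exact dependency count and one that only needs to know whether any dependency exists, in which case the walk stops at the first hit. A minimal self-contained sketch of that convention, with an integer array standing in for the buffer's work list:

#include <stdio.h>

/* Count non-zero entries; with wantcount == 0, stop at the first hit. */
static int
count_deps(const int *deps, int n, int wantcount)
{
        int i, retval = 0;

        for (i = 0; i < n; i++) {
                if (deps[i] != 0) {
                        retval += 1;
                        if (!wantcount)
                                break;          /* a yes/no answer suffices */
                }
        }
        return retval;
}

int
main(void)
{
        int deps[] = { 0, 1, 0, 1, 1 };

        printf("any dependency? %d\n", count_deps(deps, 5, 0));  /* prints 1 */
        printf("how many?       %d\n", count_deps(deps, 5, 1));  /* prints 3 */
        return 0;
}
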
    5554             : /*
    5555             :  * Acquire exclusive access to a buffer.
    5556             :  * Must be called with splbio blocked.
    5557             :  * Returns:
    5558             :  * 1 if the buffer was acquired and is dirty;
    5559             :  * 0 if the buffer was clean, or we would have slept but had MNT_NOWAIT;
    5560             :  * -1 if we slept and may try again (but not with this bp).
    5561             :  */
    5562             : STATIC int
    5563           0 : getdirtybuf(struct buf *bp, int waitfor)
    5564             : {
    5565             :         int s;
    5566             : 
    5567           0 :         if (bp == NULL)
    5568           0 :                 return (0);
    5569             : 
    5570           0 :         splassert(IPL_BIO);
    5571             : 
    5572           0 :         if (bp->b_flags & B_BUSY) {
    5573           0 :                 if (waitfor != MNT_WAIT)
    5574           0 :                         return (0);
    5575           0 :                 bp->b_flags |= B_WANTED;
    5576           0 :                 s = FREE_LOCK_INTERLOCKED(&lk);
    5577           0 :                 tsleep((caddr_t)bp, PRIBIO + 1, "sdsdty", 0);
    5578           0 :                 ACQUIRE_LOCK_INTERLOCKED(&lk, s);
    5579           0 :                 return (-1);
    5580             :         }
    5581           0 :         if ((bp->b_flags & B_DELWRI) == 0)
    5582           0 :                 return (0);
    5583           0 :         bremfree(bp);
    5584           0 :         buf_acquire(bp);
    5585           0 :         return (1);
    5586           0 : }
    5587             : 
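The return-value contract documented above getdirtybuf() implies a particular shape for its callers: 0 means skip the buffer, 1 means it was acquired dirty and should be flushed, and -1 means the routine slept, so the scan has to be restarted because the situation may have changed in the meantime. Below is a self-contained sketch of that retry pattern; try_acquire() is a hypothetical stand-in for getdirtybuf(), not kernel code.

#include <stdio.h>

/* Fake acquisition: sleep exactly once, then report odd buffers as dirty. */
static int
try_acquire(int idx, int *slept_once)
{
        if (!*slept_once) {
                *slept_once = 1;
                return -1;              /* "we slept, rescan from the top" */
        }
        return (idx % 2) ? 1 : 0;       /* 1: acquired dirty, 0: skip */
}

int
main(void)
{
        int slept_once = 0, i;

restart:
        for (i = 0; i < 4; i++) {
                switch (try_acquire(i, &slept_once)) {
                case -1:
                        printf("slept on buf %d, rescanning\n", i);
                        goto restart;
                case 0:
                        printf("buf %d clean or unavailable, skipping\n", i);
                        break;
                case 1:
                        printf("buf %d acquired dirty, would flush here\n", i);
                        break;
                }
        }
        return 0;
}
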
    5588             : /*
    5589             :  * Wait for pending output on a vnode to complete.
    5590             :  * Must be called with vnode locked.
    5591             :  */
    5592             : STATIC void
    5593           0 : drain_output(struct vnode *vp, int islocked)
    5594             : {
    5595             :         int s;
    5596             : 
    5597           0 :         if (!islocked)
    5598           0 :                 ACQUIRE_LOCK(&lk);
    5599             : 
    5600           0 :         splassert(IPL_BIO);
    5601             : 
    5602           0 :         while (vp->v_numoutput) {
    5603           0 :                 vp->v_bioflag |= VBIOWAIT;
    5604           0 :                 s = FREE_LOCK_INTERLOCKED(&lk);
    5605           0 :                 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "drain_output", 0);
    5606           0 :                 ACQUIRE_LOCK_INTERLOCKED(&lk, s);
    5607             :         }
    5608           0 :         if (!islocked)
    5609           0 :                 FREE_LOCK(&lk);
    5610           0 : }
    5611             : 
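drain_output() above is an instance of the classic "sleep until a counter drains to zero" wait, built from v_numoutput, the VBIOWAIT flag and tsleep(); the wakeup side lives in the I/O completion path outside this file. The same pattern expressed with POSIX threads, as an editor's user-space analogy rather than kernel code (numoutput stands in for vp->v_numoutput):

#include <pthread.h>

static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  drained = PTHREAD_COND_INITIALIZER;
static int numoutput;                   /* pending writes, like v_numoutput */

/* I/O submission path: account for one more pending write. */
void
output_start(void)
{
        pthread_mutex_lock(&lk);
        numoutput++;
        pthread_mutex_unlock(&lk);
}

/* I/O completion path: drop the counter and wake waiters when it hits 0. */
void
output_done(void)
{
        pthread_mutex_lock(&lk);
        if (--numoutput == 0)
                pthread_cond_broadcast(&drained);
        pthread_mutex_unlock(&lk);
}

/* Flushing path: block until every pending write has finished. */
void
drain(void)
{
        pthread_mutex_lock(&lk);
        while (numoutput > 0)
                pthread_cond_wait(&drained, &lk);
        pthread_mutex_unlock(&lk);
}

int
main(void)
{
        output_start();         /* one write in flight */
        output_done();          /* ...and it completes */
        drain();                /* returns immediately: counter is zero */
        return 0;
}
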
    5612             : /*
    5613             :  * Called whenever a buffer that is being invalidated or reallocated
    5614             :  * contains dependencies. This should only happen if an I/O error has
    5615             :  * occurred. The routine is called with the buffer locked.
    5616             :  */ 
    5617             : void
    5618           0 : softdep_deallocate_dependencies(struct buf *bp)
    5619             : {
    5620             : 
    5621           0 :         if ((bp->b_flags & B_ERROR) == 0)
    5622           0 :                 panic("softdep_deallocate_dependencies: dangling deps");
    5623           0 :         softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error);
    5624           0 :         panic("softdep_deallocate_dependencies: unrecovered I/O error");
    5625             : }
    5626             : 
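softdep_deallocate_dependencies() above is a last-resort handler: it is only reached when a buffer that still carries dependency records is about to be invalidated or reused, which the comment notes can only follow an I/O error. The sketch below shows, purely hypothetically, how a buffer-recycling path might consult such a deallocate hook; the hook structure and names here are illustrative and are not the kernel's actual wiring.

struct buf;                                     /* opaque in this sketch */

struct dep_hooks {
        int  (*has_deps)(struct buf *);         /* any records attached? */
        void (*deallocate)(struct buf *);       /* may panic, never returns */
};

static void
recycle_buf(struct buf *bp, const struct dep_hooks *hooks)
{
        if (hooks != NULL && hooks->has_deps(bp))
                hooks->deallocate(bp);          /* only reached on I/O error */
        /* otherwise the buffer can be reused normally */
}
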
    5627             : /*
    5628             :  * Function to handle asynchronous write errors in the filesystem.
    5629             :  */
    5630             : void
    5631           0 : softdep_error(char *func, int error)
    5632             : {
    5633             : 
    5634             :         /* XXX should do something better! */
    5635           0 :         printf("%s: got error %d while accessing filesystem\n", func, error);
    5636           0 : }
    5637             : 
    5638             : #ifdef DDB
    5639             : #include <machine/db_machdep.h>
    5640             : #include <ddb/db_interface.h>
    5641             : #include <ddb/db_output.h>
    5642             : 
    5643             : void
    5644           0 : softdep_print(struct buf *bp, int full,
    5645             :     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
    5646             : {
    5647             :         struct worklist *wk;
    5648             : 
    5649           0 :         (*pr)("  deps:\n");
    5650           0 :         LIST_FOREACH(wk, &bp->b_dep, wk_list)
    5651           0 :                 worklist_print(wk, full, pr);
    5652           0 : }
    5653             : 
    5654             : void
    5655           0 : worklist_print(struct worklist *wk, int full,
    5656             :     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
    5657             : {
    5658             :         struct pagedep *pagedep;
    5659             :         struct inodedep *inodedep;
    5660             :         struct newblk *newblk;
    5661             :         struct bmsafemap *bmsafemap;
    5662             :         struct allocdirect *adp;
    5663             :         struct indirdep *indirdep;
    5664             :         struct allocindir *aip;
    5665             :         struct freefrag *freefrag;
    5666             :         struct freeblks *freeblks;
    5667             :         struct freefile *freefile;
    5668             :         struct diradd *dap;
    5669             :         struct mkdir *mkdir;
    5670             :         struct dirrem *dirrem;
    5671             :         struct newdirblk *newdirblk;
    5672           0 :         char prefix[33];
    5673             :         int i;
    5674             : 
    5675           0 :         for (prefix[i = 2 * MIN(16, full)] = '\0'; i--; prefix[i] = ' ')
    5676             :                 ;
    5677             : 
    5678           0 :         (*pr)("%s%s(%p) state %b\n%s", prefix, TYPENAME(wk->wk_type), wk,
    5679           0 :             wk->wk_state, DEP_BITS, prefix);
    5680           0 :         switch (wk->wk_type) {
    5681             :         case D_PAGEDEP:
    5682           0 :                 pagedep = WK_PAGEDEP(wk);
    5683           0 :                 (*pr)("mount %p ino %u lbn %lld\n", pagedep->pd_mnt,
    5684           0 :                     pagedep->pd_ino, (long long)pagedep->pd_lbn);
    5685           0 :                 break;
    5686             :         case D_INODEDEP:
    5687           0 :                 inodedep = WK_INODEDEP(wk);
    5688           0 :                 (*pr)("fs %p ino %u nlinkdelta %u dino %p\n"
    5689           0 :                     "%s  bp %p savsz %lld\n", inodedep->id_fs,
    5690           0 :                     inodedep->id_ino, inodedep->id_nlinkdelta,
    5691           0 :                     inodedep->id_un.idu_savedino1,
    5692           0 :                     prefix, inodedep->id_buf, inodedep->id_savedsize);
    5693           0 :                 break;
    5694             :         case D_NEWBLK:
    5695           0 :                 newblk = WK_NEWBLK(wk);
    5696           0 :                 (*pr)("fs %p newblk %lld state %d bmsafemap %p\n",
    5697           0 :                     newblk->nb_fs, (long long)newblk->nb_newblkno,
    5698           0 :                     newblk->nb_state, newblk->nb_bmsafemap);
    5699           0 :                 break;
    5700             :         case D_BMSAFEMAP:
    5701           0 :                 bmsafemap = WK_BMSAFEMAP(wk);
    5702           0 :                 (*pr)("buf %p\n", bmsafemap->sm_buf);
    5703           0 :                 break;
    5704             :         case D_ALLOCDIRECT:
    5705           0 :                 adp = WK_ALLOCDIRECT(wk);
    5706           0 :                 (*pr)("lbn %lld newblk %lld oldblk %lld newsize %ld oldsize "
    5707             :                     "%ld\n%s  bp %p inodedep %p freefrag %p\n",
    5708           0 :                     (long long)adp->ad_lbn, (long long)adp->ad_newblkno,
    5709           0 :                     (long long)adp->ad_oldblkno, adp->ad_newsize,
    5710           0 :                     adp->ad_oldsize,
    5711           0 :                     prefix, adp->ad_buf, adp->ad_inodedep, adp->ad_freefrag);
    5712           0 :                 break;
    5713             :         case D_INDIRDEP:
    5714           0 :                 indirdep = WK_INDIRDEP(wk);
    5715           0 :                 (*pr)("savedata %p savebp %p\n", indirdep->ir_saveddata,
    5716           0 :                     indirdep->ir_savebp);
    5717           0 :                 break;
    5718             :         case D_ALLOCINDIR:
    5719           0 :                 aip = WK_ALLOCINDIR(wk);
    5720           0 :                 (*pr)("off %d newblk %lld oldblk %lld freefrag %p\n"
    5721           0 :                     "%s  indirdep %p buf %p\n", aip->ai_offset,
    5722           0 :                     (long long)aip->ai_newblkno, (long long)aip->ai_oldblkno,
    5723           0 :                     aip->ai_freefrag, prefix, aip->ai_indirdep, aip->ai_buf);
    5724           0 :                 break;
    5725             :         case D_FREEFRAG:
    5726           0 :                 freefrag = WK_FREEFRAG(wk);
    5727           0 :                 (*pr)("vnode %p mp %p blkno %lld fsize %ld ino %u\n",
    5728           0 :                     freefrag->ff_devvp, freefrag->ff_mnt,
    5729           0 :                     (long long)freefrag->ff_blkno, freefrag->ff_fragsize,
    5730           0 :                     freefrag->ff_inum);
    5731           0 :                 break;
    5732             :         case D_FREEBLKS:
    5733           0 :                 freeblks = WK_FREEBLKS(wk);
    5734           0 :                 (*pr)("previno %u devvp %p mp %p oldsz %lld newsz %lld\n"
    5735           0 :                     "%s  chkcnt %d uid %d\n", freeblks->fb_previousinum,
    5736           0 :                     freeblks->fb_devvp, freeblks->fb_mnt, freeblks->fb_oldsize,
    5737           0 :                     freeblks->fb_newsize,
    5738           0 :                     prefix, freeblks->fb_chkcnt, freeblks->fb_uid);
    5739           0 :                 break;
    5740             :         case D_FREEFILE:
    5741           0 :                 freefile = WK_FREEFILE(wk);
    5742           0 :                 (*pr)("mode %x oldino %u vnode %p mp %p\n", freefile->fx_mode,
    5743           0 :                     freefile->fx_oldinum, freefile->fx_devvp, freefile->fx_mnt);
    5744           0 :                 break;
    5745             :         case D_DIRADD:
    5746           0 :                 dap = WK_DIRADD(wk);
    5747           0 :                 (*pr)("off %d ino %u da_un %p\n", dap->da_offset, 
    5748           0 :                     dap->da_newinum, dap->da_un.dau_previous);
    5749           0 :                 break;
    5750             :         case D_MKDIR:
    5751           0 :                 mkdir = WK_MKDIR(wk);
    5752           0 :                 (*pr)("diradd %p bp %p\n", mkdir->md_diradd, mkdir->md_buf);
    5753           0 :                 break;
    5754             :         case D_DIRREM:
    5755           0 :                 dirrem = WK_DIRREM(wk);
    5756           0 :                 (*pr)("mp %p ino %u dm_un %p\n", dirrem->dm_mnt, 
    5757           0 :                     dirrem->dm_oldinum, dirrem->dm_un.dmu_pagedep);
    5758           0 :                 break;
    5759             :         case D_NEWDIRBLK:
    5760           0 :                 newdirblk = WK_NEWDIRBLK(wk);
    5761           0 :                 (*pr)("pagedep %p\n", newdirblk->db_pagedep);
    5762           0 :                 break;
    5763             :         }
    5764           0 : }
    5765             : #endif
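One last note on the DDB helpers: the terse for loop near the top of worklist_print() builds prefix as a run of 2 * MIN(16, full) spaces by storing the NUL terminator first and then back-filling spaces from the end; that string indents the continuation lines of each printed record. A standalone equivalent for reference:

#include <stdio.h>
#include <string.h>

#define MIN(a, b)       ((a) < (b) ? (a) : (b))

int
main(void)
{
        char prefix[33];
        int full = 3, i;

        /* place the terminator, then walk backwards filling in spaces */
        for (prefix[i = 2 * MIN(16, full)] = '\0'; i--; prefix[i] = ' ')
                ;
        printf("\"%s\" -> %zu spaces\n", prefix, strlen(prefix));
        return 0;
}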

Generated by: LCOV version 1.13