LCOV - code coverage report
Current view: top level - kern - vfs_bio.c (source / functions)
Test: 6.4        Date: 2018-10-19 03:25:38

                 Hit    Total    Coverage
Lines:             0      697       0.0 %
Functions:         0       38       0.0 %

Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*      $OpenBSD: vfs_bio.c,v 1.186 2018/08/13 15:26:17 visa Exp $      */
       2             : /*      $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $  */
       3             : 
       4             : /*
       5             :  * Copyright (c) 1994 Christopher G. Demetriou
       6             :  * Copyright (c) 1982, 1986, 1989, 1993
       7             :  *      The Regents of the University of California.  All rights reserved.
       8             :  * (c) UNIX System Laboratories, Inc.
       9             :  * All or some portions of this file are derived from material licensed
      10             :  * to the University of California by American Telephone and Telegraph
      11             :  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
      12             :  * the permission of UNIX System Laboratories, Inc.
      13             :  *
      14             :  * Redistribution and use in source and binary forms, with or without
      15             :  * modification, are permitted provided that the following conditions
      16             :  * are met:
      17             :  * 1. Redistributions of source code must retain the above copyright
      18             :  *    notice, this list of conditions and the following disclaimer.
      19             :  * 2. Redistributions in binary form must reproduce the above copyright
      20             :  *    notice, this list of conditions and the following disclaimer in the
      21             :  *    documentation and/or other materials provided with the distribution.
      22             :  * 3. Neither the name of the University nor the names of its contributors
      23             :  *    may be used to endorse or promote products derived from this software
      24             :  *    without specific prior written permission.
      25             :  *
      26             :  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
      27             :  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      28             :  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      29             :  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
      30             :  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
      31             :  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
      32             :  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      33             :  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      34             :  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      35             :  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      36             :  * SUCH DAMAGE.
      37             :  *
      38             :  *      @(#)vfs_bio.c   8.6 (Berkeley) 1/11/94
      39             :  */
      40             : 
      41             : /*
      42             :  * Some references:
      43             :  *      Bach: The Design of the UNIX Operating System (Prentice Hall, 1986)
      44             :  *      Leffler, et al.: The Design and Implementation of the 4.3BSD
       45             :  *              UNIX Operating System (Addison-Wesley, 1989)
      46             :  */
      47             : 
      48             : #include <sys/param.h>
      49             : #include <sys/systm.h>
      50             : #include <sys/proc.h>
      51             : #include <sys/buf.h>
      52             : #include <sys/vnode.h>
      53             : #include <sys/mount.h>
      54             : #include <sys/malloc.h>
      55             : #include <sys/pool.h>
      56             : #include <sys/resourcevar.h>
      57             : #include <sys/conf.h>
      58             : #include <sys/kernel.h>
      59             : #include <sys/specdev.h>
      60             : #include <uvm/uvm_extern.h>
      61             : 
      62             : /* XXX Should really be in buf.h, but for uvm_constraint_range.. */
      63             : int     buf_realloc_pages(struct buf *, struct uvm_constraint_range *, int);
      64             : 
      65             : struct uvm_constraint_range high_constraint;
      66             : int fliphigh;
      67             : 
      68             : int nobuffers;
      69             : int needbuffer;
      70             : struct bio_ops bioops;
      71             : 
      72             : /* private bufcache functions */
      73             : void bufcache_init(void);
      74             : void bufcache_adjust(void);
      75             : struct buf *bufcache_gethighcleanbuf(void);
      76             : struct buf *bufcache_getdmacleanbuf(void);
      77             : 
      78             : /*
      79             :  * Buffer pool for I/O buffers.
      80             :  */
      81             : struct pool bufpool;
      82             : struct bufhead bufhead = LIST_HEAD_INITIALIZER(bufhead);
      83             : void buf_put(struct buf *);
      84             : 
      85             : struct buf *bio_doread(struct vnode *, daddr_t, int, int);
      86             : struct buf *buf_get(struct vnode *, daddr_t, size_t);
      87             : void bread_cluster_callback(struct buf *);
      88             : 
      89             : struct bcachestats bcstats;  /* counters */
      90             : long lodirtypages;      /* dirty page count low water mark */
      91             : long hidirtypages;      /* dirty page count high water mark */
      92             : long targetpages;       /* target number of pages for cache size */
      93             : long buflowpages;       /* smallest size cache allowed */
      94             : long bufhighpages;      /* largest size cache allowed */
      95             : long bufbackpages;      /* minimum number of pages we shrink when asked to */
      96             : 
      97             : vsize_t bufkvm;
      98             : 
      99             : struct proc *cleanerproc;
     100             : int bd_req;                     /* Sleep point for cleaner daemon. */
     101             : 
     102             : #define NUM_CACHES 2
     103             : #define DMA_CACHE 0
     104             : struct bufcache cleancache[NUM_CACHES];
     105             : struct bufqueue dirtyqueue;
     106             : 
     107             : void
     108           0 : buf_put(struct buf *bp)
     109             : {
     110           0 :         splassert(IPL_BIO);
     111             : 
     112             : #ifdef DIAGNOSTIC
     113           0 :         if (bp->b_pobj != NULL)
     114           0 :                 KASSERT(bp->b_bufsize > 0);
     115           0 :         if (ISSET(bp->b_flags, B_DELWRI))
     116           0 :                 panic("buf_put: releasing dirty buffer");
     117           0 :         if (bp->b_freelist.tqe_next != NOLIST &&
     118           0 :             bp->b_freelist.tqe_next != (void *)-1)
     119           0 :                 panic("buf_put: still on the free list");
     120           0 :         if (bp->b_vnbufs.le_next != NOLIST &&
     121           0 :             bp->b_vnbufs.le_next != (void *)-1)
     122           0 :                 panic("buf_put: still on the vnode list");
     123           0 :         if (!LIST_EMPTY(&bp->b_dep))
     124           0 :                 panic("buf_put: b_dep is not empty");
     125             : #endif
     126             : 
     127           0 :         LIST_REMOVE(bp, b_list);
     128           0 :         bcstats.numbufs--;
     129             : 
     130           0 :         if (buf_dealloc_mem(bp) != 0)
     131             :                 return;
     132           0 :         pool_put(&bufpool, bp);
     133           0 : }
     134             : 
     135             : /*
      136             :  * Initialize the buffer pool and buffer cache.
     137             :  */
     138             : void
     139           0 : bufinit(void)
     140             : {
     141             :         u_int64_t dmapages;
     142             :         u_int64_t highpages;
     143             : 
     144           0 :         dmapages = uvm_pagecount(&dma_constraint);
     145             :         /* take away a guess at how much of this the kernel will consume */
     146           0 :         dmapages -= (atop(physmem) - atop(uvmexp.free));
     147             : 
     148             :         /* See if we have memory above the dma accessible region. */
     149           0 :         high_constraint.ucr_low = dma_constraint.ucr_high;
     150           0 :         high_constraint.ucr_high = no_constraint.ucr_high;
     151           0 :         if (high_constraint.ucr_low != high_constraint.ucr_high)
     152           0 :                 high_constraint.ucr_low++;
     153           0 :         highpages = uvm_pagecount(&high_constraint);
     154             : 
     155             :         /*
     156             :          * Do we have any significant amount of high memory above
      157             :          * the DMA region? If so, enable moving buffers there; if
      158             :          * not, don't bother.
     159             :          */
     160           0 :         if (highpages > dmapages / 4)
     161           0 :                 fliphigh = 1;
     162             :         else
     163           0 :                 fliphigh = 0;
     164             : 
     165             :         /*
     166             :          * If MD code doesn't say otherwise, use up to 10% of DMA'able
     167             :          * memory for buffers.
     168             :          */
     169           0 :         if (bufcachepercent == 0)
     170           0 :                 bufcachepercent = 10;
     171             : 
     172             :         /*
     173             :          * XXX these values and their same use in kern_sysctl
     174             :          * need to move into buf.h
     175             :          */
     176           0 :         KASSERT(bufcachepercent <= 90);
     177           0 :         KASSERT(bufcachepercent >= 5);
     178           0 :         if (bufpages == 0)
     179           0 :                 bufpages = dmapages * bufcachepercent / 100;
     180           0 :         if (bufpages < BCACHE_MIN)
     181           0 :                 bufpages = BCACHE_MIN;
     182           0 :         KASSERT(bufpages < dmapages);
     183             : 
     184           0 :         bufhighpages = bufpages;
     185             : 
     186             :         /*
     187             :          * Set the base backoff level for the buffer cache.  We will
     188             :          * not allow uvm to steal back more than this number of pages.
     189             :          */
     190           0 :         buflowpages = dmapages * 5 / 100;
     191           0 :         if (buflowpages < BCACHE_MIN)
     192             :                 buflowpages = BCACHE_MIN;
     193             : 
     194             :         /*
      195             :          * Set bufbackpages to 10 percent of the low water mark,
      196             :          * capped at 100 pages.
     197             :          */
     198             : 
     199           0 :         bufbackpages = buflowpages * 10 / 100;
     200           0 :         if (bufbackpages > 100)
     201             :                 bufbackpages = 100;
     202             : 
     203             :         /*
     204             :          * If the MD code does not say otherwise, reserve 10% of kva
     205             :          * space for mapping buffers.
     206             :          */
     207           0 :         if (bufkvm == 0)
     208           0 :                 bufkvm = VM_KERNEL_SPACE_SIZE / 10;
     209             : 
     210             :         /*
     211             :          * Don't use more than twice the amount of bufpages for mappings.
     212             :          * It's twice since we map things sparsely.
     213             :          */
     214           0 :         if (bufkvm > bufpages * PAGE_SIZE)
     215           0 :                 bufkvm = bufpages * PAGE_SIZE;
     216             :         /*
      217             :          * Round bufkvm down to a multiple of MAXPHYS because we
      218             :          * allocate va space in MAXPHYS-sized chunks.
     219             :          */
     220           0 :         bufkvm &= ~(MAXPHYS - 1);
     221             : 
     222           0 :         pool_init(&bufpool, sizeof(struct buf), 0, IPL_BIO, 0, "bufpl", NULL);
     223             : 
     224           0 :         bufcache_init();
     225             : 
     226             :         /*
     227             :          * hmm - bufkvm is an argument because it's static, while
     228             :          * bufpages is global because it can change while running.
     229             :          */
     230           0 :         buf_mem_init(bufkvm);
     231             : 
     232             :         /*
     233             :          * Set the dirty page high water mark to be less than the low
     234             :          * water mark for pages in the buffer cache. This ensures we
     235             :          * can always back off by throwing away clean pages, and give
     236             :          * ourselves a chance to write out the dirty pages eventually.
     237             :          */
     238           0 :         hidirtypages = (buflowpages / 4) * 3;
     239           0 :         lodirtypages = buflowpages / 2;
     240             : 
     241             :         /*
     242             :          * We are allowed to use up to the reserve.
     243             :          */
     244           0 :         targetpages = bufpages - RESERVE_PAGES;
     245           0 : }
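/*
 * Editor's note: an illustrative worked example (not part of the
 * original file). On a hypothetical machine with 1,000,000
 * DMA-reachable pages and the default bufcachepercent of 10, the
 * code above yields:
 *
 *      bufpages     = 1000000 * 10 / 100         = 100000
 *      buflowpages  = 1000000 *  5 / 100         =  50000
 *      bufbackpages = min(50000 * 10 / 100, 100) =    100
 *      hidirtypages = (50000 / 4) * 3            =  37500
 *      lodirtypages = 50000 / 2                  =  25000
 *      targetpages  = 100000 - RESERVE_PAGES
 */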
     246             : 
     247             : /*
      248             :  * Change the buffer cache size (bufpages), e.g. when cachepct changes.
     249             :  */
     250             : void
     251           0 : bufadjust(int newbufpages)
     252             : {
     253             :         struct buf *bp;
     254             :         int s;
     255             : 
     256           0 :         if (newbufpages < buflowpages)
     257           0 :                 newbufpages = buflowpages;
     258             : 
     259           0 :         s = splbio();
     260           0 :         bufpages = newbufpages;
     261             : 
     262             :         /*
     263             :          * We are allowed to use up to the reserve
     264             :          */
     265           0 :         targetpages = bufpages - RESERVE_PAGES;
     266             : 
     267             :         /*
     268             :          * Shrinking the cache happens here only if someone has manually
     269             :          * adjusted bufcachepercent - or the pagedaemon has told us
     270             :          * to give back memory *now* - so we give it all back.
     271             :          */
     272           0 :         while ((bp = bufcache_getdmacleanbuf()) &&
     273           0 :             (bcstats.dmapages > targetpages)) {
     274           0 :                 bufcache_take(bp);
     275           0 :                 if (bp->b_vp) {
     276           0 :                         RBT_REMOVE(buf_rb_bufs, &bp->b_vp->v_bufs_tree, bp);
     277           0 :                         brelvp(bp);
     278           0 :                 }
     279           0 :                 buf_put(bp);
     280             :         }
     281           0 :         bufcache_adjust();
     282             : 
     283             :         /*
     284             :          * Wake up the cleaner if we have lots of dirty pages,
     285             :          * or if we are getting low on buffer cache kva.
     286             :          */
     287           0 :         if ((UNCLEAN_PAGES >= hidirtypages) ||
     288           0 :             bcstats.kvaslots_avail <= 2 * RESERVE_SLOTS)
     289           0 :                 wakeup(&bd_req);
     290             : 
     291           0 :         splx(s);
     292           0 : }
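/*
 * Editor's sketch (hypothetical, not part of the original file): how a
 * bufcachepercent change, e.g. via sysctl, might be applied by
 * recomputing the page budget and handing it to bufadjust().
 */
#ifdef EXAMPLE_ONLY
void
example_set_cachepct(u_int64_t dmapages, int pct)
{
        KASSERT(pct >= 5 && pct <= 90);
        bufadjust(dmapages * pct / 100);
}
#endif /* EXAMPLE_ONLY */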
     293             : 
     294             : /*
     295             :  * Make the buffer cache back off from cachepct.
     296             :  */
     297             : int
     298           0 : bufbackoff(struct uvm_constraint_range *range, long size)
     299             : {
     300             :         /*
     301             :          * Back off "size" buffer cache pages. Called by the page
     302             :          * daemon to consume buffer cache pages rather than scanning.
     303             :          *
     304             :          * It returns 0 to the pagedaemon to indicate that it has
     305             :          * succeeded in freeing enough pages. It returns -1 to
     306             :          * indicate that it could not and the pagedaemon should take
     307             :          * other measures.
     308             :          *
     309             :          */
     310             :         long pdelta, oldbufpages;
     311             : 
     312             :         /*
     313             :          * If we will accept high memory for this backoff
     314             :          * try to steal it from the high memory buffer cache.
     315             :          */
     316           0 :         if (range->ucr_high > dma_constraint.ucr_high) {
     317             :                 struct buf *bp;
     318           0 :                 int64_t start = bcstats.numbufpages, recovered = 0;
     319           0 :                 int s = splbio();
     320             : 
     321           0 :                 while ((recovered < size) &&
     322           0 :                     (bp = bufcache_gethighcleanbuf())) {
     323           0 :                         bufcache_take(bp);
     324           0 :                         if (bp->b_vp) {
     325           0 :                                 RBT_REMOVE(buf_rb_bufs,
     326             :                                     &bp->b_vp->v_bufs_tree, bp);
     327           0 :                                 brelvp(bp);
     328           0 :                         }
     329           0 :                         buf_put(bp);
     330           0 :                         recovered = start - bcstats.numbufpages;
     331             :                 }
     332           0 :                 bufcache_adjust();
     333           0 :                 splx(s);
     334             : 
     335             :                 /* If we got enough, return success */
     336           0 :                 if (recovered >= size)
     337           0 :                         return 0;
     338             : 
     339             :                 /*
     340             :                  * If we needed only memory above DMA,
     341             :                  * return failure
     342             :                  */
     343           0 :                 if (range->ucr_low > dma_constraint.ucr_high)
     344           0 :                         return -1;
     345             : 
     346             :                 /* Otherwise get the rest from DMA */
     347           0 :                 size -= recovered;
     348           0 :         }
     349             : 
     350             :         /*
      351             :          * XXX Otherwise do the DMA memory cache dance. This needs
      352             :          * refactoring later to get rid of 'bufpages'.
     353             :          */
     354             : 
     355             :         /*
     356             :          * Back off by at least bufbackpages. If the page daemon gave us
     357             :          * a larger size, back off by that much.
     358             :          */
     359           0 :         pdelta = (size > bufbackpages) ? size : bufbackpages;
     360             : 
     361           0 :         if (bufpages <= buflowpages)
     362           0 :                 return(-1);
     363           0 :         if (bufpages - pdelta < buflowpages)
     364           0 :                 pdelta = bufpages - buflowpages;
     365           0 :         oldbufpages = bufpages;
     366           0 :         bufadjust(bufpages - pdelta);
     367           0 :         if (oldbufpages - bufpages < size)
     368           0 :                 return (-1); /* we did not free what we were asked */
     369             :         else
     370           0 :                 return(0);
     371           0 : }
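/*
 * Editor's sketch (hypothetical caller, not part of the original
 * file): a reclaimer such as the page daemon asking the buffer cache
 * to release "size" pages within a physical range before trying
 * costlier measures.
 */
#ifdef EXAMPLE_ONLY
void
example_reclaim(struct uvm_constraint_range *range, long size)
{
        if (bufbackoff(range, size) == 0)
                return;         /* the cache freed enough pages */
        /* cache could not back off enough; scan/page out instead */
}
#endif /* EXAMPLE_ONLY */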
     372             : 
     373             : 
     374             : /*
      375             :  * Opportunistically flip a buffer into high memory. Moves the buffer
      376             :  * and returns 0 if memory is available without sleeping; otherwise
      377             :  * fails and returns -1, leaving the buffer unchanged.
     378             :  */
     379             : 
     380             : int
     381           0 : buf_flip_high(struct buf *bp)
     382             : {
     383             :         int s;
     384             :         int ret = -1;
     385             : 
     386           0 :         KASSERT(ISSET(bp->b_flags, B_BC));
     387           0 :         KASSERT(ISSET(bp->b_flags, B_DMA));
     388           0 :         KASSERT(bp->cache == DMA_CACHE);
     389           0 :         KASSERT(fliphigh);
     390             : 
     391             :         /* Attempt to move the buffer to high memory if we can */
     392           0 :         s = splbio();
     393           0 :         if (buf_realloc_pages(bp, &high_constraint, UVM_PLA_NOWAIT) == 0) {
     394           0 :                 KASSERT(!ISSET(bp->b_flags, B_DMA));
     395           0 :                 bcstats.highflips++;
     396             :                 ret = 0;
     397           0 :         } else
     398           0 :                 bcstats.highflops++;
     399           0 :         splx(s);
     400             : 
     401           0 :         return ret;
     402             : }
     403             : 
     404             : /*
      405             :  * Flip a buffer to DMA reachable memory when we need it there for
      406             :  * I/O. This can sleep, since it will wait for memory allocation in
      407             :  * the DMA reachable area; the buffer must be there before we proceed.
     408             :  */
     409             : void
     410           0 : buf_flip_dma(struct buf *bp)
     411             : {
     412           0 :         KASSERT(ISSET(bp->b_flags, B_BC));
     413           0 :         KASSERT(ISSET(bp->b_flags, B_BUSY));
     414           0 :         KASSERT(bp->cache < NUM_CACHES);
     415             : 
     416           0 :         if (!ISSET(bp->b_flags, B_DMA)) {
     417           0 :                 int s = splbio();
     418             : 
     419             :                 /* move buf to dma reachable memory */
     420           0 :                 (void) buf_realloc_pages(bp, &dma_constraint, UVM_PLA_WAITOK);
     421           0 :                 KASSERT(ISSET(bp->b_flags, B_DMA));
     422           0 :                 bcstats.dmaflips++;
     423           0 :                 splx(s);
     424           0 :         }
     425             : 
     426           0 :         if (bp->cache > DMA_CACHE) {
     427           0 :                 CLR(bp->b_flags, B_COLD);
     428           0 :                 CLR(bp->b_flags, B_WARM);
     429           0 :                 bp->cache = DMA_CACHE;
     430           0 :         }
     431           0 : }
     432             : 
     433             : struct buf *
     434           0 : bio_doread(struct vnode *vp, daddr_t blkno, int size, int async)
     435             : {
     436             :         struct buf *bp;
     437             :         struct mount *mp;
     438             : 
     439           0 :         bp = getblk(vp, blkno, size, 0, 0);
     440             : 
     441             :         /*
     442             :          * If buffer does not have valid data, start a read.
     443             :          * Note that if buffer is B_INVAL, getblk() won't return it.
     444             :          * Therefore, it's valid if its I/O has completed or been delayed.
     445             :          */
     446           0 :         if (!ISSET(bp->b_flags, (B_DONE | B_DELWRI))) {
     447           0 :                 SET(bp->b_flags, B_READ | async);
     448           0 :                 bcstats.pendingreads++;
     449           0 :                 bcstats.numreads++;
     450           0 :                 VOP_STRATEGY(bp);
     451             :                 /* Pay for the read. */
     452           0 :                 curproc->p_ru.ru_inblock++;                  /* XXX */
     453           0 :         } else if (async) {
     454           0 :                 brelse(bp);
     455           0 :         }
     456             : 
     457           0 :         mp = vp->v_type == VBLK? vp->v_specmountpoint : vp->v_mount;
     458             : 
     459             :         /*
     460             :          * Collect statistics on synchronous and asynchronous reads.
     461             :          * Reads from block devices are charged to their associated
     462             :          * filesystem (if any).
     463             :          */
     464           0 :         if (mp != NULL) {
     465           0 :                 if (async == 0)
     466           0 :                         mp->mnt_stat.f_syncreads++;
     467             :                 else
     468           0 :                         mp->mnt_stat.f_asyncreads++;
     469             :         }
     470             : 
     471           0 :         return (bp);
     472             : }
     473             : 
     474             : /*
     475             :  * Read a disk block.
      476             :  * This algorithm is described in Bach (p. 54).
     477             :  */
     478             : int
     479           0 : bread(struct vnode *vp, daddr_t blkno, int size, struct buf **bpp)
     480             : {
     481             :         struct buf *bp;
     482             : 
     483             :         /* Get buffer for block. */
     484           0 :         bp = *bpp = bio_doread(vp, blkno, size, 0);
     485             : 
     486             :         /* Wait for the read to complete, and return result. */
     487           0 :         return (biowait(bp));
     488             : }
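/*
 * Editor's sketch (hypothetical caller, not part of the original
 * file): the usual bread()/brelse() pattern.  A buffer is returned
 * even on error and must still be released.
 */
#ifdef EXAMPLE_ONLY
int
example_read(struct vnode *vp, daddr_t blkno, int size)
{
        struct buf *bp;
        int error;

        if ((error = bread(vp, blkno, size, &bp)) != 0) {
                brelse(bp);
                return (error);
        }
        /* ... inspect bp->b_data ... */
        brelse(bp);
        return (0);
}
#endif /* EXAMPLE_ONLY */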
     489             : 
     490             : /*
     491             :  * Read-ahead multiple disk blocks. The first is sync, the rest async.
     492             :  * Trivial modification to the breada algorithm presented in Bach (p.55).
     493             :  */
     494             : int
     495           0 : breadn(struct vnode *vp, daddr_t blkno, int size, daddr_t rablks[],
     496             :     int rasizes[], int nrablks, struct buf **bpp)
     497             : {
     498             :         struct buf *bp;
     499             :         int i;
     500             : 
     501           0 :         bp = *bpp = bio_doread(vp, blkno, size, 0);
     502             : 
     503             :         /*
     504             :          * For each of the read-ahead blocks, start a read, if necessary.
     505             :          */
     506           0 :         for (i = 0; i < nrablks; i++) {
     507             :                 /* If it's in the cache, just go on to next one. */
     508           0 :                 if (incore(vp, rablks[i]))
     509             :                         continue;
     510             : 
     511             :                 /* Get a buffer for the read-ahead block */
     512           0 :                 (void) bio_doread(vp, rablks[i], rasizes[i], B_ASYNC);
     513           0 :         }
     514             : 
      515             :         /* Wait for the first read to complete, and return its result. */
     516           0 :         return (biowait(bp));
     517             : }
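/*
 * Editor's sketch (hypothetical, not part of the original file):
 * breadn() with two read-ahead blocks.  Only the first block is
 * waited on; the read-ahead blocks are started asynchronously unless
 * already cached.
 */
#ifdef EXAMPLE_ONLY
int
example_readahead(struct vnode *vp, daddr_t blkno, int size)
{
        daddr_t rablks[2] = { blkno + 1, blkno + 2 };
        int rasizes[2] = { size, size };
        struct buf *bp;
        int error;

        if ((error = breadn(vp, blkno, size, rablks, rasizes, 2,
            &bp)) != 0) {
                brelse(bp);
                return (error);
        }
        /* ... use bp->b_data ... */
        brelse(bp);
        return (0);
}
#endif /* EXAMPLE_ONLY */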
     518             : 
     519             : /*
     520             :  * Called from interrupt context.
     521             :  */
     522             : void
     523           0 : bread_cluster_callback(struct buf *bp)
     524             : {
     525           0 :         struct buf **xbpp = bp->b_saveaddr;
     526             :         int i;
     527             : 
     528           0 :         if (xbpp[1] != NULL) {
     529           0 :                 size_t newsize = xbpp[1]->b_bufsize;
     530             : 
     531             :                 /*
     532             :                  * Shrink this buffer's mapping to only cover its part of
     533             :                  * the total I/O.
     534             :                  */
     535           0 :                 buf_fix_mapping(bp, newsize);
     536           0 :                 bp->b_bcount = newsize;
     537           0 :         }
     538             : 
     539             :         /* Invalidate read-ahead buffers if read short */
     540           0 :         if (bp->b_resid > 0) {
     541           0 :                 for (i = 0; xbpp[i] != NULL; i++)
     542             :                         continue;
     543           0 :                 for (i = i - 1; i != 0; i--) {
     544           0 :                         if (xbpp[i]->b_bufsize <= bp->b_resid) {
     545           0 :                                 bp->b_resid -= xbpp[i]->b_bufsize;
     546           0 :                                 SET(xbpp[i]->b_flags, B_INVAL);
     547           0 :                         } else if (bp->b_resid > 0) {
     548           0 :                                 bp->b_resid = 0;
     549           0 :                                 SET(xbpp[i]->b_flags, B_INVAL);
     550             :                         } else
     551             :                                 break;
     552             :                 }
     553             :         }
     554             : 
     555           0 :         for (i = 1; xbpp[i] != NULL; i++) {
     556           0 :                 if (ISSET(bp->b_flags, B_ERROR))
     557           0 :                         SET(xbpp[i]->b_flags, B_INVAL | B_ERROR);
     558           0 :                 biodone(xbpp[i]);
     559             :         }
     560             : 
     561           0 :         free(xbpp, M_TEMP, 0);
     562             : 
     563           0 :         if (ISSET(bp->b_flags, B_ASYNC)) {
     564           0 :                 brelse(bp);
     565           0 :         } else {
     566           0 :                 CLR(bp->b_flags, B_WANTED);
     567           0 :                 wakeup(bp);
     568             :         }
     569           0 : }
     570             : 
     571             : /*
     572             :  * Read-ahead multiple disk blocks, but make sure only one (big) I/O
     573             :  * request is sent to the disk.
     574             :  * XXX This should probably be dropped and breadn should instead be optimized
     575             :  * XXX to do fewer I/O requests.
     576             :  */
     577             : int
     578           0 : bread_cluster(struct vnode *vp, daddr_t blkno, int size, struct buf **rbpp)
     579             : {
     580             :         struct buf *bp, **xbpp;
     581           0 :         int howmany, maxra, i, inc;
     582           0 :         daddr_t sblkno;
     583             : 
     584           0 :         *rbpp = bio_doread(vp, blkno, size, 0);
     585             : 
     586             :         /*
     587             :          * If the buffer is in the cache skip any I/O operation.
     588             :          */
     589           0 :         if (ISSET((*rbpp)->b_flags, B_CACHE))
     590             :                 goto out;
     591             : 
     592           0 :         if (size != round_page(size))
     593             :                 goto out;
     594             : 
     595           0 :         if (VOP_BMAP(vp, blkno + 1, NULL, &sblkno, &maxra))
     596             :                 goto out;
     597             : 
     598           0 :         maxra++;
     599           0 :         if (sblkno == -1 || maxra < 2)
     600             :                 goto out;
     601             : 
     602           0 :         howmany = MAXPHYS / size;
     603           0 :         if (howmany > maxra)
     604           0 :                 howmany = maxra;
     605             : 
     606           0 :         xbpp = mallocarray(howmany + 1, sizeof(struct buf *), M_TEMP, M_NOWAIT);
     607           0 :         if (xbpp == NULL)
     608             :                 goto out;
     609             : 
     610           0 :         for (i = howmany - 1; i >= 0; i--) {
     611             :                 size_t sz;
     612             : 
     613             :                 /*
      614             :                  * The first buffer is allocated large enough to cover
      615             :                  * what all the other buffers need.
     616             :                  */
     617           0 :                 sz = i == 0 ? howmany * size : 0;
     618             : 
     619           0 :                 xbpp[i] = buf_get(vp, blkno + i + 1, sz);
     620           0 :                 if (xbpp[i] == NULL) {
     621           0 :                         for (++i; i < howmany; i++) {
     622           0 :                                 SET(xbpp[i]->b_flags, B_INVAL);
     623           0 :                                 brelse(xbpp[i]);
     624             :                         }
     625           0 :                         free(xbpp, M_TEMP, 0);
     626           0 :                         goto out;
     627             :                 }
     628           0 :         }
     629             : 
     630           0 :         bp = xbpp[0];
     631             : 
     632           0 :         xbpp[howmany] = NULL;
     633             : 
     634           0 :         inc = btodb(size);
     635             : 
     636           0 :         for (i = 1; i < howmany; i++) {
     637           0 :                 bcstats.pendingreads++;
     638           0 :                 bcstats.numreads++;
      639             :                 /*
      640             :                  * We set B_DMA here because bp above will be B_DMA,
      641             :                  * and we are playing buffer slice-n-dice games from
      642             :                  * the memory allocated in bp.
      643             :                  */
     644           0 :                 SET(xbpp[i]->b_flags, B_DMA | B_READ | B_ASYNC);
     645           0 :                 xbpp[i]->b_blkno = sblkno + (i * inc);
     646           0 :                 xbpp[i]->b_bufsize = xbpp[i]->b_bcount = size;
     647           0 :                 xbpp[i]->b_data = NULL;
     648           0 :                 xbpp[i]->b_pobj = bp->b_pobj;
     649           0 :                 xbpp[i]->b_poffs = bp->b_poffs + (i * size);
     650             :         }
     651             : 
     652           0 :         KASSERT(bp->b_lblkno == blkno + 1);
     653           0 :         KASSERT(bp->b_vp == vp);
     654             : 
     655           0 :         bp->b_blkno = sblkno;
     656           0 :         SET(bp->b_flags, B_READ | B_ASYNC | B_CALL);
     657             : 
     658           0 :         bp->b_saveaddr = (void *)xbpp;
     659           0 :         bp->b_iodone = bread_cluster_callback;
     660             : 
     661           0 :         bcstats.pendingreads++;
     662           0 :         bcstats.numreads++;
     663           0 :         VOP_STRATEGY(bp);
     664           0 :         curproc->p_ru.ru_inblock++;
     665             : 
     666             : out:
     667           0 :         return (biowait(*rbpp));
     668           0 : }
     669             : 
     670             : /*
     671             :  * Block write.  Described in Bach (p.56)
     672             :  */
     673             : int
     674           0 : bwrite(struct buf *bp)
     675             : {
     676             :         int rv, async, wasdelayed, s;
     677             :         struct vnode *vp;
     678             :         struct mount *mp;
     679             : 
     680           0 :         vp = bp->b_vp;
     681           0 :         if (vp != NULL)
     682           0 :                 mp = vp->v_type == VBLK? vp->v_specmountpoint : vp->v_mount;
     683             :         else
     684             :                 mp = NULL;
     685             : 
     686             :         /*
     687             :          * Remember buffer type, to switch on it later.  If the write was
     688             :          * synchronous, but the file system was mounted with MNT_ASYNC,
     689             :          * convert it to a delayed write.
     690             :          * XXX note that this relies on delayed tape writes being converted
     691             :          * to async, not sync writes (which is safe, but ugly).
     692             :          */
     693           0 :         async = ISSET(bp->b_flags, B_ASYNC);
     694           0 :         if (!async && mp && ISSET(mp->mnt_flag, MNT_ASYNC)) {
     695           0 :                 bdwrite(bp);
     696           0 :                 return (0);
     697             :         }
     698             : 
     699             :         /*
     700             :          * Collect statistics on synchronous and asynchronous writes.
     701             :          * Writes to block devices are charged to their associated
     702             :          * filesystem (if any).
     703             :          */
     704           0 :         if (mp != NULL) {
     705           0 :                 if (async)
     706           0 :                         mp->mnt_stat.f_asyncwrites++;
     707             :                 else
     708           0 :                         mp->mnt_stat.f_syncwrites++;
     709             :         }
     710           0 :         bcstats.pendingwrites++;
     711           0 :         bcstats.numwrites++;
     712             : 
     713           0 :         wasdelayed = ISSET(bp->b_flags, B_DELWRI);
     714           0 :         CLR(bp->b_flags, (B_READ | B_DONE | B_ERROR | B_DELWRI));
     715             : 
     716           0 :         s = splbio();
     717             : 
     718             :         /*
     719             :          * If not synchronous, pay for the I/O operation and make
     720             :          * sure the buf is on the correct vnode queue.  We have
     721             :          * to do this now, because if we don't, the vnode may not
     722             :          * be properly notified that its I/O has completed.
     723             :          */
     724           0 :         if (wasdelayed) {
     725           0 :                 reassignbuf(bp);
     726           0 :         } else
     727           0 :                 curproc->p_ru.ru_oublock++;
     728             : 
     729             : 
     730             :         /* Initiate disk write.  Make sure the appropriate party is charged. */
     731           0 :         bp->b_vp->v_numoutput++;
     732           0 :         splx(s);
     733           0 :         buf_flip_dma(bp);
     734           0 :         SET(bp->b_flags, B_WRITEINPROG);
     735           0 :         VOP_STRATEGY(bp);
     736             : 
     737             :         /*
     738             :          * If the queue is above the high water mark, wait till
     739             :          * the number of outstanding write bufs drops below the low
     740             :          * water mark.
     741             :          */
     742           0 :         if (bp->b_bq)
     743           0 :                 bufq_wait(bp->b_bq);
     744             : 
     745           0 :         if (async)
     746           0 :                 return (0);
     747             : 
     748             :         /*
     749             :          * If I/O was synchronous, wait for it to complete.
     750             :          */
     751           0 :         rv = biowait(bp);
     752             : 
     753             :         /* Release the buffer. */
     754           0 :         brelse(bp);
     755             : 
     756           0 :         return (rv);
     757           0 : }
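/*
 * Editor's sketch (hypothetical, not part of the original file): a
 * synchronous modify-and-write cycle.  getblk() returns the buffer
 * busy; a synchronous bwrite() waits for the I/O and releases the
 * buffer itself.
 */
#ifdef EXAMPLE_ONLY
int
example_write(struct vnode *vp, daddr_t blkno, int size)
{
        struct buf *bp;

        bp = getblk(vp, blkno, size, 0, 0);
        if (bp == NULL)
                return (EIO);   /* defensive; illustrative error choice */
        /* ... fill bp->b_data ... */
        return (bwrite(bp));
}
#endif /* EXAMPLE_ONLY */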
     758             : 
     759             : 
     760             : /*
     761             :  * Delayed write.
     762             :  *
     763             :  * The buffer is marked dirty, but is not queued for I/O.
     764             :  * This routine should be used when the buffer is expected
     765             :  * to be modified again soon, typically a small write that
     766             :  * partially fills a buffer.
     767             :  *
     768             :  * NB: magnetic tapes cannot be delayed; they must be
     769             :  * written in the order that the writes are requested.
     770             :  *
     771             :  * Described in Leffler, et al. (pp. 208-213).
     772             :  */
     773             : void
     774           0 : bdwrite(struct buf *bp)
     775             : {
     776             :         int s;
     777             : 
     778             :         /*
     779             :          * If the block hasn't been seen before:
     780             :          *      (1) Mark it as having been seen,
      781             :          *      (2) Charge for the write,
      782             :          *      (3) Make sure it's on its vnode's correct block list,
      783             :          *      (4) If rewritten, move it to the end of the dirty list.
     784             :          */
     785           0 :         if (!ISSET(bp->b_flags, B_DELWRI)) {
     786           0 :                 SET(bp->b_flags, B_DELWRI);
     787           0 :                 s = splbio();
     788           0 :                 buf_flip_dma(bp);
     789           0 :                 reassignbuf(bp);
     790           0 :                 splx(s);
     791           0 :                 curproc->p_ru.ru_oublock++;          /* XXX */
     792           0 :         }
     793             : 
     794             :         /* The "write" is done, so mark and release the buffer. */
     795           0 :         CLR(bp->b_flags, B_NEEDCOMMIT);
     796           0 :         SET(bp->b_flags, B_DONE);
     797           0 :         brelse(bp);
     798           0 : }
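/*
 * Editor's sketch (hypothetical, not part of the original file): the
 * delayed write pattern for a small partial update.  bdwrite() marks
 * the buffer dirty and releases it without starting I/O; the cleaner
 * or a later rewrite pushes it to disk.
 */
#ifdef EXAMPLE_ONLY
int
example_delayed_update(struct vnode *vp, daddr_t blkno, int size)
{
        struct buf *bp;
        int error;

        if ((error = bread(vp, blkno, size, &bp)) != 0) {
                brelse(bp);
                return (error);
        }
        /* ... modify part of bp->b_data ... */
        bdwrite(bp);            /* dirty, released, no I/O started */
        return (0);
}
#endif /* EXAMPLE_ONLY */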
     799             : 
     800             : /*
     801             :  * Asynchronous block write; just an asynchronous bwrite().
     802             :  */
     803             : void
     804           0 : bawrite(struct buf *bp)
     805             : {
     806             : 
     807           0 :         SET(bp->b_flags, B_ASYNC);
     808           0 :         VOP_BWRITE(bp);
     809           0 : }
     810             : 
     811             : /*
     812             :  * Must be called at splbio()
     813             :  */
     814             : void
     815           0 : buf_dirty(struct buf *bp)
     816             : {
     817           0 :         splassert(IPL_BIO);
     818             : 
     819             : #ifdef DIAGNOSTIC
     820           0 :         if (!ISSET(bp->b_flags, B_BUSY))
     821           0 :                 panic("Trying to dirty buffer on freelist!");
     822             : #endif
     823             : 
     824           0 :         if (ISSET(bp->b_flags, B_DELWRI) == 0) {
     825           0 :                 SET(bp->b_flags, B_DELWRI);
     826           0 :                 buf_flip_dma(bp);
     827           0 :                 reassignbuf(bp);
     828           0 :         }
     829           0 : }
     830             : 
     831             : /*
     832             :  * Must be called at splbio()
     833             :  */
     834             : void
     835           0 : buf_undirty(struct buf *bp)
     836             : {
     837           0 :         splassert(IPL_BIO);
     838             : 
     839             : #ifdef DIAGNOSTIC
     840           0 :         if (!ISSET(bp->b_flags, B_BUSY))
     841           0 :                 panic("Trying to undirty buffer on freelist!");
     842             : #endif
     843           0 :         if (ISSET(bp->b_flags, B_DELWRI)) {
     844           0 :                 CLR(bp->b_flags, B_DELWRI);
     845           0 :                 reassignbuf(bp);
     846           0 :         }
     847           0 : }
     848             : 
     849             : /*
      850             :  * Release a buffer onto the free lists.
     851             :  * Described in Bach (p. 46).
     852             :  */
     853             : void
     854           0 : brelse(struct buf *bp)
     855             : {
     856             :         int s;
     857             : 
     858           0 :         s = splbio();
     859             : 
     860           0 :         if (bp->b_data != NULL)
     861           0 :                 KASSERT(bp->b_bufsize > 0);
     862             : 
     863             :         /*
     864             :          * Determine which queue the buffer should be on, then put it there.
     865             :          */
     866             : 
     867             :         /* If it's not cacheable, or an error, mark it invalid. */
     868           0 :         if (ISSET(bp->b_flags, (B_NOCACHE|B_ERROR)))
     869           0 :                 SET(bp->b_flags, B_INVAL);
     870             : 
     871           0 :         if (ISSET(bp->b_flags, B_INVAL)) {
     872             :                 /*
     873             :                  * If the buffer is invalid, free it now rather than leaving
     874             :                  * it in a queue and wasting memory.
     875             :                  */
     876           0 :                 if (LIST_FIRST(&bp->b_dep) != NULL)
     877           0 :                         buf_deallocate(bp);
     878             : 
     879           0 :                 if (ISSET(bp->b_flags, B_DELWRI)) {
     880           0 :                         CLR(bp->b_flags, B_DELWRI);
     881           0 :                 }
     882             : 
     883           0 :                 if (bp->b_vp) {
     884           0 :                         RBT_REMOVE(buf_rb_bufs, &bp->b_vp->v_bufs_tree, bp);
     885           0 :                         brelvp(bp);
     886           0 :                 }
     887           0 :                 bp->b_vp = NULL;
     888             : 
     889             :                 /*
     890             :                  * Wake up any processes waiting for _this_ buffer to
     891             :                  * become free. They are not allowed to grab it
     892             :                  * since it will be freed. But the only sleeper is
     893             :                  * getblk and it will restart the operation after
     894             :                  * sleep.
     895             :                  */
     896           0 :                 if (ISSET(bp->b_flags, B_WANTED)) {
     897           0 :                         CLR(bp->b_flags, B_WANTED);
     898           0 :                         wakeup(bp);
     899           0 :                 }
     900           0 :                 buf_put(bp);
     901           0 :         } else {
     902             :                 /*
     903             :                  * It has valid data.  Put it on the end of the appropriate
     904             :                  * queue, so that it'll stick around for as long as possible.
     905             :                  */
     906           0 :                 bufcache_release(bp);
     907             : 
     908             :                 /* Unlock the buffer. */
     909           0 :                 CLR(bp->b_flags, (B_AGE | B_ASYNC | B_NOCACHE | B_DEFERRED));
     910           0 :                 buf_release(bp);
     911             : 
     912             :                 /* Wake up any processes waiting for _this_ buffer to
     913             :                  * become free. */
     914           0 :                 if (ISSET(bp->b_flags, B_WANTED)) {
     915           0 :                         CLR(bp->b_flags, B_WANTED);
     916           0 :                         wakeup(bp);
     917           0 :                 }
     918             :         }
     919             : 
     920             :         /* Wake up syncer and cleaner processes waiting for buffers. */
     921           0 :         if (nobuffers) {
     922           0 :                 nobuffers = 0;
     923           0 :                 wakeup(&nobuffers);
     924           0 :         }
     925             : 
     926             :         /* Wake up any processes waiting for any buffer to become free. */
     927           0 :         if (needbuffer && bcstats.dmapages < targetpages &&
     928           0 :             bcstats.kvaslots_avail > RESERVE_SLOTS) {
     929           0 :                 needbuffer = 0;
     930           0 :                 wakeup(&needbuffer);
     931           0 :         }
     932             : 
     933           0 :         splx(s);
     934           0 : }
     935             : 
     936             : /*
      937             :  * Determine if a block is in the cache by searching the vnode's buffer
      938             :  * tree. If it's there, return a pointer to it, unless it's marked invalid.
     939             :  */
     940             : struct buf *
     941           0 : incore(struct vnode *vp, daddr_t blkno)
     942             : {
     943             :         struct buf *bp;
     944           0 :         struct buf b;
     945             :         int s;
     946             : 
     947           0 :         s = splbio();
     948             : 
     949             :         /* Search buf lookup tree */
     950           0 :         b.b_lblkno = blkno;
     951           0 :         bp = RBT_FIND(buf_rb_bufs, &vp->v_bufs_tree, &b);
     952           0 :         if (bp != NULL && ISSET(bp->b_flags, B_INVAL))
     953           0 :                 bp = NULL;
     954             : 
     955           0 :         splx(s);
     956           0 :         return (bp);
     957           0 : }
     958             : 
     959             : /*
     960             :  * Get a block of requested size that is associated with
     961             :  * a given vnode and block offset. If it is found in the
     962             :  * block cache, mark it as having been found, make it busy
     963             :  * and return it. Otherwise, return an empty block of the
     964             :  * correct size. It is up to the caller to ensure that the
      965             :  * cached blocks are of the correct size.
     966             :  */
     967             : struct buf *
     968           0 : getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo)
     969             : {
     970             :         struct buf *bp;
     971           0 :         struct buf b;
     972           0 :         int s, error;
     973             : 
     974             :         /*
     975             :          * XXX
     976             :          * The following is an inlined version of 'incore()', but with
     977             :          * the 'invalid' test moved to after the 'busy' test.  It's
     978             :          * necessary because there are some cases in which the NFS
     979             :          * code sets B_INVAL prior to writing data to the server, but
     980             :          * in which the buffers actually contain valid data.  In this
     981             :          * case, we can't allow the system to allocate a new buffer for
     982             :          * the block until the write is finished.
     983             :          */
     984             : start:
     985           0 :         s = splbio();
     986           0 :         b.b_lblkno = blkno;
     987           0 :         bp = RBT_FIND(buf_rb_bufs, &vp->v_bufs_tree, &b);
     988           0 :         if (bp != NULL) {
     989           0 :                 if (ISSET(bp->b_flags, B_BUSY)) {
     990           0 :                         SET(bp->b_flags, B_WANTED);
     991           0 :                         error = tsleep(bp, slpflag | (PRIBIO + 1), "getblk",
     992             :                             slptimeo);
     993           0 :                         splx(s);
     994           0 :                         if (error)
     995           0 :                                 return (NULL);
     996           0 :                         goto start;
     997             :                 }
     998             : 
     999           0 :                 if (!ISSET(bp->b_flags, B_INVAL)) {
    1000           0 :                         bcstats.cachehits++;
    1001           0 :                         SET(bp->b_flags, B_CACHE);
    1002           0 :                         bufcache_take(bp);
    1003           0 :                         buf_acquire(bp);
    1004           0 :                         splx(s);
    1005           0 :                         return (bp);
    1006             :                 }
    1007             :         }
    1008           0 :         splx(s);
    1009             : 
    1010           0 :         if ((bp = buf_get(vp, blkno, size)) == NULL)
    1011           0 :                 goto start;
    1012             : 
    1013           0 :         return (bp);
    1014           0 : }
    1015             : 
    1016             : /*
    1017             :  * Get an empty, disassociated buffer of given size.
    1018             :  */
    1019             : struct buf *
    1020           0 : geteblk(size_t size)
    1021             : {
    1022             :         struct buf *bp;
    1023             : 
    1024           0 :         while ((bp = buf_get(NULL, 0, size)) == NULL)
    1025           0 :                 continue;
    1026             : 
    1027           0 :         return (bp);
    1028             : }
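/*
 * Editor's sketch (hypothetical, not part of the original file):
 * geteblk() hands back a busy, vnode-less buffer and never returns
 * NULL (it loops internally).  Mark it invalid before brelse() so the
 * scratch contents are not cached.
 */
#ifdef EXAMPLE_ONLY
void
example_scratch(void)
{
        struct buf *bp;

        bp = geteblk(MAXPHYS);          /* scratch space for one transfer */
        /* ... use bp->b_data as scratch ... */
        SET(bp->b_flags, B_INVAL);
        brelse(bp);
}
#endif /* EXAMPLE_ONLY */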
    1029             : 
    1030             : /*
    1031             :  * Allocate a buffer.
    1032             :  * If vp is given, put it into the buffer cache for that vnode.
    1033             :  * If size != 0, allocate memory and call buf_map().
    1034             :  * If there is already a buffer for the given vnode/blkno, return NULL.
    1035             :  */
    1036             : struct buf *
    1037           0 : buf_get(struct vnode *vp, daddr_t blkno, size_t size)
    1038             : {
    1039             :         struct buf *bp;
    1040           0 :         int poolwait = size == 0 ? PR_NOWAIT : PR_WAITOK;
    1041             :         int npages;
    1042             :         int s;
    1043             : 
    1044           0 :         s = splbio();
    1045           0 :         if (size) {
    1046             :                 /*
    1047             :                  * Wake up the cleaner if we have lots of dirty pages,
    1048             :                  * or if we are getting low on buffer cache kva.
    1049             :                  */
    1050           0 :                 if (UNCLEAN_PAGES >= hidirtypages ||
    1051           0 :                         bcstats.kvaslots_avail <= 2 * RESERVE_SLOTS)
    1052           0 :                         wakeup(&bd_req);
    1053             : 
    1054           0 :                 npages = atop(round_page(size));
    1055             : 
    1056             :                 /*
     1057             :                  * If our cache has previously been shrunk,
     1058             :                  * allow it to grow again with use, up to
     1059             :                  * bufhighpages (cachepercent).
    1060             :                  */
    1061           0 :                 if (bufpages < bufhighpages)
    1062           0 :                         bufadjust(bufhighpages);
    1063             : 
    1064             :                 /*
    1065             :                  * If we would go over the page target with our
    1066             :                  * new allocation, free enough buffers first
    1067             :                  * to stay at the target with our new allocation.
    1068             :                  */
    1069           0 :                 while ((bcstats.dmapages + npages > targetpages) &&
    1070           0 :                     (bp = bufcache_getdmacleanbuf())) {
    1071           0 :                         bufcache_take(bp);
    1072           0 :                         if (bp->b_vp) {
    1073           0 :                                 RBT_REMOVE(buf_rb_bufs,
    1074             :                                     &bp->b_vp->v_bufs_tree, bp);
    1075           0 :                                 brelvp(bp);
    1076           0 :                         }
    1077           0 :                         buf_put(bp);
    1078             :                 }
    1079             : 
    1080             :                 /*
    1081             :                  * If we get here, we tried to free the world
    1082             :                  * above and still couldn't get below the target.
    1083             :                  * Wake the cleaner and wait for it to push some buffers out.
    1084             :                  */
    1085           0 :                 if ((bcstats.dmapages + npages > targetpages ||
    1086           0 :                     bcstats.kvaslots_avail <= RESERVE_SLOTS) &&
    1087           0 :                     curproc != syncerproc && curproc != cleanerproc) {
    1088           0 :                         wakeup(&bd_req);
    1089           0 :                         needbuffer++;
    1090           0 :                         tsleep(&needbuffer, PRIBIO, "needbuffer", 0);
    1091           0 :                         splx(s);
    1092           0 :                         return (NULL);
    1093             :                 }
    1094           0 :                 if (bcstats.dmapages + npages > bufpages) {
    1095             :                         /* cleaner or syncer */
    1096           0 :                         nobuffers = 1;
    1097           0 :                         tsleep(&nobuffers, PRIBIO, "nobuffers", 0);
    1098           0 :                         splx(s);
    1099           0 :                         return (NULL);
    1100             :                 }
    1101             :         }
    1102             : 
    1103           0 :         bp = pool_get(&bufpool, poolwait|PR_ZERO);
    1104             : 
    1105           0 :         if (bp == NULL) {
    1106           0 :                 splx(s);
    1107           0 :                 return (NULL);
    1108             :         }
    1109             : 
    1110           0 :         bp->b_freelist.tqe_next = NOLIST;
    1111           0 :         bp->b_dev = NODEV;
    1112           0 :         LIST_INIT(&bp->b_dep);
    1113           0 :         bp->b_bcount = size;
    1114             : 
    1115           0 :         buf_acquire_nomap(bp);
    1116             : 
    1117           0 :         if (vp != NULL) {
    1118             :                 /*
    1119             :                  * We insert the buffer into the hash with B_BUSY set
    1120             :                  * while we allocate pages for it. This way any getblk
    1121             :                  * that happens while we allocate pages will wait for
    1122             :                  * this buffer instead of starting its own buf_get.
    1123             :                  *
    1124             :                  * But first, we check if someone beat us to it.
    1125             :                  */
    1126           0 :                 if (incore(vp, blkno)) {
    1127           0 :                         pool_put(&bufpool, bp);
    1128           0 :                         splx(s);
    1129           0 :                         return (NULL);
    1130             :                 }
    1131             : 
    1132           0 :                 bp->b_blkno = bp->b_lblkno = blkno;
    1133           0 :                 bgetvp(vp, bp);
    1134           0 :                 if (RBT_INSERT(buf_rb_bufs, &vp->v_bufs_tree, bp))
    1135           0 :                         panic("buf_get: dup lblk vp %p bp %p", vp, bp);
    1136             :         } else {
    1137           0 :                 bp->b_vnbufs.le_next = NOLIST;
    1138           0 :                 SET(bp->b_flags, B_INVAL);
    1139           0 :                 bp->b_vp = NULL;
    1140             :         }
    1141             : 
    1142           0 :         LIST_INSERT_HEAD(&bufhead, bp, b_list);
    1143           0 :         bcstats.numbufs++;
    1144             : 
    1145           0 :         if (size) {
    1146           0 :                 buf_alloc_pages(bp, round_page(size));
    1147           0 :                 KASSERT(ISSET(bp->b_flags, B_DMA));
    1148           0 :                 buf_map(bp);
    1149           0 :         }
    1150             : 
    1151           0 :         SET(bp->b_flags, B_BC);
    1152           0 :         splx(s);
    1153             : 
    1154           0 :         return (bp);
    1155           0 : }
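/*
 * Usage sketch (editorial, mirroring the retry loops in getblk() and
 * geteblk() above): buf_get() returns NULL both when another buffer for the
 * same vnode/blkno already exists (the incore() check) and when it had to
 * give up under memory pressure after sleeping, so callers retry from the
 * top, re-checking the cache each time:
 *
 *	struct buf *bp;
 *
 *	while ((bp = buf_get(vp, blkno, size)) == NULL)
 *		;	// re-check the cache and try again
 */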
    1156             : 
    1157             : /*
    1158             :  * Buffer cleaning daemon.
    1159             :  */
    1160             : void
    1161           0 : buf_daemon(void *arg)
    1162             : {
    1163             :         struct buf *bp = NULL;
    1164             :         int s, pushed = 0;
    1165             : 
    1166           0 :         s = splbio();
    1167           0 :         for (;;) {
    1168           0 :                 if (bp == NULL || (pushed >= 16 &&
    1169           0 :                     UNCLEAN_PAGES < hidirtypages &&
    1170           0 :                     bcstats.kvaslots_avail > 2 * RESERVE_SLOTS)){
    1171             :                         pushed = 0;
    1172             :                         /*
    1173             :                          * Wake up anyone who was waiting for buffers
    1174             :                          * to be released.
    1175             :                          */
    1176           0 :                         if (needbuffer) {
    1177           0 :                                 needbuffer = 0;
    1178           0 :                                 wakeup(&needbuffer);
    1179           0 :                         }
    1180           0 :                         tsleep(&bd_req, PRIBIO - 7, "cleaner", 0);
    1181           0 :                 }
    1182             : 
    1183           0 :                 while ((bp = bufcache_getdirtybuf())) {
    1184             : 
    1185           0 :                         if (UNCLEAN_PAGES < lodirtypages &&
    1186           0 :                             bcstats.kvaslots_avail > 2 * RESERVE_SLOTS &&
    1187           0 :                             pushed >= 16)
    1188             :                                 break;
    1189             : 
    1190           0 :                         bufcache_take(bp);
    1191           0 :                         buf_acquire(bp);
    1192           0 :                         splx(s);
    1193             : 
    1194           0 :                         if (ISSET(bp->b_flags, B_INVAL)) {
    1195           0 :                                 brelse(bp);
    1196           0 :                                 s = splbio();
    1197           0 :                                 continue;
    1198             :                         }
    1199             : #ifdef DIAGNOSTIC
    1200           0 :                         if (!ISSET(bp->b_flags, B_DELWRI))
    1201           0 :                                 panic("Clean buffer on dirty queue");
    1202             : #endif
    1203           0 :                         if (LIST_FIRST(&bp->b_dep) != NULL &&
    1204           0 :                             !ISSET(bp->b_flags, B_DEFERRED) &&
    1205           0 :                             buf_countdeps(bp, 0, 0)) {
    1206           0 :                                 SET(bp->b_flags, B_DEFERRED);
    1207           0 :                                 s = splbio();
    1208           0 :                                 bufcache_release(bp);
    1209           0 :                                 buf_release(bp);
    1210           0 :                                 continue;
    1211             :                         }
    1212             : 
    1213           0 :                         bawrite(bp);
    1214           0 :                         pushed++;
    1215             : 
    1216           0 :                         sched_pause(yield);
    1217             : 
    1218           0 :                         s = splbio();
    1219             :                 }
    1220             :         }
    1221             : }
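/*
 * A sketch of the wakeup protocol around the cleaner (summarizing the code
 * above, not adding to it): producers such as buf_get() call wakeup(&bd_req)
 * when dirty pages or buffer cache kva run low; the cleaner then pushes
 * dirty buffers out with bawrite() and, once pressure subsides, calls
 * wakeup(&needbuffer) so that threads sleeping in buf_get() can retry
 * their allocations.
 */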
    1222             : 
    1223             : /*
    1224             :  * Wait for operations on the buffer to complete.
    1225             :  * When they do, extract and return the I/O's error value.
    1226             :  */
    1227             : int
    1228           0 : biowait(struct buf *bp)
    1229             : {
    1230             :         int s;
    1231             : 
    1232           0 :         KASSERT(!(bp->b_flags & B_ASYNC));
    1233             : 
    1234           0 :         s = splbio();
    1235           0 :         while (!ISSET(bp->b_flags, B_DONE))
    1236           0 :                 tsleep(bp, PRIBIO + 1, "biowait", 0);
    1237           0 :         splx(s);
    1238             : 
    1239             :         /* check for interruption of I/O (e.g. via NFS), then errors. */
    1240           0 :         if (ISSET(bp->b_flags, B_EINTR)) {
    1241           0 :                 CLR(bp->b_flags, B_EINTR);
    1242           0 :                 return (EINTR);
    1243             :         }
    1244             : 
    1245           0 :         if (ISSET(bp->b_flags, B_ERROR))
    1246           0 :                 return (bp->b_error ? bp->b_error : EIO);
    1247             :         else
    1248           0 :                 return (0);
    1249           0 : }
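/*
 * Usage sketch (editorial; a hypothetical synchronous read in the style of
 * bread(), where "strategy" stands in for whatever routine queues the I/O):
 *
 *	bp->b_flags |= B_READ;
 *	strategy(bp);		// starts the I/O; biodone() fires on completion
 *	error = biowait(bp);	// sleeps until B_DONE, returns 0 or an errno
 */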
    1250             : 
    1251             : /*
    1252             :  * Mark I/O complete on a buffer.
    1253             :  *
    1254             :  * If a callback has been requested, e.g. the pageout
    1255             :  * daemon, do so. Otherwise, awaken waiting processes.
    1256             :  *
    1257             :  * [ Leffler, et al., says on p.247:
    1258             :  *      "This routine wakes up the blocked process, frees the buffer
    1259             :  *      for an asynchronous write, or, for a request by the pagedaemon
    1260             :  *      process, invokes a procedure specified in the buffer structure" ]
    1261             :  *
    1262             :  * In real life, the pagedaemon (or other system processes) wants
    1263             :  * to do async stuff too, and doesn't want the buffer brelse()'d.
    1264             :  * (for swap pager, that puts swap buffers on the free lists (!!!),
    1265             :  * for the vn device, that puts malloc'd buffers on the free lists!)
    1266             :  *
    1267             :  * Must be called at splbio().
    1268             :  */
    1269             : void
    1270           0 : biodone(struct buf *bp)
    1271             : {
    1272           0 :         splassert(IPL_BIO);
    1273             : 
    1274           0 :         if (ISSET(bp->b_flags, B_DONE))
    1275           0 :                 panic("biodone already");
    1276           0 :         SET(bp->b_flags, B_DONE);            /* note that it's done */
    1277             : 
    1278           0 :         if (bp->b_bq)
    1279           0 :                 bufq_done(bp->b_bq, bp);
    1280             : 
    1281           0 :         if (LIST_FIRST(&bp->b_dep) != NULL)
    1282           0 :                 buf_complete(bp);
    1283             : 
    1284           0 :         if (!ISSET(bp->b_flags, B_READ)) {
    1285           0 :                 CLR(bp->b_flags, B_WRITEINPROG);
    1286           0 :                 vwakeup(bp->b_vp);
    1287           0 :         }
    1288           0 :         if (bcstats.numbufs &&
    1289           0 :             (!(ISSET(bp->b_flags, B_RAW) || ISSET(bp->b_flags, B_PHYS)))) {
    1290           0 :                 if (!ISSET(bp->b_flags, B_READ)) {
    1291           0 :                         bcstats.pendingwrites--;
    1292           0 :                 } else
    1293           0 :                         bcstats.pendingreads--;
    1294             :         }
    1295           0 :         if (ISSET(bp->b_flags, B_CALL)) {    /* if necessary, call out */
    1296           0 :                 CLR(bp->b_flags, B_CALL);    /* but note callout done */
    1297           0 :                 (*bp->b_iodone)(bp);
    1298           0 :         } else {
    1299           0 :                 if (ISSET(bp->b_flags, B_ASYNC)) {/* if async, release it */
    1300           0 :                         brelse(bp);
    1301           0 :                 } else {                        /* or just wakeup the buffer */
    1302           0 :                         CLR(bp->b_flags, B_WANTED);
    1303           0 :                         wakeup(bp);
    1304             :                 }
    1305             :         }
    1306           0 : }
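/*
 * Callback sketch (editorial; "my_done" is an invented name): to have
 * biodone() invoke a completion routine instead of waking a sleeper or
 * releasing the buffer, set b_iodone and the B_CALL flag before starting
 * the I/O:
 *
 *	bp->b_iodone = my_done;
 *	SET(bp->b_flags, B_CALL | B_ASYNC);
 *	... start the I/O ...
 *
 * biodone() then clears B_CALL and calls (*b_iodone)(bp); the callback owns
 * the buffer from that point and must release it itself.
 */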
    1307             : 
    1308             : #ifdef DDB
    1309             : void    bcstats_print(int (*)(const char *, ...)
    1310             :     __attribute__((__format__(__kprintf__,1,2))));
    1311             : /*
    1312             :  * bcstats_print: ddb hook to print interesting buffer cache counters
    1313             :  */
    1314             : void
    1315           0 : bcstats_print(
    1316             :     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
    1317             : {
    1318           0 :         (*pr)("Current Buffer Cache status:\n");
    1319           0 :         (*pr)("numbufs %lld busymapped %lld, delwri %lld\n",
    1320           0 :             bcstats.numbufs, bcstats.busymapped, bcstats.delwribufs);
    1321           0 :         (*pr)("kvaslots %lld avail kva slots %lld\n",
    1322           0 :             bcstats.kvaslots, bcstats.kvaslots_avail);
    1323           0 :         (*pr)("bufpages %lld, dmapages %lld, dirtypages %lld\n",
    1324           0 :             bcstats.numbufpages, bcstats.dmapages, bcstats.numdirtypages);
    1325           0 :         (*pr)("pendingreads %lld, pendingwrites %lld\n",
    1326           0 :             bcstats.pendingreads, bcstats.pendingwrites);
    1327           0 :         (*pr)("highflips %lld, highflops %lld, dmaflips %lld\n",
    1328           0 :             bcstats.highflips, bcstats.highflops, bcstats.dmaflips);
    1329           0 : }
    1330             : #endif
    1331             : 
    1332             : void
    1333           0 : buf_adjcnt(struct buf *bp, long ncount)
    1334             : {
    1335           0 :         KASSERT(ncount <= bp->b_bufsize);
    1336           0 :         bp->b_bcount = ncount;
    1337           0 : }
    1338             : 
    1339             : /* bufcache freelist code below */
    1340             : /*
    1341             :  * Copyright (c) 2014 Ted Unangst <tedu@openbsd.org>
    1342             :  *
    1343             :  * Permission to use, copy, modify, and distribute this software for any
    1344             :  * purpose with or without fee is hereby granted, provided that the above
    1345             :  * copyright notice and this permission notice appear in all copies.
    1346             :  *
    1347             :  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    1348             :  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    1349             :  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    1350             :  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    1351             :  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    1352             :  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    1353             :  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    1354             :  */
    1355             : 
    1356             : /*
    1357             :  * The code below implements a variant of the 2Q buffer cache algorithm by
    1358             :  * Johnson and Shasha.
    1359             :  *
    1360             :  * General Outline
    1361             :  * We divide the buffer cache into three working sets: current, previous,
    1362             :  * and long term. Each list is itself LRU and buffers get promoted and moved
    1363             :  * around between them. A buffer starts its life in the current working set.
    1364             :  * As time passes and newer buffers push it out, it moves into the previous
    1365             :  * working set and becomes subject to recycling. But if it's accessed again from
    1366             :  * the previous working set, that's an indication that it's actually in the
    1367             :  * long term working set, so we promote it there. The separation of current
    1368             :  * and previous working sets prevents us from promoting a buffer that's only
    1369             :  * temporarily hot to the long term cache.
    1370             :  *
    1371             :  * The objective is to provide scan resistance by making the long term
    1372             :  * working set ineligible for immediate recycling, even as the current 
    1373             :  * working set is rapidly turned over.
    1374             :  *
    1375             :  * Implementation
    1376             :  * The code below identifies the current, previous, and long term sets as
    1377             :  * hotqueue, coldqueue, and warmqueue. The hot and warm queues are capped at
    1378             :  * 1/3 of the total clean pages, after which point they start pushing their
    1379             :  * oldest buffers into coldqueue.
    1380             :  * A buf always starts out with neither B_WARM nor B_COLD set (implying HOT).
    1381             :  * When released, it will be returned to the tail of the hotqueue list.
    1382             :  * When the hotqueue gets too large, the oldest hot buf will be moved to the
    1383             :  * coldqueue, with the B_COLD flag set. When a cold buf is released, we set
    1384             :  * the B_WARM flag and put it onto the warmqueue. Warm bufs are also
    1385             :  * directly returned to the end of the warmqueue. As with the hotqueue, when
    1386             :  * the warmqueue grows too large, B_WARM bufs are moved onto the coldqueue.
    1387             :  *
    1388             :  * Note that this design does still support large working sets, greater
    1389             :  * than the cap of hotqueue or warmqueue would imply. Buffers on the coldqueue
    1390             :  * are still cached, and that queue has no maximum length. The hot and warm
    1391             :  * queues form a Y feeding into the coldqueue. Moving bufs between queues is
    1392             :  * constant time, so this design decays to one long warm->cold queue.
    1393             :  *
    1394             :  * In the 2Q paper, hotqueue and coldqueue are A1in and A1out. The warmqueue
    1395             :  * is Am. For A1 we always cache the pages themselves, not just pointers to them.
    1396             :  *
    1397             :  * This implementation adds support for multiple 2q caches.
    1398             :  *
    1399             :  * If we have more than one 2Q cache, then as bufs fall off the cold queue
    1400             :  * for recycling, bufs that have been warm before (which retain the
    1401             :  * B_WARM flag in addition to B_COLD) can be put into the hot queue of
    1402             :  * a second-level 2Q cache. Buffers which are only B_COLD are
    1403             :  * recycled. Bufs falling off the last cache's cold queue are always
    1404             :  * recycled.
    1405             :  *
    1406             :  */
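/*
 * A minimal, self-contained userland sketch (editorial) of the hot/warm/cold
 * movement described above. All names here ("sbuf", "twoq", "chill",
 * "release", "reclaim") are invented for illustration; the kernel's page
 * accounting, locking, DMA handling, and multi-cache flipping are omitted.
 */
#include <sys/queue.h>
#include <stddef.h>

#define SB_COLD	0x1
#define SB_WARM	0x2

struct sbuf {
	TAILQ_ENTRY(sbuf) link;
	int flags;
};
TAILQ_HEAD(sbufq, sbuf);

struct twoq {
	struct sbufq hot, warm, cold;	/* A1in, Am, A1out in 2Q terms */
	int nhot, nwarm;		/* current queue sizes */
	int hotmax, warmmax;		/* caps; cold is unbounded */
};

/* Demote the oldest buffers of a capped queue onto the cold queue. */
static void
chill(struct twoq *q, struct sbufq *src, int *n, int max)
{
	struct sbuf *b;

	while (*n > max && (b = TAILQ_FIRST(src)) != NULL) {
		TAILQ_REMOVE(src, b, link);
		(*n)--;
		b->flags |= SB_COLD;	/* the warm flag is deliberately kept */
		TAILQ_INSERT_TAIL(&q->cold, b, link);
	}
}

/* Release a buffer back to the cache, promoting reused cold buffers to warm. */
static void
release(struct twoq *q, struct sbuf *b)
{
	if (b->flags & (SB_WARM | SB_COLD)) {
		/* reused after cooling off: part of the long-term set */
		b->flags = SB_WARM;
		TAILQ_INSERT_TAIL(&q->warm, b, link);
		q->nwarm++;
		chill(q, &q->warm, &q->nwarm, q->warmmax);
	} else {
		/* first release: current working set stays hot */
		TAILQ_INSERT_TAIL(&q->hot, b, link);
		q->nhot++;
		chill(q, &q->hot, &q->nhot, q->hotmax);
	}
}

/* Recycling victims always come from the head of the cold queue. */
static struct sbuf *
reclaim(struct twoq *q)
{
	struct sbuf *b = TAILQ_FIRST(&q->cold);

	if (b != NULL)
		TAILQ_REMOVE(&q->cold, b, link);
	return b;
}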
    1407             : 
    1408             : /*
    1409             :  * This function is called when a hot or warm queue may have exceeded its
    1410             :  * size limit. It will move a buf to the coldqueue.
    1411             :  */
    1412             : int chillbufs(struct bufcache *cache, struct bufqueue *queue,
    1413             :     int64_t *queuepages);
    1414             : 
    1415             : void
    1416           0 : bufcache_init(void)
    1417             : {
    1418             :         int i;
    1419           0 :         for (i=0; i < NUM_CACHES; i++) {
    1420           0 :                 TAILQ_INIT(&cleancache[i].hotqueue);
    1421           0 :                 TAILQ_INIT(&cleancache[i].coldqueue);
    1422           0 :                 TAILQ_INIT(&cleancache[i].warmqueue);
    1423             :         }
    1424           0 :         TAILQ_INIT(&dirtyqueue);
    1425           0 : }
    1426             : 
    1427             : /*
    1428             :  * if the buffer caches have shrunk, we may need to rebalance our queues.
    1429             :  */
    1430             : void
    1431           0 : bufcache_adjust(void)
    1432             : {
    1433             :         int i;
    1434           0 :         for (i=0; i < NUM_CACHES; i++) {
    1435           0 :                 while (chillbufs(&cleancache[i], &cleancache[i].warmqueue,
    1436           0 :                     &cleancache[i].warmbufpages) ||
    1437           0 :                     chillbufs(&cleancache[i], &cleancache[i].hotqueue,
    1438           0 :                     &cleancache[i].hotbufpages))
    1439           0 :                         continue;
    1440             :         }
    1441           0 : }
    1442             : 
    1443             : /*
    1444             :  * Get a clean buffer from the cache. If "discard" is set, do not promote
    1445             :  * previously warm buffers as normal, because we are tossing everything
    1446             :  * away, such as during hibernation.
    1447             :  */
    1448             : struct buf *
    1449           0 : bufcache_getcleanbuf(int cachenum, int discard)
    1450             : {
    1451             :         struct buf *bp = NULL;
    1452           0 :         struct bufcache *cache = &cleancache[cachenum];
    1453             : 
    1454           0 :         splassert(IPL_BIO);
    1455             : 
    1456             :         /* try the cold queue */
    1457           0 :         while ((bp = TAILQ_FIRST(&cache->coldqueue))) {
    1458           0 :                 if ((!discard) &&
    1459           0 :                     cachenum < NUM_CACHES - 1 && ISSET(bp->b_flags, B_WARM)) {
    1460           0 :                         int64_t pages = atop(bp->b_bufsize);
    1461             :                         struct bufcache *newcache;
    1462             : 
    1463           0 :                         KASSERT(bp->cache == cachenum);
    1464             : 
    1465             :                         /*
    1466             :                          * If this buffer was warm before, move it to
    1467             :                          * the hot queue in the next cache
    1468             :                          */
    1469             : 
    1470           0 :                         if (fliphigh) {
    1471             :                                 /*
    1472             :                                  * If we are in the DMA cache, try to flip the
    1473             :                                  * buffer up high to move it on to the other
    1474             :                  * caches. If we can't move the buffer to high
    1475             :                  * memory without sleeping, we give it up and
    1476             :                  * return it rather than fight for more memory
    1477             :                  * against non-buffer-cache competitors.
    1478             :                                  */
    1479           0 :                                 SET(bp->b_flags, B_BUSY);
    1480           0 :                                 if (bp->cache == 0 && buf_flip_high(bp) == -1) {
    1481           0 :                                         CLR(bp->b_flags, B_BUSY);
    1482           0 :                                         return bp;
    1483             :                                 }
    1484           0 :                                 CLR(bp->b_flags, B_BUSY);
    1485           0 :                         }
    1486             : 
    1487             :                         /* Move the buffer to the hot queue in the next cache */
    1488           0 :                         TAILQ_REMOVE(&cache->coldqueue, bp, b_freelist);
    1489           0 :                         CLR(bp->b_flags, B_WARM);
    1490           0 :                         CLR(bp->b_flags, B_COLD);
    1491           0 :                         bp->cache++;
    1492           0 :                         newcache = &cleancache[bp->cache];
    1493           0 :                         newcache->cachepages += pages;
    1494           0 :                         newcache->hotbufpages += pages;
    1495           0 :                         chillbufs(newcache, &newcache->hotqueue,
    1496             :                             &newcache->hotbufpages);
    1497           0 :                         TAILQ_INSERT_TAIL(&newcache->hotqueue, bp, b_freelist);
    1498           0 :                 }
    1499             :                 else
    1500             :                         /* buffer is cold - give it up */
    1501           0 :                         return bp;
    1502             :         }
    1503           0 :         if ((bp = TAILQ_FIRST(&cache->warmqueue)))
    1504           0 :                 return bp;
    1505           0 :         if ((bp = TAILQ_FIRST(&cache->hotqueue)))
    1506           0 :                 return bp;
    1507             :         return bp;
    1508           0 : }
    1509             : 
    1510             : struct buf *
    1511           0 : bufcache_getcleanbuf_range(int start, int end, int discard)
    1512             : {
    1513             :         int i, j = start, q = end;
    1514             :         struct buf *bp = NULL;
    1515             : 
    1516             :         /*
    1517             :          * XXX in theory we could promote warm buffers into a previous queue,
    1518             :          * so in the pathological case where we go through all the caches
    1519             :          * without getting a buffer, we have to start at the beginning again.
    1520             :          */
    1521           0 :         while (j <= q) {
    1522           0 :                 for (i = q; i >= j; i--)
    1523           0 :                         if ((bp = bufcache_getcleanbuf(i, discard)))
    1524           0 :                                 return (bp);
    1525           0 :                 j++;
    1526             :         }
    1527           0 :         return bp;
    1528           0 : }
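/*
 * Worked example of the scan order above (editorial; assuming NUM_CACHES == 2,
 * so start == 0 and end == 1): the inner loop walks i from q down to j, and j
 * advances after each full pass, so the caches are tried in the order 1, 0,
 * then 1 again - preferring the highest cache first and retrying it once the
 * lower caches are exhausted.
 */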
    1529             : 
    1530             : struct buf *
    1531           0 : bufcache_gethighcleanbuf(void)
    1532             : {
    1533           0 :         if (!fliphigh)
    1534           0 :                 return NULL;
    1535           0 :         return bufcache_getcleanbuf_range(DMA_CACHE + 1, NUM_CACHES - 1, 0);
    1536           0 : }
    1537             : 
    1538             : struct buf *
    1539           0 : bufcache_getdmacleanbuf(void)
    1540             : {
    1541           0 :         if (fliphigh)
    1542           0 :                 return bufcache_getcleanbuf_range(DMA_CACHE, DMA_CACHE, 0);
    1543           0 :         return bufcache_getcleanbuf_range(DMA_CACHE, NUM_CACHES - 1, 0);
    1544           0 : }
    1545             : 
    1546             : struct buf *
    1547           0 : bufcache_getdirtybuf(void)
    1548             : {
    1549           0 :         return TAILQ_FIRST(&dirtyqueue);
    1550             : }
    1551             : 
    1552             : void
    1553           0 : bufcache_take(struct buf *bp)
    1554             : {
    1555             :         struct bufqueue *queue;
    1556             :         int64_t pages;
    1557             : 
    1558           0 :         splassert(IPL_BIO);
    1559           0 :         KASSERT(ISSET(bp->b_flags, B_BC));
    1560           0 :         KASSERT(bp->cache >= DMA_CACHE);
    1561           0 :         KASSERT((bp->cache < NUM_CACHES));
    1562             : 
    1563           0 :         pages = atop(bp->b_bufsize);
    1564           0 :         struct bufcache *cache = &cleancache[bp->cache];
    1565           0 :         if (!ISSET(bp->b_flags, B_DELWRI)) {
    1566           0 :                 if (ISSET(bp->b_flags, B_COLD)) {
    1567           0 :                         queue = &cache->coldqueue;
    1568           0 :                 } else if (ISSET(bp->b_flags, B_WARM)) {
    1569           0 :                         queue = &cache->warmqueue;
    1570           0 :                         cache->warmbufpages -= pages;
    1571           0 :                 } else {
    1572           0 :                         queue = &cache->hotqueue;
    1573           0 :                         cache->hotbufpages -= pages;
    1574             :                 }
    1575           0 :                 bcstats.numcleanpages -= pages;
    1576           0 :                 cache->cachepages -= pages;
    1577           0 :         } else {
    1578             :                 queue = &dirtyqueue;
    1579           0 :                 bcstats.numdirtypages -= pages;
    1580           0 :                 bcstats.delwribufs--;
    1581             :         }
    1582           0 :         TAILQ_REMOVE(queue, bp, b_freelist);
    1583           0 : }
    1584             : 
    1585             : /* move buffers from a hot or warm queue to a cold queue in a cache */
    1586             : int
    1587           0 : chillbufs(struct bufcache *cache, struct bufqueue *queue, int64_t *queuepages)
    1588             : {
    1589             :         struct buf *bp;
    1590             :         int64_t limit, pages;
    1591             : 
    1592             :         /*
    1593             :          * We limit the hot queue to be small, with a max of 4096 pages.
    1594             :          * We limit the warm queue to half the cache size.
    1595             :          *
    1596             :          * We impose a minimum size of 96 to prevent too much "wobbling".
    1597             :          */
    1598           0 :         if (queue == &cache->hotqueue)
    1599           0 :                 limit = min(cache->cachepages / 20, 4096);
    1600           0 :         else if (queue == &cache->warmqueue)
    1601           0 :                 limit = (cache->cachepages / 2);
    1602             :         else
    1603           0 :                 panic("chillbufs: invalid queue");
    1604             : 
    1605           0 :         if (*queuepages > 96 && *queuepages > limit) {
    1606           0 :                 bp = TAILQ_FIRST(queue);
    1607           0 :                 if (!bp)
    1608           0 :                         panic("inconsistent bufpage counts");
    1609           0 :                 pages = atop(bp->b_bufsize);
    1610           0 :                 *queuepages -= pages;
    1611           0 :                 TAILQ_REMOVE(queue, bp, b_freelist);
    1612             :                 /* we do not clear B_WARM */
    1613           0 :                 SET(bp->b_flags, B_COLD);
    1614           0 :                 TAILQ_INSERT_TAIL(&cache->coldqueue, bp, b_freelist);
    1615           0 :                 return 1;
    1616             :         }
    1617           0 :         return 0;
    1618           0 : }
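/*
 * Worked example of the limits above (illustrative numbers only): with
 * cache->cachepages == 100000, the hot queue is capped at
 * min(100000 / 20, 4096) = min(5000, 4096) = 4096 pages, and the warm queue
 * at 100000 / 2 = 50000 pages. With a small cache of, say, 1500 pages, the
 * computed hot limit would be 75, but the 96-page floor in the test above
 * means nothing is chilled until the queue exceeds 96 pages.
 */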
    1619             : 
    1620             : void
    1621           0 : bufcache_release(struct buf *bp)
    1622             : {
    1623             :         struct bufqueue *queue;
    1624             :         int64_t pages;
    1625           0 :         struct bufcache *cache = &cleancache[bp->cache];
    1626             : 
    1627           0 :         pages = atop(bp->b_bufsize);
    1628           0 :         KASSERT(ISSET(bp->b_flags, B_BC));
    1629           0 :         if (fliphigh) {
    1630           0 :                 if (ISSET(bp->b_flags, B_DMA) && bp->cache > 0)
    1631           0 :                         panic("B_DMA buffer release from cache %d",
    1632             :                             bp->cache);
    1633           0 :                 else if ((!ISSET(bp->b_flags, B_DMA)) && bp->cache == 0)
    1634           0 :                         panic("Non B_DMA buffer release from cache %d",
    1635             :                             bp->cache);
    1636             :         }
    1637             : 
    1638           0 :         if (!ISSET(bp->b_flags, B_DELWRI)) {
    1639             :                 int64_t *queuepages;
    1640           0 :                 if (ISSET(bp->b_flags, B_WARM | B_COLD)) {
    1641           0 :                         SET(bp->b_flags, B_WARM);
    1642           0 :                         CLR(bp->b_flags, B_COLD);
    1643           0 :                         queue = &cache->warmqueue;
    1644           0 :                         queuepages = &cache->warmbufpages;
    1645           0 :                 } else {
    1646           0 :                         queue = &cache->hotqueue;
    1647           0 :                         queuepages = &cache->hotbufpages;
    1648             :                 }
    1649           0 :                 *queuepages += pages;
    1650           0 :                 bcstats.numcleanpages += pages;
    1651           0 :                 cache->cachepages += pages;
    1652           0 :                 chillbufs(cache, queue, queuepages);
    1653           0 :         } else {
    1654             :                 queue = &dirtyqueue;
    1655           0 :                 bcstats.numdirtypages += pages;
    1656           0 :                 bcstats.delwribufs++;
    1657             :         }
    1658           0 :         TAILQ_INSERT_TAIL(queue, bp, b_freelist);
    1659           0 : }
    1660             : 
    1661             : #ifdef HIBERNATE
    1662             : /*
    1663             :  * Nuke the buffer cache from orbit when hibernating. We do not want to save
    1664             :  * any clean cache pages to swap and read them back. The original disk files
    1665             :  * are just as good.
    1666             :  */
    1667             : void
    1668           0 : hibernate_suspend_bufcache(void)
    1669             : {
    1670             :         struct buf *bp;
    1671             :         int s;
    1672             : 
    1673           0 :         s = splbio();
    1674             :         /* Chuck away all the cache pages: discard bufs, do not promote. */
    1675           0 :         while ((bp = bufcache_getcleanbuf_range(DMA_CACHE, NUM_CACHES - 1, 1))) {
    1676           0 :                 bufcache_take(bp);
    1677           0 :                 if (bp->b_vp) {
    1678           0 :                         RBT_REMOVE(buf_rb_bufs, &bp->b_vp->v_bufs_tree, bp);
    1679           0 :                         brelvp(bp);
    1680           0 :                 }
    1681           0 :                 buf_put(bp);
    1682             :         }
    1683           0 :         splx(s);
    1684           0 : }
    1685             : 
    1686             : void
    1687           0 : hibernate_resume_bufcache(void)
    1688             : {
    1689             :         /* XXX Nothing needed here for now */
    1690           0 : }
    1691             : #endif /* HIBERNATE */

Generated by: LCOV version 1.13