Line data Source code
1 : /* $OpenBSD: vfs_biomem.c,v 1.39 2018/03/29 01:43:41 mlarkin Exp $ */
2 :
3 : /*
4 : * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
5 : * Copyright (c) 2012-2016 Bob Beck <beck@openbsd.org>
6 : *
7 : * Permission to use, copy, modify, and distribute this software for any
8 : * purpose with or without fee is hereby granted, provided that the above
9 : * copyright notice and this permission notice appear in all copies.
10 : *
11 : * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 : * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 : * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 : * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 : * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 : * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 : * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 : */
19 :
20 :
21 : #include <sys/param.h>
22 : #include <sys/systm.h>
23 : #include <sys/buf.h>
24 : #include <sys/pool.h>
25 : #include <sys/proc.h> /* XXX for atomic */
26 : #include <sys/mount.h>
27 :
28 : #include <uvm/uvm_extern.h>
29 :
30 : vaddr_t buf_kva_start, buf_kva_end;
31 : int buf_needva;
32 : TAILQ_HEAD(,buf) buf_valist;
33 :
34 : extern struct bcachestats bcstats;
35 :
36 : /*
37 : * Pages are allocated from a uvm object (we only use it for page storage,
38 : * all pages are wired). Since every buffer contains a contiguous range of
39 : * pages, reusing the pages could be very painful. Fortunately voff_t is
40 : * 64 bits, so we can just increment buf_page_offset all the time and ignore
41 : * wraparound. Even if you reuse 4GB worth of buffers every second
42 : * you'll still run out of time_t faster than buffers.
43 : *
44 : */
45 : voff_t buf_page_offset;
46 : struct uvm_object *buf_object, buf_object_store;
47 :
48 : vaddr_t buf_unmap(struct buf *);
49 :
50 : void
51 0 : buf_mem_init(vsize_t size)
52 : {
53 0 : TAILQ_INIT(&buf_valist);
54 :
55 0 : buf_kva_start = vm_map_min(kernel_map);
56 0 : if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
57 : UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(PROT_NONE,
58 : PROT_NONE, MAP_INHERIT_NONE, MADV_NORMAL, 0)))
59 0 : panic("bufinit: can't reserve VM for buffers");
60 0 : buf_kva_end = buf_kva_start + size;
61 :
62 : /* Contiguous mapping */
63 0 : bcstats.kvaslots = bcstats.kvaslots_avail = size / MAXPHYS;
64 :
65 0 : buf_object = &buf_object_store;
66 :
67 0 : uvm_objinit(buf_object, NULL, 1);
68 0 : }
69 :
70 : /*
71 : * buf_acquire and buf_release manage the kvm mappings of buffers.
72 : */
73 : void
74 0 : buf_acquire(struct buf *bp)
75 : {
76 0 : KASSERT((bp->b_flags & B_BUSY) == 0);
77 0 : splassert(IPL_BIO);
78 : /*
79 : * Busy before waiting for kvm.
80 : */
81 0 : SET(bp->b_flags, B_BUSY);
82 0 : buf_map(bp);
83 0 : }
84 :
85 : /*
86 : * Acquire a buf but do not map it. Preserve any mapping it did have.
87 : */
88 : void
89 0 : buf_acquire_nomap(struct buf *bp)
90 : {
91 0 : splassert(IPL_BIO);
92 0 : SET(bp->b_flags, B_BUSY);
93 0 : if (bp->b_data != NULL) {
94 0 : TAILQ_REMOVE(&buf_valist, bp, b_valist);
95 0 : bcstats.kvaslots_avail--;
96 0 : bcstats.busymapped++;
97 0 : }
98 0 : }
99 :
100 : void
101 0 : buf_map(struct buf *bp)
102 : {
103 : vaddr_t va;
104 :
105 0 : splassert(IPL_BIO);
106 :
107 0 : if (bp->b_data == NULL) {
108 : unsigned long i;
109 :
110 : /*
111 : * First, just use the pre-allocated space until we run out.
112 : */
113 0 : if (buf_kva_start < buf_kva_end) {
114 : va = buf_kva_start;
115 0 : buf_kva_start += MAXPHYS;
116 0 : bcstats.kvaslots_avail--;
117 0 : } else {
118 : struct buf *vbp;
119 :
120 : /*
121 : * Find some buffer we can steal the space from.
122 : */
123 0 : vbp = TAILQ_FIRST(&buf_valist);
124 0 : while ((curproc != syncerproc &&
125 0 : curproc != cleanerproc &&
126 0 : bcstats.kvaslots_avail <= RESERVE_SLOTS) ||
127 0 : vbp == NULL) {
128 0 : buf_needva++;
129 0 : tsleep(&buf_needva, PRIBIO, "buf_needva", 0);
130 0 : vbp = TAILQ_FIRST(&buf_valist);
131 : }
132 0 : va = buf_unmap(vbp);
133 : }
134 :
135 0 : for (i = 0; i < atop(bp->b_bufsize); i++) {
136 0 : struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
137 0 : bp->b_poffs + ptoa(i));
138 :
139 0 : KASSERT(pg != NULL);
140 :
141 0 : pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
142 : PROT_READ | PROT_WRITE);
143 : }
144 : pmap_update(pmap_kernel());
145 0 : bp->b_data = (caddr_t)va;
146 0 : } else {
147 0 : TAILQ_REMOVE(&buf_valist, bp, b_valist);
148 0 : bcstats.kvaslots_avail--;
149 : }
150 :
151 0 : bcstats.busymapped++;
152 0 : }
153 :
154 : void
155 0 : buf_release(struct buf *bp)
156 : {
157 :
158 0 : KASSERT(bp->b_flags & B_BUSY);
159 0 : splassert(IPL_BIO);
160 :
161 0 : if (bp->b_data) {
162 0 : bcstats.busymapped--;
163 0 : TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
164 0 : bcstats.kvaslots_avail++;
165 0 : if (buf_needva) {
166 0 : buf_needva=0;
167 0 : wakeup(&buf_needva);
168 0 : }
169 : }
170 0 : CLR(bp->b_flags, B_BUSY);
171 0 : }
172 :
173 : /*
174 : * Deallocate all memory resources for this buffer. We need to be careful
175 : * to not drop kvm since we have no way to reclaim it. So, if the buffer
176 : * has kvm, we need to free it later. We put it on the front of the
177 : * freelist just so it gets picked up faster.
178 : *
179 : * Also, lots of assertions count on bp->b_data being NULL, so we
180 : * set it temporarily to NULL.
181 : *
182 : * Return non-zero if we take care of the freeing later.
183 : */
184 : int
185 0 : buf_dealloc_mem(struct buf *bp)
186 : {
187 : caddr_t data;
188 :
189 0 : splassert(IPL_BIO);
190 :
191 0 : data = bp->b_data;
192 0 : bp->b_data = NULL;
193 :
194 0 : if (data) {
195 0 : if (bp->b_flags & B_BUSY)
196 0 : bcstats.busymapped--;
197 0 : pmap_kremove((vaddr_t)data, bp->b_bufsize);
198 : pmap_update(pmap_kernel());
199 0 : }
200 :
201 0 : if (bp->b_pobj)
202 0 : buf_free_pages(bp);
203 :
204 0 : if (data == NULL)
205 0 : return (0);
206 :
207 0 : bp->b_data = data;
208 0 : if (!(bp->b_flags & B_BUSY)) { /* XXX - need better test */
209 0 : TAILQ_REMOVE(&buf_valist, bp, b_valist);
210 0 : bcstats.kvaslots_avail--;
211 0 : } else {
212 0 : CLR(bp->b_flags, B_BUSY);
213 0 : if (buf_needva) {
214 0 : buf_needva = 0;
215 0 : wakeup(&buf_needva);
216 0 : }
217 : }
218 0 : SET(bp->b_flags, B_RELEASED);
219 0 : TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);
220 0 : bcstats.kvaslots_avail++;
221 :
222 0 : return (1);
223 0 : }
224 :
225 : /*
226 : * Only used by bread_cluster.
227 : */
228 : void
229 0 : buf_fix_mapping(struct buf *bp, vsize_t newsize)
230 : {
231 0 : vaddr_t va = (vaddr_t)bp->b_data;
232 :
233 0 : if (newsize < bp->b_bufsize) {
234 0 : pmap_kremove(va + newsize, bp->b_bufsize - newsize);
235 : pmap_update(pmap_kernel());
236 : /*
237 : * Note: the size we lost is actually with the other
238 : * buffers read in by bread_cluster
239 : */
240 0 : bp->b_bufsize = newsize;
241 0 : }
242 0 : }
243 :
244 : vaddr_t
245 0 : buf_unmap(struct buf *bp)
246 : {
247 : vaddr_t va;
248 :
249 0 : KASSERT((bp->b_flags & B_BUSY) == 0);
250 0 : KASSERT(bp->b_data != NULL);
251 0 : splassert(IPL_BIO);
252 :
253 0 : TAILQ_REMOVE(&buf_valist, bp, b_valist);
254 0 : bcstats.kvaslots_avail--;
255 0 : va = (vaddr_t)bp->b_data;
256 0 : bp->b_data = 0;
257 0 : pmap_kremove(va, bp->b_bufsize);
258 : pmap_update(pmap_kernel());
259 :
260 0 : if (bp->b_flags & B_RELEASED)
261 0 : pool_put(&bufpool, bp);
262 :
263 0 : return (va);
264 : }
265 :
266 : /* Always allocates in dma-reachable memory */
267 : void
268 0 : buf_alloc_pages(struct buf *bp, vsize_t size)
269 : {
270 : voff_t offs;
271 : int i;
272 :
273 0 : KASSERT(size == round_page(size));
274 0 : KASSERT(bp->b_pobj == NULL);
275 0 : KASSERT(bp->b_data == NULL);
276 0 : splassert(IPL_BIO);
277 :
278 0 : offs = buf_page_offset;
279 0 : buf_page_offset += size;
280 :
281 0 : KASSERT(buf_page_offset > 0);
282 :
283 : /*
284 : * Attempt to allocate with NOWAIT. if we can't, then throw
285 : * away some clean pages and try again. Finally, if that
286 : * fails, do a WAITOK allocation so the page daemon can find
287 : * memory for us.
288 : */
289 0 : do {
290 0 : i = uvm_pagealloc_multi(buf_object, offs, size,
291 : UVM_PLA_NOWAIT);
292 0 : if (i == 0)
293 : break;
294 0 : } while (bufbackoff(&dma_constraint, 100) == 0);
295 0 : if (i != 0)
296 0 : i = uvm_pagealloc_multi(buf_object, offs, size,
297 : UVM_PLA_WAITOK);
298 : /* should not happen */
299 0 : if (i != 0)
300 0 : panic("uvm_pagealloc_multi unable to allocate an buf_object "
301 : "of size %lu", size);
302 :
303 0 : bcstats.numbufpages += atop(size);
304 0 : bcstats.dmapages += atop(size);
305 0 : SET(bp->b_flags, B_DMA);
306 0 : bp->b_pobj = buf_object;
307 0 : bp->b_poffs = offs;
308 0 : bp->b_bufsize = size;
309 0 : }
310 :
311 : void
312 0 : buf_free_pages(struct buf *bp)
313 : {
314 0 : struct uvm_object *uobj = bp->b_pobj;
315 : struct vm_page *pg;
316 : voff_t off, i;
317 :
318 0 : KASSERT(bp->b_data == NULL);
319 0 : KASSERT(uobj != NULL);
320 0 : splassert(IPL_BIO);
321 :
322 0 : off = bp->b_poffs;
323 0 : bp->b_pobj = NULL;
324 0 : bp->b_poffs = 0;
325 :
326 0 : for (i = 0; i < atop(bp->b_bufsize); i++) {
327 0 : pg = uvm_pagelookup(uobj, off + ptoa(i));
328 0 : KASSERT(pg != NULL);
329 0 : KASSERT(pg->wire_count == 1);
330 0 : pg->wire_count = 0;
331 0 : uvm_pagefree(pg);
332 0 : bcstats.numbufpages--;
333 0 : if (ISSET(bp->b_flags, B_DMA))
334 0 : bcstats.dmapages--;
335 : }
336 0 : CLR(bp->b_flags, B_DMA);
337 0 : }
338 :
339 : /* Reallocate a buf into a particular pmem range specified by "where". */
340 : int
341 0 : buf_realloc_pages(struct buf *bp, struct uvm_constraint_range *where,
342 : int flags)
343 : {
344 : vaddr_t va;
345 : int dma;
346 : int i, r;
347 0 : KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));
348 :
349 0 : splassert(IPL_BIO);
350 0 : KASSERT(ISSET(bp->b_flags, B_BUSY));
351 0 : dma = ISSET(bp->b_flags, B_DMA);
352 :
353 : /* if the original buf is mapped, unmap it */
354 0 : if (bp->b_data != NULL) {
355 0 : va = (vaddr_t)bp->b_data;
356 0 : pmap_kremove(va, bp->b_bufsize);
357 : pmap_update(pmap_kernel());
358 0 : }
359 :
360 0 : do {
361 0 : r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
362 0 : bp->b_bufsize, UVM_PLA_NOWAIT, where);
363 0 : if (r == 0)
364 : break;
365 0 : } while ((bufbackoff(where, atop(bp->b_bufsize)) == 0));
366 :
367 : /*
368 : * bufbackoff() failed, so there's no more we can do without
369 : * waiting. If allowed do, make that attempt.
370 : */
371 0 : if (r != 0 && (flags & UVM_PLA_WAITOK))
372 0 : r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
373 0 : bp->b_bufsize, flags, where);
374 :
375 : /*
376 : * If the allocation has succeeded, we may be somewhere different.
377 : * If the allocation has failed, we are in the same place.
378 : *
379 : * We still have to re-map the buffer before returning.
380 : */
381 :
382 : /* take it out of dma stats until we know where we are */
383 0 : if (dma)
384 0 : bcstats.dmapages -= atop(bp->b_bufsize);
385 :
386 : dma = 1;
387 : /* if the original buf was mapped, re-map it */
388 0 : for (i = 0; i < atop(bp->b_bufsize); i++) {
389 0 : struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
390 0 : bp->b_poffs + ptoa(i));
391 0 : KASSERT(pg != NULL);
392 0 : if (!PADDR_IS_DMA_REACHABLE(VM_PAGE_TO_PHYS(pg)))
393 0 : dma = 0;
394 0 : if (bp->b_data != NULL) {
395 0 : pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
396 : PROT_READ|PROT_WRITE);
397 : pmap_update(pmap_kernel());
398 0 : }
399 : }
400 0 : if (dma) {
401 0 : SET(bp->b_flags, B_DMA);
402 0 : bcstats.dmapages += atop(bp->b_bufsize);
403 0 : } else
404 0 : CLR(bp->b_flags, B_DMA);
405 0 : return(r);
406 : }
|