/*	$OpenBSD: kern_bufq.c,v 1.32 2017/08/16 17:52:17 mikeb Exp $	*/
/*
 * Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org>
 * Copyright (c) 2010 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/queue.h>

SLIST_HEAD(, bufq)	bufqs = SLIST_HEAD_INITIALIZER(bufqs);
struct mutex		bufqs_mtx = MUTEX_INITIALIZER(IPL_NONE);
int			bufqs_stop;

struct bufq_impl {
	void		*(*impl_create)(void);
	void		 (*impl_destroy)(void *);

	void		 (*impl_queue)(void *, struct buf *);
	struct buf	*(*impl_dequeue)(void *);
	void		 (*impl_requeue)(void *, struct buf *);
	int		 (*impl_peek)(void *);
};

void		*bufq_fifo_create(void);
void		 bufq_fifo_destroy(void *);
void		 bufq_fifo_queue(void *, struct buf *);
struct buf	*bufq_fifo_dequeue(void *);
void		 bufq_fifo_requeue(void *, struct buf *);
int		 bufq_fifo_peek(void *);

void		*bufq_nscan_create(void);
void		 bufq_nscan_destroy(void *);
void		 bufq_nscan_queue(void *, struct buf *);
struct buf	*bufq_nscan_dequeue(void *);
void		 bufq_nscan_requeue(void *, struct buf *);
int		 bufq_nscan_peek(void *);

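/*
 * Method table for each queueing discipline, indexed by bufq type.
 * The fifo and nscan entries below must stay in the same order as the
 * BUFQ_* type constants used to index this array.
 */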
const struct bufq_impl bufq_impls[BUFQ_HOWMANY] = {
	{
		bufq_fifo_create,
		bufq_fifo_destroy,
		bufq_fifo_queue,
		bufq_fifo_dequeue,
		bufq_fifo_requeue,
		bufq_fifo_peek
	},
	{
		bufq_nscan_create,
		bufq_nscan_destroy,
		bufq_nscan_queue,
		bufq_nscan_dequeue,
		bufq_nscan_requeue,
		bufq_nscan_peek
	}
};

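/*
 * Illustrative sketch only, not taken from any real driver: roughly how
 * a disk driver is expected to drive this API.  The softc layout and the
 * xxstrategy/xxstart/xxintr/can_issue/issue_io names are hypothetical,
 * and BUFQ_DEFAULT is assumed to be the default type constant from
 * <sys/buf.h>.
 *
 *	struct xx_softc {
 *		struct bufq	sc_bufq;
 *		...
 *	};
 *
 *	// at attach time, pick a queueing discipline
 *	bufq_init(&sc->sc_bufq, BUFQ_DEFAULT);
 *
 *	// xxstrategy(): hand the buffer to the queue and kick the device
 *	bufq_queue(&sc->sc_bufq, bp);
 *	xxstart(sc);
 *
 *	// xxstart(): pull work while the hardware can take more
 *	while (can_issue(sc) && (bp = bufq_dequeue(&sc->sc_bufq)) != NULL)
 *		issue_io(sc, bp);
 *
 *	// xxintr(): the completion path must balance every queued buffer
 *	bufq_done(&sc->sc_bufq, bp);
 *	biodone(bp);
 *
 *	// at detach time
 *	bufq_destroy(&sc->sc_bufq);
 */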
int
bufq_init(struct bufq *bq, int type)
{
	u_int hi = BUFQ_HI, low = BUFQ_LOW;

	if (type >= BUFQ_HOWMANY)
		panic("bufq_init: type %i unknown", type);

	/*
	 * Ensure that writes can't consume the entire amount of kva
	 * available to the buffer cache if we only have a limited amount
	 * of kva available to us.
	 */
	if (hi >= (bcstats.kvaslots / 16)) {
		hi = bcstats.kvaslots / 16;
		if (hi < 2)
			hi = 2;
		low = hi / 2;
	}

	mtx_init(&bq->bufq_mtx, IPL_BIO);
	bq->bufq_hi = hi;
	bq->bufq_low = low;
	bq->bufq_type = type;
	bq->bufq_impl = &bufq_impls[type];
	bq->bufq_data = bq->bufq_impl->impl_create();
	if (bq->bufq_data == NULL) {
		/*
		 * We should actually return failure so disks attaching
		 * after boot in low memory situations don't panic the
		 * system.
		 */
		panic("bufq init fail");
	}

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqinit", 0);
	}
	SLIST_INSERT_HEAD(&bufqs, bq, bufq_entries);
	mtx_leave(&bufqs_mtx);

	return (0);
}

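/*
 * Switch an existing bufq to a different queueing discipline at runtime.
 * A replacement implementation is created first, any buffers pending on
 * the old one are migrated across under the queue lock, and the old
 * implementation is torn down afterwards.
 */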
int
bufq_switch(struct bufq *bq, int type)
{
	void		*data;
	void		*odata;
	int		 otype;
	struct buf	*bp;
	int		 ret;

	mtx_enter(&bq->bufq_mtx);
	ret = (bq->bufq_type == type);
	mtx_leave(&bq->bufq_mtx);
	if (ret)
		return (0);

	data = bufq_impls[type].impl_create();
	if (data == NULL)
		return (ENOMEM);

	mtx_enter(&bq->bufq_mtx);
	if (bq->bufq_type != type) {	/* might have changed during create */
		odata = bq->bufq_data;
		otype = bq->bufq_type;

		while ((bp = bufq_impls[otype].impl_dequeue(odata)) != NULL)
			bufq_impls[type].impl_queue(data, bp);

		bq->bufq_data = data;
		bq->bufq_type = type;
		bq->bufq_impl = &bufq_impls[type];
	} else {
		otype = type;
		odata = data;
	}
	mtx_leave(&bq->bufq_mtx);

	bufq_impls[otype].impl_destroy(odata);

	return (0);
}

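/*
 * Tear down a bufq.  Any buffers still queued are failed with ENXIO via
 * bufq_drain() before the implementation data is freed and the queue is
 * removed from the global list.
 */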
void
bufq_destroy(struct bufq *bq)
{
	bufq_drain(bq);

	bq->bufq_impl->impl_destroy(bq->bufq_data);
	bq->bufq_data = NULL;

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqdest", 0);
	}
	SLIST_REMOVE(&bufqs, bq, bufq, bufq_entries);
	mtx_leave(&bufqs_mtx);
}

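/*
 * Queue a buffer on a bufq.  If the queue has been stopped by
 * bufq_quiesce() this sleeps until bufq_restart() lets I/O through
 * again; the buffer is accounted as outstanding until bufq_done().
 */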
void
bufq_queue(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	while (bq->bufq_stop) {
		msleep(&bq->bufq_stop, &bq->bufq_mtx, PRIBIO, "bqqueue", 0);
	}

	bp->b_bq = bq;
	bq->bufq_outstanding++;
	bq->bufq_impl->impl_queue(bq->bufq_data, bp);
	mtx_leave(&bq->bufq_mtx);
}

struct buf *
bufq_dequeue(struct bufq *bq)
{
	struct buf	*bp;

	mtx_enter(&bq->bufq_mtx);
	bp = bq->bufq_impl->impl_dequeue(bq->bufq_data);
	mtx_leave(&bq->bufq_mtx);

	return (bp);
}

void
bufq_requeue(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	bq->bufq_impl->impl_requeue(bq->bufq_data, bp);
	mtx_leave(&bq->bufq_mtx);
}

int
bufq_peek(struct bufq *bq)
{
	int		 rv;

	mtx_enter(&bq->bufq_mtx);
	rv = bq->bufq_impl->impl_peek(bq->bufq_data);
	mtx_leave(&bq->bufq_mtx);

	return (rv);
}

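/*
 * Fail every buffer still sitting on the queue: each one is completed
 * with B_ERROR/ENXIO so upper layers see the I/O as having failed, e.g.
 * when a device goes away with requests still pending.
 */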
void
bufq_drain(struct bufq *bq)
{
	struct buf	*bp;
	int		 s;

	while ((bp = bufq_dequeue(bq)) != NULL) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		s = splbio();
		biodone(bp);
		splx(s);
	}
}

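/*
 * Throttle producers: sleep while the number of outstanding buffers is
 * at or above the high water mark (bufq_hi).  bufq_done() wakes the
 * sleepers again once the count drops below the low water mark, so
 * writers make progress in batches instead of lock-stepping the queue.
 */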
void
bufq_wait(struct bufq *bq)
{
	if (bq->bufq_hi) {
		assertwaitok();
		mtx_enter(&bq->bufq_mtx);
		while (bq->bufq_outstanding >= bq->bufq_hi) {
			bq->bufq_waiting++;
			msleep(&bq->bufq_waiting, &bq->bufq_mtx,
			    PRIBIO, "bqwait", 0);
			bq->bufq_waiting--;
		}
		mtx_leave(&bq->bufq_mtx);
	}
}

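/*
 * Retire a buffer previously handed to bufq_queue().  This drops the
 * outstanding count and wakes bufq_quiesce() when the queue has fully
 * drained, or bufq_wait() sleepers once the count falls below the low
 * water mark.
 */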
void
bufq_done(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	KASSERT(bq->bufq_outstanding > 0);
	bq->bufq_outstanding--;
	if (bq->bufq_stop && bq->bufq_outstanding == 0)
		wakeup(&bq->bufq_outstanding);
	if (bq->bufq_waiting && bq->bufq_outstanding < bq->bufq_low)
		wakeup(&bq->bufq_waiting);
	mtx_leave(&bq->bufq_mtx);
	bp->b_bq = NULL;
}

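/*
 * Quiesce all bufqs in the system, e.g. around suspend: block new
 * buffers from being queued and wait until every queue has no I/O
 * outstanding.  bufq_restart() is the counterpart that lets queueing
 * resume.
 */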
void
bufq_quiesce(void)
{
	struct bufq	*bq;

	mtx_enter(&bufqs_mtx);
	bufqs_stop = 1;
	mtx_leave(&bufqs_mtx);
	/*
	 * We can safely walk the list since it can't be modified as
	 * long as bufqs_stop is non-zero.
	 */
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 1;
		while (bq->bufq_outstanding) {
			msleep(&bq->bufq_outstanding, &bq->bufq_mtx,
			    PRIBIO, "bqquies", 0);
		}
		mtx_leave(&bq->bufq_mtx);
	}
}

void
bufq_restart(void)
{
	struct bufq	*bq;

	mtx_enter(&bufqs_mtx);
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 0;
		wakeup(&bq->bufq_stop);
		mtx_leave(&bq->bufq_mtx);
	}
	bufqs_stop = 0;
	wakeup(&bufqs_stop);
	mtx_leave(&bufqs_mtx);
}


/*
 * fifo implementation
 */

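/*
 * The fifo discipline is a plain SIMPLEQ: buffers are handed back in
 * strict arrival order, and requeued buffers go to the head so they are
 * retried first.
 */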
void *
bufq_fifo_create(void)
{
	struct bufq_fifo_head	*head;

	head = malloc(sizeof(*head), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (head == NULL)
		return (NULL);

	SIMPLEQ_INIT(head);

	return (head);
}

void
bufq_fifo_destroy(void *data)
{
	struct bufq_fifo_head	*head = data;

	free(head, M_DEVBUF, sizeof(*head));
}

void
bufq_fifo_queue(void *data, struct buf *bp)
{
	struct bufq_fifo_head	*head = data;

	SIMPLEQ_INSERT_TAIL(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
}

struct buf *
bufq_fifo_dequeue(void *data)
{
	struct bufq_fifo_head	*head = data;
	struct buf		*bp;

	bp = SIMPLEQ_FIRST(head);
	if (bp != NULL)
		SIMPLEQ_REMOVE_HEAD(head, b_bufq.bufq_data_fifo.bqf_entries);

	return (bp);
}

void
bufq_fifo_requeue(void *data, struct buf *bp)
{
	struct bufq_fifo_head	*head = data;

	SIMPLEQ_INSERT_HEAD(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
}

int
bufq_fifo_peek(void *data)
{
	struct bufq_fifo_head	*head = data;

	return (SIMPLEQ_FIRST(head) != NULL);
}

/*
 * nscan implementation
 */
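/*
 * N-step SCAN: incoming buffers normally collect on an unsorted fifo.
 * When the sorted list runs dry, bufq_nscan_resort() moves at most
 * BUFQ_NSCAN_N buffers from the fifo onto the sorted list, ordered by
 * block number, and those are then dequeued in ascending block order.
 * Sorting only a bounded batch at a time keeps seeks short while still
 * ensuring that a queued buffer cannot be starved indefinitely by new
 * arrivals.
 */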

#define BUF_INORDER(ba, bb)	((ba)->b_blkno < (bb)->b_blkno)

#define dsentries b_bufq.bufq_data_nscan.bqf_entries

struct bufq_nscan_data {
	struct bufq_nscan_head	 sorted;
	struct bufq_nscan_head	 fifo;
	int			 leftoverroom; /* Remaining number of buffer inserts allowed */
};

void	bufq_nscan_resort(struct bufq_nscan_data *data);
void	bufq_simple_nscan(struct bufq_nscan_head *, struct buf *);

void
bufq_simple_nscan(struct bufq_nscan_head *head, struct buf *bp)
{
	struct buf	*cur, *prev;

	prev = NULL;
	/*
	 * We look for the first slot where we would fit, then insert
	 * after the element we just passed.
	 */
	SIMPLEQ_FOREACH(cur, head, dsentries) {
		if (BUF_INORDER(bp, cur))
			break;
		prev = cur;
	}
	if (prev)
		SIMPLEQ_INSERT_AFTER(head, prev, bp, dsentries);
	else
		SIMPLEQ_INSERT_HEAD(head, bp, dsentries);
}

/*
 * Take N elements from the fifo queue and sort them.
 */
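/*
 * For example, if the fifo holds buffers for blocks 9, 2 and 7 and the
 * sorted list is empty, one resort moves them (up to BUFQ_NSCAN_N at a
 * time) onto the sorted list as 2, 7, 9; the unused part of the batch is
 * remembered in leftoverroom so bufq_nscan_queue() may keep packing
 * arrivals that sort after the head directly onto the sorted list.
 */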
void
bufq_nscan_resort(struct bufq_nscan_data *data)
{
	struct bufq_nscan_head	*fifo = &data->fifo;
	struct bufq_nscan_head	*sorted = &data->sorted;
	int			 count, segmentsize = BUFQ_NSCAN_N;
	struct buf		*bp;

	for (count = 0; count < segmentsize; count++) {
		bp = SIMPLEQ_FIRST(fifo);
		if (!bp)
			break;
		SIMPLEQ_REMOVE_HEAD(fifo, dsentries);
		bufq_simple_nscan(sorted, bp);
	}
	data->leftoverroom = segmentsize - count;
}

void *
bufq_nscan_create(void)
{
	struct bufq_nscan_data	*data;

	data = malloc(sizeof(*data), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (!data)
		return NULL;
	SIMPLEQ_INIT(&data->sorted);
	SIMPLEQ_INIT(&data->fifo);

	return data;
}

void
bufq_nscan_destroy(void *vdata)
{
	struct bufq_nscan_data	*data = vdata;

	free(data, M_DEVBUF, sizeof(*data));
}

void
bufq_nscan_queue(void *vdata, struct buf *bp)
{
	struct bufq_nscan_data	*data = vdata;

	/*
	 * If the previous sorted segment was small, we will continue
	 * packing in bufs as long as they're in order.
	 */
	if (data->leftoverroom) {
		struct buf *next = SIMPLEQ_FIRST(&data->sorted);
		if (next && BUF_INORDER(next, bp)) {
			bufq_simple_nscan(&data->sorted, bp);
			data->leftoverroom--;
			return;
		}
	}

	SIMPLEQ_INSERT_TAIL(&data->fifo, bp, dsentries);
}

struct buf *
bufq_nscan_dequeue(void *vdata)
{
	struct bufq_nscan_data	*data = vdata;
	struct bufq_nscan_head	*sorted = &data->sorted;
	struct buf		*bp;

	if (SIMPLEQ_FIRST(sorted) == NULL)
		bufq_nscan_resort(data);

	bp = SIMPLEQ_FIRST(sorted);
	if (bp != NULL)
		SIMPLEQ_REMOVE_HEAD(sorted, dsentries);

	return (bp);
}

void
bufq_nscan_requeue(void *vdata, struct buf *bp)
{
	struct bufq_nscan_data	*data = vdata;

	SIMPLEQ_INSERT_HEAD(&data->fifo, bp, dsentries);
}

int
bufq_nscan_peek(void *vdata)
{
	struct bufq_nscan_data	*data = vdata;

	return (SIMPLEQ_FIRST(&data->sorted) != NULL) ||
	    (SIMPLEQ_FIRST(&data->fifo) != NULL);
}