/*	$OpenBSD: mpath.c,v 1.41 2015/08/23 01:55:39 tedu Exp $ */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/conf.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/selinfo.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/mpathvar.h>

#define MPATH_BUSWIDTH 256

int		mpath_match(struct device *, void *, void *);
void		mpath_attach(struct device *, struct device *, void *);
void		mpath_shutdown(void *);

TAILQ_HEAD(mpath_paths, mpath_path);

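/*
 * A multipath target is modelled as a set of path groups, each holding
 * one or more paths (scsi_links) to the same logical unit.  The group
 * at the head of the device's group list is treated as the active
 * group and is where new I/O is directed.
 */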
struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;
	struct mpath_paths	 g_paths;
	struct mpath_dev	*g_dev;
	u_int			 g_id;
};
TAILQ_HEAD(mpath_groups, mpath_group);

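/*
 * Per-device state: the queue of pending transfers, the path to use
 * for the next transfer (round-robin within the active group), the
 * list of path groups, and the bookkeeping used while a failover scan
 * is in progress.
 */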
struct mpath_dev {
	struct mutex		 d_mtx;

	struct scsi_xfer_list	 d_xfers;
	struct mpath_path	*d_next_path;

	struct mpath_groups	 d_groups;

	struct mpath_group	*d_failover_iter;
	struct timeout		 d_failover_tmo;
	u_int			 d_failover;

	const struct mpath_ops	*d_ops;
	struct devid		*d_id;
};

struct mpath_softc {
	struct device		 sc_dev;
	struct scsi_link	 sc_link;
	struct scsibus_softc	*sc_scsibus;
	struct mpath_dev	*sc_devs[MPATH_BUSWIDTH];
};
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)

struct mpath_softc	*mpath;

struct cfattach mpath_ca = {
	sizeof(struct mpath_softc),
	mpath_match,
	mpath_attach
};

struct cfdriver mpath_cd = {
	NULL,
	"mpath",
	DV_DULL
};

void		mpath_cmd(struct scsi_xfer *);
void		mpath_minphys(struct buf *, struct scsi_link *);
int		mpath_probe(struct scsi_link *);

struct mpath_path *mpath_next_path(struct mpath_dev *);
void		mpath_done(struct scsi_xfer *);

void		mpath_failover(struct mpath_dev *);
void		mpath_failover_start(void *);
void		mpath_failover_check(struct mpath_dev *);

struct scsi_adapter mpath_switch = {
	mpath_cmd,
	scsi_minphys,
	mpath_probe
};

void		mpath_xs_stuffup(struct scsi_xfer *);

int
mpath_match(struct device *parent, void *match, void *aux)
{
	return (1);
}

void
mpath_attach(struct device *parent, struct device *self, void *aux)
{
	struct mpath_softc		*sc = (struct mpath_softc *)self;
	struct scsibus_attach_args	 saa;

	mpath = sc;

	printf("\n");

	sc->sc_link.adapter = &mpath_switch;
	sc->sc_link.adapter_softc = sc;
	sc->sc_link.adapter_target = MPATH_BUSWIDTH;
	sc->sc_link.adapter_buswidth = MPATH_BUSWIDTH;
	sc->sc_link.luns = 1;
	sc->sc_link.openings = 1024; /* XXX magical */

	bzero(&saa, sizeof(saa));
	saa.saa_sc_link = &sc->sc_link;

	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
	    &saa, scsiprint);
}

void
mpath_xs_stuffup(struct scsi_xfer *xs)
{
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}

int
mpath_probe(struct scsi_link *link)
{
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];

	if (link->lun != 0 || d == NULL)
		return (ENXIO);

	link->id = devid_copy(d->d_id);

	return (0);
}

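/*
 * Pick the path for the next transfer.  Paths within the active
 * (first) group are used round-robin: when the end of the group is
 * reached the iterator wraps back to the first path.  Called with
 * d_mtx held.
 */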
struct mpath_path *
mpath_next_path(struct mpath_dev *d)
{
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("%s: d is NULL", __func__);
#endif

	p = d->d_next_path;
	if (p != NULL) {
		d->d_next_path = TAILQ_NEXT(p, p_entry);
		if (d->d_next_path == NULL &&
		    (g = TAILQ_FIRST(&d->d_groups)) != NULL)
			d->d_next_path = TAILQ_FIRST(&g->g_paths);
	}

	return (p);
}

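/*
 * Adapter entry point for I/O arriving from the mpath scsibus.
 * Polled transfers are cloned onto a path and run synchronously;
 * everything else is queued on the device and handed to a path's xs
 * handler, which pulls work off the queue via mpath_start().
 */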
void
mpath_cmd(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;
	struct scsi_xfer *mxs;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_cmd issued against nonexistent device");
#endif

	if (ISSET(xs->flags, SCSI_POLL)) {
		mtx_enter(&d->d_mtx);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);
		if (p == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		mxs = scsi_xs_get(p->p_link, xs->flags);
		if (mxs == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
		mxs->cmdlen = xs->cmdlen;
		mxs->data = xs->data;
		mxs->datalen = xs->datalen;
		mxs->retries = xs->retries;
		mxs->timeout = xs->timeout;
		mxs->bp = xs->bp;

		scsi_xs_sync(mxs);

		xs->error = mxs->error;
		xs->status = mxs->status;
		xs->resid = mxs->resid;

		memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

		scsi_xs_put(mxs);
		scsi_done(xs);
		return;
	}

	mtx_enter(&d->d_mtx);
	SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list);
	p = mpath_next_path(d);
	mtx_leave(&d->d_mtx);

	if (p != NULL)
		scsi_xsh_add(&p->p_xsh);
}

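/*
 * Issue the next queued transfer on the given path.  This is
 * typically reached from a path driver's xs handler (p_xsh) once the
 * lower adapter can supply an xs: the pending transfer is copied onto
 * the path's xs and executed, with completion routed back through
 * mpath_done().
 */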
void
mpath_start(struct mpath_path *p, struct scsi_xfer *mxs)
{
	struct mpath_dev *d = p->p_group->g_dev;
	struct scsi_xfer *xs;
	int addxsh = 0;

	if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL)
		goto fail;

	mtx_enter(&d->d_mtx);
	xs = SIMPLEQ_FIRST(&d->d_xfers);
	if (xs != NULL) {
		SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list);
		if (!SIMPLEQ_EMPTY(&d->d_xfers))
			addxsh = 1;
	}
	mtx_leave(&d->d_mtx);

	if (xs == NULL)
		goto fail;

	memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
	mxs->cmdlen = xs->cmdlen;
	mxs->data = xs->data;
	mxs->datalen = xs->datalen;
	mxs->retries = xs->retries;
	mxs->timeout = xs->timeout;
	mxs->bp = xs->bp;
	mxs->flags = xs->flags;

	mxs->cookie = xs;
	mxs->done = mpath_done;

	scsi_xs_exec(mxs);

	if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return;
fail:
	scsi_xs_put(mxs);
}

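/*
 * Completion handler for transfers issued through a path.  Selection
 * timeouts and resets requeue the original transfer and retry it on
 * the next path; sense data is passed to the path driver's checksense
 * op, which may ask for a failover.  Anything else completes the
 * original transfer with the path's status.
 */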
void
mpath_done(struct scsi_xfer *mxs)
{
	struct scsi_xfer *xs = mxs->cookie;
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;

	switch (mxs->error) {
	case XS_SELTIMEOUT: /* physical path is gone, try the next */
	case XS_RESET:
		mtx_enter(&d->d_mtx);
		SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);

		scsi_xs_put(mxs);

		if (p != NULL)
			scsi_xsh_add(&p->p_xsh);
		return;
	case XS_SENSE:
		switch (d->d_ops->op_checksense(mxs)) {
		case MPATH_SENSE_FAILOVER:
			mtx_enter(&d->d_mtx);
			SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
			p = mpath_next_path(d);
			mtx_leave(&d->d_mtx);

			scsi_xs_put(mxs);

			mpath_failover(d);
			return;
		case MPATH_SENSE_DECLINED:
			break;
#ifdef DIAGNOSTIC
		default:
			panic("unexpected return from checksense");
#endif
		}
		break;
	}

	xs->error = mxs->error;
	xs->status = mxs->status;
	xs->resid = mxs->resid;

	memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

	scsi_xs_put(mxs);

	scsi_done(xs);
}

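/*
 * Failover: walk the path groups one at a time, asking the path
 * driver for the status of the first path in each group via its
 * status op.  The answer comes back asynchronously through
 * mpath_path_status().  If the scan runs out of groups it is retried
 * from a one second timeout.
 */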
void
mpath_failover(struct mpath_dev *d)
{
	if (!scsi_pending_start(&d->d_mtx, &d->d_failover))
		return;

	mpath_failover_start(d);
}

void
mpath_failover_start(void *xd)
{
	struct mpath_dev *d = xd;

	mtx_enter(&d->d_mtx);
	d->d_failover_iter = TAILQ_FIRST(&d->d_groups);
	mtx_leave(&d->d_mtx);

	mpath_failover_check(d);
}

void
mpath_failover_check(struct mpath_dev *d)
{
	struct mpath_group *g = d->d_failover_iter;
	struct mpath_path *p;

	if (g == NULL)
		timeout_add_sec(&d->d_failover_tmo, 1);
	else {
		p = TAILQ_FIRST(&g->g_paths);
		d->d_ops->op_status(p->p_link);
	}
}

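/*
 * Called by path drivers to report the state of a path, usually in
 * response to a failover status request.  An active path promotes its
 * group to the head of the list and becomes the next path for I/O;
 * anything else advances the failover scan to the next group.
 */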
void
mpath_path_status(struct mpath_path *p, int status)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d = g->g_dev;

	mtx_enter(&d->d_mtx);
	if (status == MPATH_S_ACTIVE) {
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
		TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry);
		d->d_next_path = p;
	} else
		d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry);
	mtx_leave(&d->d_mtx);

	if (status == MPATH_S_ACTIVE) {
		scsi_xsh_add(&p->p_xsh);
		if (!scsi_pending_finish(&d->d_mtx, &d->d_failover))
			mpath_failover_start(d);
	} else
		mpath_failover_check(d);
}

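/*
 * Clamp transfers to the most restrictive minphys of the underlying
 * paths by running every path's adapter minphys over the buffer.
 */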
void
mpath_minphys(struct buf *bp, struct scsi_link *link)
{
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_minphys against nonexistent device");
#endif

	mtx_enter(&d->d_mtx);
	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		TAILQ_FOREACH(p, &g->g_paths, p_entry) {
			/* XXX crossing layers with mutex held */
			p->p_link->adapter->scsi_minphys(bp, p->p_link);
		}
	}
	mtx_leave(&d->d_mtx);
}

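/*
 * Used by path drivers at probe time to decide whether a scsi_link is
 * worth attaching a path to: it must carry a device id, must not sit
 * behind umass(4), and must not already be one of mpath's own links.
 */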
int
mpath_path_probe(struct scsi_link *link)
{
	if (mpath == NULL)
		return (ENXIO);

	if (link->id == NULL)
		return (EINVAL);

	if (ISSET(link->flags, SDEV_UMASS))
		return (EINVAL);

	if (mpath == link->adapter_softc)
		return (ENXIO);

	return (0);
}

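/*
 * Attach a path to the device identified by the link's device id,
 * creating the device and path group on first sight.  New devices are
 * probed on the mpath bus so they show up as regular SCSI targets.
 *
 * A minimal sketch of how a path driver might hand a path over
 * (illustrative only; foo_ops, foo_start and the caller's softc are
 * hypothetical names, not part of this file):
 *
 *	p->p_link = link;
 *	scsi_xsh_set(&p->p_xsh, link, foo_start);
 *	if (mpath_path_attach(p, g_id, &foo_ops) != 0)
 *		printf("%s: unable to attach path\n", DEVNAME(sc));
 */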
int
mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops)
{
	struct mpath_softc *sc = mpath;
	struct scsi_link *link = p->p_link;
	struct mpath_dev *d = NULL;
	struct mpath_group *g;
	int newdev = 0, addxsh = 0;
	int target;

#ifdef DIAGNOSTIC
	if (p->p_link == NULL)
		panic("mpath_path_attach: NULL link");
	if (p->p_group != NULL)
		panic("mpath_path_attach: group is not NULL");
#endif

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops)
			break;

		d = NULL;
	}

	if (d == NULL) {
		for (target = 0; target < MPATH_BUSWIDTH; target++) {
			if (sc->sc_devs[target] == NULL)
				break;
		}
		if (target >= MPATH_BUSWIDTH)
			return (ENXIO);

		d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
		if (d == NULL)
			return (ENOMEM);

		mtx_init(&d->d_mtx, IPL_BIO);
		TAILQ_INIT(&d->d_groups);
		SIMPLEQ_INIT(&d->d_xfers);
		d->d_id = devid_copy(link->id);
		d->d_ops = ops;

		timeout_set(&d->d_failover_tmo, mpath_failover_start, d);

		sc->sc_devs[target] = d;
		newdev = 1;
	} else {
		/*
		 * instead of carrying identical values in different devid
		 * instances, delete the new one and reference the old one in
		 * the new scsi_link.
		 */
		devid_free(link->id);
		link->id = devid_copy(d->d_id);
	}

	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		if (g->g_id == g_id)
			break;
	}

	if (g == NULL) {
		g = malloc(sizeof(*g), M_DEVBUF,
		    M_WAITOK | M_CANFAIL | M_ZERO);
		if (g == NULL) {
			if (newdev) {
				free(d, M_DEVBUF, sizeof(*d));
				sc->sc_devs[target] = NULL;
			}

			return (ENOMEM);
		}

		TAILQ_INIT(&g->g_paths);
		g->g_dev = d;
		g->g_id = g_id;

		mtx_enter(&d->d_mtx);
		TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry);
		mtx_leave(&d->d_mtx);
	}

	p->p_group = g;

	mtx_enter(&d->d_mtx);
	TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry);
	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		addxsh = 1;

	if (d->d_next_path == NULL)
		d->d_next_path = p;
	mtx_leave(&d->d_mtx);

	if (newdev)
		scsi_probe_target(mpath->sc_scsibus, target);
	else if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return (0);
}

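/*
 * Detach a path, tearing down its group when the last path goes away.
 * Any transfers still queued are pushed onto the next remaining path;
 * otherwise a failover is started to find a usable path.
 */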
int
mpath_path_detach(struct mpath_path *p)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d;
	struct mpath_path *np = NULL;

#ifdef DIAGNOSTIC
	if (g == NULL)
		panic("mpath: detaching a path from a nonexistent bus");
#endif
	d = g->g_dev;
	p->p_group = NULL;

	mtx_enter(&d->d_mtx);
	TAILQ_REMOVE(&g->g_paths, p, p_entry);
	if (d->d_next_path == p)
		d->d_next_path = TAILQ_FIRST(&g->g_paths);

	if (TAILQ_EMPTY(&g->g_paths))
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
	else
		g = NULL;

	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		np = d->d_next_path;
	mtx_leave(&d->d_mtx);

	if (g != NULL)
		free(g, M_DEVBUF, sizeof(*g));

	scsi_xsh_del(&p->p_xsh);

	if (np == NULL)
		mpath_failover(d);
	else
		scsi_xsh_add(&np->p_xsh);

	return (0);
}

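/*
 * Map a boot device to its mpath equivalent: if the given device sits
 * on a path that mpath has claimed, return the softc of the device
 * attached at the corresponding target on the mpath bus instead.
 */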
struct device *
mpath_bootdv(struct device *dev)
{
	struct mpath_softc *sc = mpath;
	struct mpath_dev *d;
	struct mpath_group *g;
	struct mpath_path *p;
	int target;

	if (sc == NULL)
		return (dev);

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		TAILQ_FOREACH(g, &d->d_groups, g_entry) {
			TAILQ_FOREACH(p, &g->g_paths, p_entry) {
				if (p->p_link->device_softc == dev) {
					return (scsi_get_link(mpath->sc_scsibus,
					    target, 0)->device_softc);
				}
			}
		}
	}

	return (dev);
}