LCOV - code coverage report
Current view: top level - dev - softraid_raid5.c (source / functions) Hit Total Coverage
Test: 6.4 Lines: 0 333 0.0 %
Date: 2018-10-19 03:25:38 Functions: 0 17 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* $OpenBSD: softraid_raid5.c,v 1.27 2016/10/07 19:17:50 krw Exp $ */
       2             : /*
       3             :  * Copyright (c) 2014 Joel Sing <jsing@openbsd.org>
       4             :  * Copyright (c) 2009 Marco Peereboom <marco@peereboom.us>
       5             :  * Copyright (c) 2009 Jordan Hargrave <jordan@openbsd.org>
       6             :  *
       7             :  * Permission to use, copy, modify, and distribute this software for any
       8             :  * purpose with or without fee is hereby granted, provided that the above
       9             :  * copyright notice and this permission notice appear in all copies.
      10             :  *
      11             :  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
      12             :  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
      13             :  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
      14             :  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
      15             :  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
      16             :  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
      17             :  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
      18             :  */
      19             : 
      20             : #include "bio.h"
      21             : 
      22             : #include <sys/param.h>
      23             : #include <sys/systm.h>
      24             : #include <sys/buf.h>
      25             : #include <sys/device.h>
      26             : #include <sys/ioctl.h>
      27             : #include <sys/malloc.h>
      28             : #include <sys/kernel.h>
      29             : #include <sys/disk.h>
      30             : #include <sys/rwlock.h>
      31             : #include <sys/queue.h>
      32             : #include <sys/fcntl.h>
      33             : #include <sys/mount.h>
      34             : #include <sys/sensors.h>
      35             : #include <sys/stat.h>
      36             : #include <sys/task.h>
      37             : #include <sys/pool.h>
      38             : #include <sys/conf.h>
      39             : #include <sys/uio.h>
      40             : 
      41             : #include <scsi/scsi_all.h>
      42             : #include <scsi/scsiconf.h>
      43             : #include <scsi/scsi_disk.h>
      44             : 
      45             : #include <dev/softraidvar.h>
      46             : 
      47             : /* RAID 5 functions. */
      48             : int     sr_raid5_create(struct sr_discipline *, struct bioc_createraid *,
      49             :             int, int64_t);
      50             : int     sr_raid5_assemble(struct sr_discipline *, struct bioc_createraid *,
      51             :             int, void *);
      52             : int     sr_raid5_init(struct sr_discipline *);
      53             : int     sr_raid5_rw(struct sr_workunit *);
      54             : int     sr_raid5_openings(struct sr_discipline *);
      55             : void    sr_raid5_intr(struct buf *);
      56             : int     sr_raid5_wu_done(struct sr_workunit *);
      57             : void    sr_raid5_set_chunk_state(struct sr_discipline *, int, int);
      58             : void    sr_raid5_set_vol_state(struct sr_discipline *);
      59             : 
      60             : int     sr_raid5_addio(struct sr_workunit *wu, int, daddr_t, long,
      61             :             void *, int, int, void *);
      62             : int     sr_raid5_regenerate(struct sr_workunit *, int, daddr_t, long,
      63             :             void *);
      64             : int     sr_raid5_write(struct sr_workunit *, struct sr_workunit *, int, int,
      65             :             daddr_t, long, void *, int, int);
      66             : void    sr_raid5_xor(void *, void *, int);
      67             : 
      68             : void    sr_raid5_rebuild(struct sr_discipline *);
      69             : void    sr_raid5_scrub(struct sr_discipline *);
      70             : 
      71             : /* discipline initialisation. */
      72             : void
      73           0 : sr_raid5_discipline_init(struct sr_discipline *sd)
      74             : {
      75             :         /* Fill out discipline members. */
      76           0 :         sd->sd_type = SR_MD_RAID5;
      77           0 :         strlcpy(sd->sd_name, "RAID 5", sizeof(sd->sd_name));
      78           0 :         sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
      79             :             SR_CAP_REBUILD | SR_CAP_REDUNDANT;
      80           0 :         sd->sd_max_wu = SR_RAID5_NOWU + 2;   /* Two for scrub/rebuild. */
      81             : 
      82             :         /* Setup discipline specific function pointers. */
      83           0 :         sd->sd_assemble = sr_raid5_assemble;
      84           0 :         sd->sd_create = sr_raid5_create;
      85           0 :         sd->sd_openings = sr_raid5_openings;
      86           0 :         sd->sd_rebuild = sr_raid5_rebuild;
      87           0 :         sd->sd_scsi_rw = sr_raid5_rw;
      88           0 :         sd->sd_scsi_intr = sr_raid5_intr;
      89           0 :         sd->sd_scsi_wu_done = sr_raid5_wu_done;
      90           0 :         sd->sd_set_chunk_state = sr_raid5_set_chunk_state;
      91           0 :         sd->sd_set_vol_state = sr_raid5_set_vol_state;
      92           0 : }
      93             : 
      94             : int
      95           0 : sr_raid5_create(struct sr_discipline *sd, struct bioc_createraid *bc,
      96             :     int no_chunk, int64_t coerced_size)
      97             : {
      98           0 :         if (no_chunk < 3) {
      99           0 :                 sr_error(sd->sd_sc, "%s requires three or more chunks",
     100           0 :                     sd->sd_name);
     101           0 :                 return EINVAL;
     102             :         }
     103             : 
     104             :         /*
     105             :          * XXX add variable strip size later even though MAXPHYS is really
     106             :          * the clever value, users like to tinker with that type of stuff.
     107             :          */
     108           0 :         sd->sd_meta->ssdi.ssd_strip_size = MAXPHYS;
     109           0 :         sd->sd_meta->ssdi.ssd_size = (coerced_size &
     110           0 :             ~(((u_int64_t)sd->sd_meta->ssdi.ssd_strip_size >>
     111           0 :             DEV_BSHIFT) - 1)) * (no_chunk - 1);
     112             : 
     113           0 :         return sr_raid5_init(sd);
     114           0 : }
     115             : 
     /*
      * Assemble an existing RAID 5 volume.  Only the runtime values need to be
      * set up, so this defers entirely to sr_raid5_init(); bc, no_chunk and
      * data are not used.
      */
     int
     sr_raid5_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
         int no_chunk, void *data)
     {
             int                     error;

             error = sr_raid5_init(sd);

             return error;
     }
     122             : 
     123             : int
     124           0 : sr_raid5_init(struct sr_discipline *sd)
     125             : {
     126             :         /* Initialise runtime values. */
     127           0 :         sd->mds.mdd_raid5.sr5_strip_bits =
     128           0 :             sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size);
     129           0 :         if (sd->mds.mdd_raid5.sr5_strip_bits == -1) {
     130           0 :                 sr_error(sd->sd_sc, "invalid strip size");
     131           0 :                 return EINVAL;
     132             :         }
     133             : 
     134           0 :         sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no;
     135             : 
     136           0 :         return 0;
     137           0 : }
     138             : 
     139             : int
     140           0 : sr_raid5_openings(struct sr_discipline *sd)
     141             : {
     142             :         /* Two work units per I/O, two for rebuild/scrub. */
     143           0 :         return ((sd->sd_max_wu - 2) >> 1);
     144             : }
     145             : 
     146             : void
     147           0 : sr_raid5_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
     148             : {
     149             :         int                     old_state, s;
     150             : 
     151             :         DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
     152             :             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
     153             :             sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
     154             : 
     155             :         /* ok to go to splbio since this only happens in error path */
     156           0 :         s = splbio();
     157           0 :         old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
     158             : 
     159             :         /* multiple IOs to the same chunk that fail will come through here */
     160           0 :         if (old_state == new_state)
     161             :                 goto done;
     162             : 
     163           0 :         switch (old_state) {
     164             :         case BIOC_SDONLINE:
     165           0 :                 switch (new_state) {
     166             :                 case BIOC_SDOFFLINE:
     167             :                 case BIOC_SDSCRUB:
     168             :                         break;
     169             :                 default:
     170             :                         goto die;
     171             :                 }
     172             :                 break;
     173             : 
     174             :         case BIOC_SDOFFLINE:
     175           0 :                 if (new_state == BIOC_SDREBUILD) {
     176             :                         ;
     177             :                 } else
     178             :                         goto die;
     179             :                 break;
     180             : 
     181             :         case BIOC_SDSCRUB:
     182           0 :                 switch (new_state) {
     183             :                 case BIOC_SDONLINE:
     184             :                 case BIOC_SDOFFLINE:
     185             :                         break;
     186             :                 default:
     187             :                         goto die;
     188             :                 }
     189             :                 break;
     190             : 
     191             :         case BIOC_SDREBUILD:
     192           0 :                 switch (new_state) {
     193             :                 case BIOC_SDONLINE:
     194             :                 case BIOC_SDOFFLINE:
     195             :                         break;
     196             :                 default:
     197             :                         goto die;
     198             :                 }
     199             :                 break;
     200             : 
     201             :         default:
     202             : die:
     203           0 :                 splx(s); /* XXX */
     204           0 :                 panic("%s: %s: %s: invalid chunk state transition "
     205           0 :                     "%d -> %d", DEVNAME(sd->sd_sc),
     206           0 :                     sd->sd_meta->ssd_devname,
     207           0 :                     sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
     208             :                     old_state, new_state);
     209             :                 /* NOTREACHED */
     210             :         }
     211             : 
     212           0 :         sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
     213           0 :         sd->sd_set_vol_state(sd);
     214             : 
     215           0 :         sd->sd_must_flush = 1;
     216           0 :         task_add(systq, &sd->sd_meta_save_task);
     217             : done:
     218           0 :         splx(s);
     219           0 : }
     220             : 
     221             : void
     222           0 : sr_raid5_set_vol_state(struct sr_discipline *sd)
     223             : {
     224           0 :         int                     states[SR_MAX_STATES];
     225             :         int                     new_state, i, s, nd;
     226           0 :         int                     old_state = sd->sd_vol_status;
     227             : 
     228             :         DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
     229             :             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
     230             : 
     231           0 :         nd = sd->sd_meta->ssdi.ssd_chunk_no;
     232             : 
     233           0 :         for (i = 0; i < SR_MAX_STATES; i++)
     234           0 :                 states[i] = 0;
     235             : 
     236           0 :         for (i = 0; i < nd; i++) {
     237           0 :                 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
     238           0 :                 if (s >= SR_MAX_STATES)
     239           0 :                         panic("%s: %s: %s: invalid chunk state",
     240           0 :                             DEVNAME(sd->sd_sc),
     241           0 :                             sd->sd_meta->ssd_devname,
     242           0 :                             sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
     243           0 :                 states[s]++;
     244             :         }
     245             : 
     246           0 :         if (states[BIOC_SDONLINE] == nd)
     247           0 :                 new_state = BIOC_SVONLINE;
     248           0 :         else if (states[BIOC_SDONLINE] < nd - 1)
     249           0 :                 new_state = BIOC_SVOFFLINE;
     250           0 :         else if (states[BIOC_SDSCRUB] != 0)
     251           0 :                 new_state = BIOC_SVSCRUB;
     252           0 :         else if (states[BIOC_SDREBUILD] != 0)
     253           0 :                 new_state = BIOC_SVREBUILD;
     254           0 :         else if (states[BIOC_SDONLINE] == nd - 1)
     255             :                 new_state = BIOC_SVDEGRADED;
     256             :         else {
     257             : #ifdef SR_DEBUG
     258             :                 DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state "
     259             :                     "was %d\n", DEVNAME(sd->sd_sc), old_state);
     260             :                 for (i = 0; i < nd; i++)
     261             :                         DNPRINTF(SR_D_STATE, "%s: chunk %d status = %d\n",
     262             :                             DEVNAME(sd->sd_sc), i,
     263             :                             sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
     264             : #endif
     265           0 :                 panic("invalid volume state");
     266             :         }
     267             : 
     268             :         DNPRINTF(SR_D_STATE, "%s: %s: sr_raid5_set_vol_state %d -> %d\n",
     269             :             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
     270             :             old_state, new_state);
     271             : 
     272           0 :         switch (old_state) {
     273             :         case BIOC_SVONLINE:
     274           0 :                 switch (new_state) {
     275             :                 case BIOC_SVONLINE: /* can go to same state */
     276             :                 case BIOC_SVOFFLINE:
     277             :                 case BIOC_SVDEGRADED:
     278             :                 case BIOC_SVREBUILD: /* happens on boot */
     279             :                         break;
     280             :                 default:
     281             :                         goto die;
     282             :                 }
     283             :                 break;
     284             : 
     285             :         case BIOC_SVOFFLINE:
     286             :                 /* XXX this might be a little too much */
     287             :                 goto die;
     288             : 
     289             :         case BIOC_SVDEGRADED:
     290           0 :                 switch (new_state) {
     291             :                 case BIOC_SVOFFLINE:
     292             :                 case BIOC_SVREBUILD:
     293             :                 case BIOC_SVDEGRADED: /* can go to the same state */
     294             :                         break;
     295             :                 default:
     296             :                         goto die;
     297             :                 }
     298             :                 break;
     299             : 
     300             :         case BIOC_SVBUILDING:
     301           0 :                 switch (new_state) {
     302             :                 case BIOC_SVONLINE:
     303             :                 case BIOC_SVOFFLINE:
     304             :                 case BIOC_SVBUILDING: /* can go to the same state */
     305             :                         break;
     306             :                 default:
     307             :                         goto die;
     308             :                 }
     309             :                 break;
     310             : 
     311             :         case BIOC_SVSCRUB:
     312           0 :                 switch (new_state) {
     313             :                 case BIOC_SVONLINE:
     314             :                 case BIOC_SVOFFLINE:
     315             :                 case BIOC_SVDEGRADED:
     316             :                 case BIOC_SVSCRUB: /* can go to same state */
     317             :                         break;
     318             :                 default:
     319             :                         goto die;
     320             :                 }
     321             :                 break;
     322             : 
     323             :         case BIOC_SVREBUILD:
     324           0 :                 switch (new_state) {
     325             :                 case BIOC_SVONLINE:
     326             :                 case BIOC_SVOFFLINE:
     327             :                 case BIOC_SVDEGRADED:
     328             :                 case BIOC_SVREBUILD: /* can go to the same state */
     329             :                         break;
     330             :                 default:
     331             :                         goto die;
     332             :                 }
     333             :                 break;
     334             : 
     335             :         default:
     336             : die:
     337           0 :                 panic("%s: %s: invalid volume state transition %d -> %d",
     338           0 :                     DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
     339             :                     old_state, new_state);
     340             :                 /* NOTREACHED */
     341             :         }
     342             : 
     343           0 :         sd->sd_vol_status = new_state;
     344           0 : }
     345             : 
     346             : static inline int
     347           0 : sr_raid5_chunk_online(struct sr_discipline *sd, int chunk)
     348             : {
     349           0 :         switch (sd->sd_vol.sv_chunks[chunk]->src_meta.scm_status) {
     350             :         case BIOC_SDONLINE:
     351             :         case BIOC_SDSCRUB:
     352           0 :                 return 1;
     353             :         default:
     354           0 :                 return 0;
     355             :         }
     356           0 : }
     357             : 
     358             : static inline int
     359           0 : sr_raid5_chunk_rebuild(struct sr_discipline *sd, int chunk)
     360             : {
     361           0 :         switch (sd->sd_vol.sv_chunks[chunk]->src_meta.scm_status) {
     362             :         case BIOC_SDREBUILD:
     363           0 :                 return 1;
     364             :         default:
     365           0 :                 return 0;
     366             :         }
     367           0 : }
     368             : 
     369             : int
     370           0 : sr_raid5_rw(struct sr_workunit *wu)
     371             : {
     372             :         struct sr_workunit      *wu_r = NULL;
     373           0 :         struct sr_discipline    *sd = wu->swu_dis;
     374           0 :         struct scsi_xfer        *xs = wu->swu_xs;
     375             :         struct sr_chunk         *scp;
     376           0 :         daddr_t                 blkno, lba;
     377             :         int64_t                 chunk_offs, lbaoffs, offset, strip_offs;
     378             :         int64_t                 strip_bits, strip_no, strip_size;
     379             :         int64_t                 chunk, no_chunk;
     380             :         int64_t                 parity, row_size;
     381             :         long                    length, datalen;
     382             :         void                    *data;
     383             :         int                     s;
     384             : 
     385             :         /* blkno and scsi error will be handled by sr_validate_io */
     386           0 :         if (sr_validate_io(wu, &blkno, "sr_raid5_rw"))
     387             :                 goto bad;
     388             : 
     389             :         DNPRINTF(SR_D_DIS, "%s: %s sr_raid5_rw %s: blkno %lld size %d\n",
     390             :             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
     391             :             (xs->flags & SCSI_DATA_IN) ? "read" : "write",
     392             :             (long long)blkno, xs->datalen);
     393             : 
     394           0 :         strip_size = sd->sd_meta->ssdi.ssd_strip_size;
     395           0 :         strip_bits = sd->mds.mdd_raid5.sr5_strip_bits;
     396           0 :         no_chunk = sd->sd_meta->ssdi.ssd_chunk_no - 1;
     397           0 :         row_size = (no_chunk << strip_bits) >> DEV_BSHIFT;
     398             : 
     399           0 :         data = xs->data;
     400           0 :         datalen = xs->datalen;
     401           0 :         lbaoffs = blkno << DEV_BSHIFT;
     402             : 
     403           0 :         if (xs->flags & SCSI_DATA_OUT) {
     404           0 :                 if ((wu_r = sr_scsi_wu_get(sd, SCSI_NOSLEEP)) == NULL){
     405           0 :                         printf("%s: %s failed to get read work unit",
     406           0 :                             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
     407           0 :                         goto bad;
     408             :                 }
     409           0 :                 wu_r->swu_state = SR_WU_INPROGRESS;
     410           0 :                 wu_r->swu_flags |= SR_WUF_DISCIPLINE;
     411           0 :         }
     412             : 
     413           0 :         wu->swu_blk_start = 0;
     414           0 :         while (datalen != 0) {
     415           0 :                 strip_no = lbaoffs >> strip_bits;
     416           0 :                 strip_offs = lbaoffs & (strip_size - 1);
     417           0 :                 chunk_offs = (strip_no / no_chunk) << strip_bits;
     418           0 :                 offset = chunk_offs + strip_offs;
     419             : 
     420             :                 /* get size remaining in this stripe */
     421           0 :                 length = MIN(strip_size - strip_offs, datalen);
     422             : 
     423             :                 /*
     424             :                  * Map disk offset to data and parity chunks, using a left
     425             :                  * asymmetric algorithm for the parity assignment.
     426             :                  */
     427           0 :                 chunk = strip_no % no_chunk;
     428           0 :                 parity = no_chunk - ((strip_no / no_chunk) % (no_chunk + 1));
     429           0 :                 if (chunk >= parity)
     430           0 :                         chunk++;
     431             : 
     432           0 :                 lba = offset >> DEV_BSHIFT;
     433             : 
     434             :                 /* XXX big hammer.. exclude I/O from entire stripe */
     435           0 :                 if (wu->swu_blk_start == 0)
     436           0 :                         wu->swu_blk_start = (strip_no / no_chunk) * row_size;
     437           0 :                 wu->swu_blk_end = (strip_no / no_chunk) * row_size +
     438           0 :                     (row_size - 1);
     439             : 
     440           0 :                 scp = sd->sd_vol.sv_chunks[chunk];
     441           0 :                 if (xs->flags & SCSI_DATA_IN) {
     442           0 :                         switch (scp->src_meta.scm_status) {
     443             :                         case BIOC_SDONLINE:
     444             :                         case BIOC_SDSCRUB:
     445             :                                 /*
     446             :                                  * Chunk is online, issue a single read
     447             :                                  * request.
     448             :                                  */
     449           0 :                                 if (sr_raid5_addio(wu, chunk, lba, length,
     450           0 :                                     data, xs->flags, 0, NULL))
     451             :                                         goto bad;
     452             :                                 break;
     453             :                         case BIOC_SDOFFLINE:
     454             :                         case BIOC_SDREBUILD:
     455             :                         case BIOC_SDHOTSPARE:
     456           0 :                                 if (sr_raid5_regenerate(wu, chunk, lba,
     457             :                                     length, data))
     458             :                                         goto bad;
     459             :                                 break;
     460             :                         default:
     461           0 :                                 printf("%s: is offline, can't read\n",
     462           0 :                                     DEVNAME(sd->sd_sc));
     463           0 :                                 goto bad;
     464             :                         }
     465             :                 } else {
     466           0 :                         if (sr_raid5_write(wu, wu_r, chunk, parity, lba,
     467             :                             length, data, xs->flags, 0))
     468             :                                 goto bad;
     469             :                 }
     470             : 
     471             :                 /* advance to next block */
     472           0 :                 lbaoffs += length;
     473           0 :                 datalen -= length;
     474           0 :                 data += length;
     475             :         }
     476             : 
     477           0 :         s = splbio();
     478           0 :         if (wu_r) {
     479           0 :                 if (wu_r->swu_io_count > 0) {
     480             :                         /* collide write request with reads */
     481           0 :                         wu_r->swu_blk_start = wu->swu_blk_start;
     482           0 :                         wu_r->swu_blk_end = wu->swu_blk_end;
     483             : 
     484           0 :                         wu->swu_state = SR_WU_DEFERRED;
     485           0 :                         wu_r->swu_collider = wu;
     486           0 :                         TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);
     487             : 
     488             :                         wu = wu_r;
     489           0 :                 } else {
     490           0 :                         sr_scsi_wu_put(sd, wu_r);
     491             :                 }
     492             :         }
     493           0 :         splx(s);
     494             : 
     495           0 :         sr_schedule_wu(wu);
     496             : 
     497           0 :         return (0);
     498             : 
     499             : bad:
     500             :         /* wu is unwound by sr_wu_put */
     501           0 :         if (wu_r)
     502           0 :                 sr_scsi_wu_put(sd, wu_r);
     503           0 :         return (1);
     504           0 : }
     505             : 
     506             : int
     507           0 : sr_raid5_regenerate(struct sr_workunit *wu, int chunk, daddr_t blkno,
     508             :     long len, void *data)
     509             : {
     510           0 :         struct sr_discipline    *sd = wu->swu_dis;
     511             :         int                     i;
     512             : 
     513             :         /*
     514             :          * Regenerate a block on a RAID 5 volume by xoring the data and parity
     515             :          * from all of the remaining online chunks. This requires the parity
     516             :          * to already be correct.
     517             :          */
     518             : 
     519             :         DNPRINTF(SR_D_DIS, "%s: %s sr_raid5_regenerate chunk %d offline, "
     520             :             "regenerating block %llu\n",
     521             :             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, chunk, blkno);
     522             : 
     523           0 :         memset(data, 0, len);
     524           0 :         for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
     525           0 :                 if (i == chunk)
     526             :                         continue;
     527           0 :                 if (!sr_raid5_chunk_online(sd, i))
     528             :                         goto bad;
     529           0 :                 if (sr_raid5_addio(wu, i, blkno, len, NULL, SCSI_DATA_IN,
     530             :                     0, data))
     531             :                         goto bad;
     532             :         }
     533           0 :         return (0);
     534             : 
     535             : bad:
     536           0 :         return (1);
     537           0 : }
     538             : 
     539             : int
     540           0 : sr_raid5_write(struct sr_workunit *wu, struct sr_workunit *wu_r, int chunk,
     541             :     int parity, daddr_t blkno, long len, void *data, int xsflags,
     542             :     int ccbflags)
     543             : {
     544           0 :         struct sr_discipline    *sd = wu->swu_dis;
     545           0 :         struct scsi_xfer        *xs = wu->swu_xs;
     546             :         void                    *xorbuf;
     547             :         int                     chunk_online, chunk_rebuild;
     548             :         int                     parity_online, parity_rebuild;
     549             :         int                     other_offline = 0, other_rebuild = 0;
     550             :         int                     i;
     551             : 
     552             :         /*
     553             :          * Perform a write to a RAID 5 volume. This write routine does not
     554             :          * require the parity to already be correct and will operate on a
     555             :          * uninitialised volume.
     556             :          *
     557             :          * There are four possible cases:
     558             :          *
     559             :          * 1) All data chunks and parity are online. In this case we read the
     560             :          *    data from all data chunks, except the one we are writing to, in
     561             :          *    order to calculate and write the new parity.
     562             :          *
     563             :          * 2) The parity chunk is offline. In this case we only need to write
     564             :          *    to the data chunk. No parity calculation is required.
     565             :          *
     566             :          * 3) The data chunk is offline. In this case we read the data from all
     567             :          *    online chunks in order to calculate and write the new parity.
     568             :          *    This is the same as (1) except we do not write the data chunk.
     569             :          *
     570             :          * 4) A different data chunk is offline. The new parity is calculated
     571             :          *    by taking the existing parity, xoring the original data and
     572             :          *    xoring in the new data. This requires that the parity already be
     573             :          *    correct, which it will be if any of the data chunks has
     574             :          *    previously been written.
     575             :          *
     576             :          * There is an additional complication introduced by a chunk that is
     577             :          * being rebuilt. If this is the data or parity chunk, then we want
     578             :          * to write to it as per normal. If it is another data chunk then we
     579             :          * need to presume that it has not yet been regenerated and use the
     580             :          * same method as detailed in (4) above.
     581             :          */
     582             : 
     583             :         DNPRINTF(SR_D_DIS, "%s: %s sr_raid5_write chunk %i parity %i "
     584             :             "blkno %llu\n", DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
     585             :             chunk, parity, (unsigned long long)blkno);
     586             : 
     587           0 :         chunk_online = sr_raid5_chunk_online(sd, chunk);
     588           0 :         chunk_rebuild = sr_raid5_chunk_rebuild(sd, chunk);
     589           0 :         parity_online = sr_raid5_chunk_online(sd, parity);
     590           0 :         parity_rebuild = sr_raid5_chunk_rebuild(sd, parity);
     591             : 
     592           0 :         for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
     593           0 :                 if (i == chunk || i == parity)
     594             :                         continue;
     595           0 :                 if (sr_raid5_chunk_rebuild(sd, i))
     596           0 :                         other_rebuild = 1;
     597           0 :                 else if (!sr_raid5_chunk_online(sd, i))
     598           0 :                         other_offline = 1;
     599             :         }
     600             : 
     601             :         DNPRINTF(SR_D_DIS, "%s: %s chunk online %d, parity online %d, "
     602             :             "other offline %d\n", DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
     603             :             chunk_online, parity_online, other_offline);
     604             : 
     605           0 :         if (!parity_online && !parity_rebuild)
     606             :                 goto data_write;
     607             : 
     608           0 :         xorbuf = sr_block_get(sd, len);
     609           0 :         if (xorbuf == NULL)
     610             :                 goto bad;
     611           0 :         memcpy(xorbuf, data, len);
     612             : 
     613           0 :         if (other_offline || other_rebuild) {
     614             : 
     615             :                 /*
     616             :                  * XXX - If we can guarantee that this LBA has been scrubbed
     617             :                  * then we can also take this faster path.
     618             :                  */
     619             : 
     620             :                 /* Read in existing data and existing parity. */
     621           0 :                 if (sr_raid5_addio(wu_r, chunk, blkno, len, NULL,
     622             :                     SCSI_DATA_IN, 0, xorbuf))
     623             :                         goto bad;
     624           0 :                 if (sr_raid5_addio(wu_r, parity, blkno, len, NULL,
     625             :                     SCSI_DATA_IN, 0, xorbuf))
     626             :                         goto bad;
     627             : 
     628             :         } else {
     629             : 
     630             :                 /* Read in existing data from all other chunks. */
     631           0 :                 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
     632           0 :                         if (i == chunk || i == parity)
     633             :                                 continue;
     634           0 :                         if (sr_raid5_addio(wu_r, i, blkno, len, NULL,
     635             :                             SCSI_DATA_IN, 0, xorbuf))
     636             :                                 goto bad;
     637             :                 }
     638             : 
     639             :         }
     640             : 
     641             :         /* Write new parity. */
     642           0 :         if (sr_raid5_addio(wu, parity, blkno, len, xorbuf, xs->flags,
     643             :             SR_CCBF_FREEBUF, NULL))
     644             :                 goto bad;
     645             : 
     646             : data_write:
     647             :         /* Write new data. */
     648           0 :         if (chunk_online || chunk_rebuild)
     649           0 :                 if (sr_raid5_addio(wu, chunk, blkno, len, data, xs->flags,
     650             :                     0, NULL))
     651             :                         goto bad;
     652             : 
     653           0 :         return (0);
     654             : 
     655             : bad:
     656           0 :         return (1);
     657           0 : }
     658             : 
     659             : void
     660           0 : sr_raid5_intr(struct buf *bp)
     661             : {
     662           0 :         struct sr_ccb           *ccb = (struct sr_ccb *)bp;
     663           0 :         struct sr_workunit      *wu = ccb->ccb_wu;
     664           0 :         struct sr_discipline    *sd = wu->swu_dis;
     665             :         int                     s;
     666             : 
     667             :         DNPRINTF(SR_D_INTR, "%s: sr_raid5_intr bp %p xs %p\n",
     668             :             DEVNAME(sd->sd_sc), bp, wu->swu_xs);
     669             : 
     670           0 :         s = splbio();
     671           0 :         sr_ccb_done(ccb);
     672             : 
     673             :         /* XXX - Should this be done via the taskq? */
     674             : 
     675             :         /* XOR data to result. */
     676           0 :         if (ccb->ccb_state == SR_CCB_OK && ccb->ccb_opaque)
     677           0 :                 sr_raid5_xor(ccb->ccb_opaque, ccb->ccb_buf.b_data,
     678           0 :                     ccb->ccb_buf.b_bcount);
     679             : 
     680             :         /* Free allocated data buffer. */
     681           0 :         if (ccb->ccb_flags & SR_CCBF_FREEBUF) {
     682           0 :                 sr_block_put(sd, ccb->ccb_buf.b_data, ccb->ccb_buf.b_bcount);
     683           0 :                 ccb->ccb_buf.b_data = NULL;
     684           0 :         }
     685             : 
     686           0 :         sr_wu_done(wu);
     687           0 :         splx(s);
     688           0 : }
     689             : 
     690             : int
     691           0 : sr_raid5_wu_done(struct sr_workunit *wu)
     692             : {
     693           0 :         struct sr_discipline    *sd = wu->swu_dis;
     694           0 :         struct scsi_xfer        *xs = wu->swu_xs;
     695             : 
     696             :         /* XXX - we have no way of propagating errors... */
     697           0 :         if (wu->swu_flags & (SR_WUF_DISCIPLINE | SR_WUF_REBUILD))
     698           0 :                 return SR_WU_OK;
     699             : 
     700             :         /* XXX - This is insufficient for RAID 5. */
     701           0 :         if (wu->swu_ios_succeeded > 0) {
     702           0 :                 xs->error = XS_NOERROR;
     703           0 :                 return SR_WU_OK;
     704             :         }
     705             : 
     706           0 :         if (xs->flags & SCSI_DATA_IN) {
     707           0 :                 printf("%s: retrying read on block %lld\n",
     708             :                     sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
     709           0 :                 sr_wu_release_ccbs(wu);
     710           0 :                 wu->swu_state = SR_WU_RESTART;
     711           0 :                 if (sd->sd_scsi_rw(wu) == 0)
     712           0 :                         return SR_WU_RESTART;
     713             :         } else {
     714             :                 /* XXX - retry write if we just went from online to degraded. */
     715           0 :                 printf("%s: permanently fail write on block %lld\n",
     716             :                     sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
     717             :         }
     718             : 
     719           0 :         wu->swu_state = SR_WU_FAILED;
     720           0 :         xs->error = XS_DRIVER_STUFFUP;
     721             : 
     722           0 :         return SR_WU_FAILED;
     723           0 : }
     724             : 
     725             : int
     726           0 : sr_raid5_addio(struct sr_workunit *wu, int chunk, daddr_t blkno,
     727             :     long len, void *data, int xsflags, int ccbflags, void *xorbuf)
     728             : {
     729           0 :         struct sr_discipline    *sd = wu->swu_dis;
     730             :         struct sr_ccb           *ccb;
     731             : 
     732             :         DNPRINTF(SR_D_DIS, "sr_raid5_addio: %s chunk %d block %lld "
     733             :             "length %ld %s\n", (xsflags & SCSI_DATA_IN) ? "read" : "write",
     734             :             chunk, (long long)blkno, len, xorbuf ? "X0R" : "-");
     735             : 
     736             :         /* Allocate temporary buffer. */
     737           0 :         if (data == NULL) {
     738           0 :                 data = sr_block_get(sd, len);
     739           0 :                 if (data == NULL)
     740           0 :                         return (-1);
     741           0 :                 ccbflags |= SR_CCBF_FREEBUF;
     742           0 :         }
     743             : 
     744           0 :         ccb = sr_ccb_rw(sd, chunk, blkno, len, data, xsflags, ccbflags);
     745           0 :         if (ccb == NULL) {
     746           0 :                 if (ccbflags & SR_CCBF_FREEBUF)
     747           0 :                         sr_block_put(sd, data, len);
     748           0 :                 return (-1);
     749             :         }
     750           0 :         ccb->ccb_opaque = xorbuf;
     751           0 :         sr_wu_enqueue_ccb(wu, ccb);
     752             : 
     753           0 :         return (0);
     754           0 : }
     755             : 
     756             : void
     757           0 : sr_raid5_xor(void *a, void *b, int len)
     758             : {
     759           0 :         uint32_t                *xa = a, *xb = b;
     760             : 
     761           0 :         len >>= 2;
     762           0 :         while (len--)
     763           0 :                 *xa++ ^= *xb++;
     764           0 : }
     765             : 
     766             : void
     767           0 : sr_raid5_rebuild(struct sr_discipline *sd)
     768             : {
     769             :         int64_t strip_no, strip_size, strip_bits, i, restart;
     770             :         int64_t chunk_count, chunk_strips, chunk_lba, chunk_size, row_size;
     771             :         struct sr_workunit *wu_r, *wu_w;
     772             :         int s, slept, percent = 0, old_percent = -1;
     773             :         int rebuild_chunk = -1;
     774             :         void *xorbuf;
     775             : 
     776             :         /* Find the rebuild chunk. */
     777           0 :         for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
     778           0 :                 if (sr_raid5_chunk_rebuild(sd, i)) {
     779             :                         rebuild_chunk = i;
     780           0 :                         break;
     781             :                 }
     782             :         }
     783           0 :         if (rebuild_chunk == -1)
     784             :                 goto bad;
     785             : 
     786           0 :         strip_size = sd->sd_meta->ssdi.ssd_strip_size;
     787           0 :         strip_bits = sd->mds.mdd_raid5.sr5_strip_bits;
     788           0 :         chunk_count = sd->sd_meta->ssdi.ssd_chunk_no - 1;
     789           0 :         chunk_size = sd->sd_meta->ssdi.ssd_size / chunk_count;
     790           0 :         chunk_strips = (chunk_size << DEV_BSHIFT) >> strip_bits;
     791           0 :         row_size = (chunk_count << strip_bits) >> DEV_BSHIFT;
     792             : 
     793             :         DNPRINTF(SR_D_REBUILD, "%s: %s sr_raid5_rebuild volume size = %lld, "
     794             :             "chunk count = %lld, chunk size = %lld, chunk strips = %lld, "
     795             :             "row size = %lld\n", DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
     796             :             sd->sd_meta->ssdi.ssd_size, chunk_count, chunk_size, chunk_strips,
     797             :             row_size);
     798             : 
     799           0 :         restart = sd->sd_meta->ssd_rebuild / row_size;
     800           0 :         if (restart > chunk_strips) {
     801           0 :                 printf("%s: bogus rebuild restart offset, starting from 0\n",
     802           0 :                     DEVNAME(sd->sd_sc));
     803             :                 restart = 0;
     804           0 :         }
     805           0 :         if (restart != 0) {
     806           0 :                 percent = sr_rebuild_percent(sd);
     807           0 :                 printf("%s: resuming rebuild on %s at %d%%\n",
     808           0 :                     DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, percent);
     809           0 :         }
     810             : 
     811           0 :         for (strip_no = restart; strip_no < chunk_strips; strip_no++) {
     812           0 :                 chunk_lba = (strip_size >> DEV_BSHIFT) * strip_no;
     813             : 
     814             :                 DNPRINTF(SR_D_REBUILD, "%s: %s rebuild strip %lld, "
     815             :                     "chunk lba = %lld\n", DEVNAME(sd->sd_sc),
     816             :                     sd->sd_meta->ssd_devname, strip_no, chunk_lba);
     817             : 
     818           0 :                 wu_w = sr_scsi_wu_get(sd, 0);
     819           0 :                 wu_r = sr_scsi_wu_get(sd, 0);
     820             : 
     821           0 :                 xorbuf = sr_block_get(sd, strip_size);
     822           0 :                 if (sr_raid5_regenerate(wu_r, rebuild_chunk, chunk_lba,
     823             :                     strip_size, xorbuf))
     824             :                         goto bad;
     825           0 :                 if (sr_raid5_addio(wu_w, rebuild_chunk, chunk_lba, strip_size,
     826             :                     xorbuf, SCSI_DATA_OUT, SR_CCBF_FREEBUF, NULL))
     827             :                         goto bad;
     828             : 
     829             :                 /* Collide write work unit with read work unit. */
     830           0 :                 wu_r->swu_state = SR_WU_INPROGRESS;
     831           0 :                 wu_r->swu_flags |= SR_WUF_REBUILD;
     832           0 :                 wu_w->swu_state = SR_WU_DEFERRED;
     833           0 :                 wu_w->swu_flags |= SR_WUF_REBUILD | SR_WUF_WAKEUP;
     834           0 :                 wu_r->swu_collider = wu_w;
     835             : 
     836             :                 /* Block I/O to this strip while we rebuild it. */
     837           0 :                 wu_r->swu_blk_start = (strip_no / chunk_count) * row_size;
     838           0 :                 wu_r->swu_blk_end = wu_r->swu_blk_start + row_size - 1;
     839           0 :                 wu_w->swu_blk_start = wu_r->swu_blk_start;
     840           0 :                 wu_w->swu_blk_end = wu_r->swu_blk_end;
     841             : 
     842             :                 DNPRINTF(SR_D_REBUILD, "%s: %s rebuild swu_blk_start = %lld, "
     843             :                     "swu_blk_end = %lld\n", DEVNAME(sd->sd_sc),
     844             :                     sd->sd_meta->ssd_devname,
     845             :                     wu_r->swu_blk_start, wu_r->swu_blk_end);
     846             : 
     847           0 :                 s = splbio();
     848           0 :                 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link);
     849           0 :                 splx(s);
     850             : 
     851           0 :                 sr_schedule_wu(wu_r);
     852             : 
     853             :                 slept = 0;
     854           0 :                 while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) {
     855           0 :                         tsleep(wu_w, PRIBIO, "sr_rebuild", 0);
     856             :                         slept = 1;
     857             :                 }
     858           0 :                 if (!slept)
     859           0 :                         tsleep(sd->sd_sc, PWAIT, "sr_yield", 1);
     860             : 
     861           0 :                 sr_scsi_wu_put(sd, wu_r);
     862           0 :                 sr_scsi_wu_put(sd, wu_w);
     863             : 
     864           0 :                 sd->sd_meta->ssd_rebuild = chunk_lba * chunk_count;
     865             : 
     866           0 :                 percent = sr_rebuild_percent(sd);
     867           0 :                 if (percent != old_percent && strip_no != chunk_strips - 1) {
     868           0 :                         if (sr_meta_save(sd, SR_META_DIRTY))
     869           0 :                                 printf("%s: could not save metadata to %s\n",
     870           0 :                                     DEVNAME(sd->sd_sc),
     871           0 :                                     sd->sd_meta->ssd_devname);
     872             :                         old_percent = percent;
     873           0 :                 }
     874             : 
     875           0 :                 if (sd->sd_reb_abort)
     876             :                         goto abort;
     877             :         }
     878             : 
     879             :         DNPRINTF(SR_D_REBUILD, "%s: %s rebuild complete\n", DEVNAME(sd->sd_sc),
     880             :             sd->sd_meta->ssd_devname);
     881             : 
     882             :         /* all done */
     883           0 :         sd->sd_meta->ssd_rebuild = 0;
     884           0 :         for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
     885           0 :                 if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status ==
     886             :                     BIOC_SDREBUILD) {
     887           0 :                         sd->sd_set_chunk_state(sd, i, BIOC_SDONLINE);
     888           0 :                         break;
     889             :                 }
     890             :         }
     891             : 
     892           0 :         return;
     893             : 
     894             : abort:
     895           0 :         if (sr_meta_save(sd, SR_META_DIRTY))
     896           0 :                 printf("%s: could not save metadata to %s\n",
     897           0 :                     DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
     898             : bad:
     899           0 :         return;
     900           0 : }
     901             : 
     902             : #if 0
     903             : void
     904             : sr_raid5_scrub(struct sr_discipline *sd)
     905             : {
     906             :         int64_t strip_no, strip_size, no_chunk, parity, max_strip, strip_bits;
     907             :         int64_t i;
     908             :         struct sr_workunit *wu_r, *wu_w;
     909             :         int s, slept;
     910             :         void *xorbuf;
     911             : 
     912             :         wu_w = sr_scsi_wu_get(sd, 0);
     913             :         wu_r = sr_scsi_wu_get(sd, 0);
     914             : 
     915             :         no_chunk = sd->sd_meta->ssdi.ssd_chunk_no - 1;
     916             :         strip_size = sd->sd_meta->ssdi.ssd_strip_size;
     917             :         strip_bits = sd->mds.mdd_raid5.sr5_strip_bits;
     918             :         max_strip = sd->sd_meta->ssdi.ssd_size >> strip_bits;
     919             : 
     920             :         for (strip_no = 0; strip_no < max_strip; strip_no++) {
     921             :                 parity = no_chunk - ((strip_no / no_chunk) % (no_chunk + 1));
     922             : 
     923             :                 xorbuf = sr_block_get(sd, strip_size);
     924             :                 for (i = 0; i <= no_chunk; i++) {
     925             :                         if (i != parity)
     926             :                                 sr_raid5_addio(wu_r, i, 0xBADCAFE, strip_size,
     927             :                                     NULL, SCSI_DATA_IN, 0, xorbuf);
     928             :                 }
     929             :                 sr_raid5_addio(wu_w, parity, 0xBADCAFE, strip_size, xorbuf,
     930             :                     SCSI_DATA_OUT, SR_CCBF_FREEBUF, NULL);
     931             : 
     932             :                 wu_r->swu_flags |= SR_WUF_REBUILD;
     933             : 
     934             :                 /* Collide wu_w with wu_r */
     935             :                 wu_w->swu_state = SR_WU_DEFERRED;
     936             :                 wu_w->swu_flags |= SR_WUF_REBUILD | SR_WUF_WAKEUP;
     937             :                 wu_r->swu_collider = wu_w;
     938             : 
     939             :                 s = splbio();
     940             :                 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link);
     941             :                 splx(s);
     942             : 
     943             :                 wu_r->swu_state = SR_WU_INPROGRESS;
     944             :                 sr_schedule_wu(wu_r);
     945             : 
     946             :                 slept = 0;
     947             :                 while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) {
     948             :                         tsleep(wu_w, PRIBIO, "sr_scrub", 0);
     949             :                         slept = 1;
     950             :                 }
     951             :                 if (!slept)
     952             :                         tsleep(sd->sd_sc, PWAIT, "sr_yield", 1);
     953             :         }
     954             : done:
     955             :         return;
     956             : }
     957             : #endif

Generated by: LCOV version 1.13