Hello

RaidFrame lacks the ability to remove failed components and hot spares,
which means a reboot is required to replace a dead disk. I have been
working with Greg Oster to address that. I now have code that passes
basic tests; it needs review. Please find it attached.

What's new?
- Removing an unused or failed component works, using raidctl -r dev raid0
- Failing a used hot spare works by failing the component it spares,
  i.e. if wd1 is spared by wd2, raidctl -f wd1 raid0 will fail wd2.
  It looks odd, but it matches the behavior of other operations.
- Failing a reconstructing component is now supported.

Some comments on the patch:
- I renamed the ioctl RAIDFRAME_REMOVE_HOT_SPARE to
  RAIDFRAME_REMOVE_COMPONENT to avoid modifying raidctl: raidctl -r
  already uses that ioctl to remove hot spares, but it was unimplemented
  in the kernel. The ioctl now means "remove hot spare or component",
  and an older raidctl works (and builds) with an updated kernel.
- If you have two spares and remove the first one, raidctl -s shows an
  absent_spare0 where the first spare was. Greg and I discussed whether
  this absent spare should be hidden or not. Hiding it makes the
  userland and kernel ideas of the column number mismatch for the
  remaining spare; I consider the complexity of addressing that not
  worth it, hence the absent_spare0 display.
- numSpare is the count of available hot spares, as before.
  maxSpare is the count of available hot spares plus absent_spare slots.
  maxQueue is like maxSpare, but for disk queues, except that we never
  decrease it: if the last spare is removed, maxSpare is decreased and
  maxQueue is unchanged, since we do not free the queue of the removed
  spare. RaidFrame is designed to free everything it allocated at
  unconfigure time.

I also include a sample script that tests the new features. Your device
names may vary.

Feedback welcome!

-- 
Emmanuel Dreyfus
manu%netbsd.org@localhost
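As a quick illustration of the removal behavior described above (a
sketch only; wd2 and wd3 are hypothetical device names, yours may
vary):

  raidctl -a /dev/wd2a raid0   # add two hot spares
  raidctl -a /dev/wd3a raid0
  raidctl -r /dev/wd2a raid0   # remove the first, still unused spare
  raidctl -s raid0             # now lists absent_spare0 in the slot
                               # where /dev/wd2a used to be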
? sbin/raidctl/.depend
? sbin/raidctl/.gdbinit
? sbin/raidctl/raidctl
? sbin/raidctl/raidctl.d
? sbin/raidctl/raidctl.html8
? sbin/raidctl/raidctl_hostops.d
? sbin/raidctl/raidctl_rumpops.d
? sbin/raidctl/rf_configure.d
? sbin/raidctl/rump.raidctl
Index: sbin/raidctl/raidctl.8
===================================================================
RCS file: /cvsroot/src/sbin/raidctl/raidctl.8,v
retrieving revision 1.79
diff -U4 -r1.79 raidctl.8
--- sbin/raidctl/raidctl.8	14 Jun 2022 08:06:18 -0000	1.79
+++ sbin/raidctl/raidctl.8	7 Sep 2023 06:23:17 -0000
@@ -321,11 +321,14 @@
 .Ar component .
 This is useful for reconstructing back onto a component after
 it has been replaced following a failure.
 .It Fl r Ar component Ar dev
-Remove the spare disk specified by
+Remove the specified
 .Ar component
-from the set of available spare components.
+of the device. If
+.Ar component
+is a hot spare, it must be in failed or spare state. Otherwise, it must
+be in failed or spared state.
 .It Fl S Ar dev
 Check the status of parity re-writing, component reconstruction, and
 component copyback.
 The output indicates the amount of progress
Index: sbin/raidctl/raidctl.c
===================================================================
RCS file: /cvsroot/src/sbin/raidctl/raidctl.c,v
retrieving revision 1.78
diff -U4 -r1.78 raidctl.c
--- sbin/raidctl/raidctl.c	14 Jun 2022 08:06:18 -0000	1.78
+++ sbin/raidctl/raidctl.c	7 Sep 2023 06:23:17 -0000
@@ -63,8 +63,12 @@
 #include <dev/raidframe/raidframeio.h>
 #include "rf_configure.h"
 #include "prog_ops.h"
 
+#ifndef RAIDFRAME_REMOVE_COMPONENT
+#define RAIDFRAME_REMOVE_COMPONENT RAIDFRAME_REMOVE_HOT_SPARE
+#endif
+
 #define	CONFIGURE_TEST	1	/* must be different from any raidframe ioctl */
 
 void	do_ioctl(int, u_long, void *, const char *);
 static  void rf_configure(int, char*, int);
@@ -78,9 +82,9 @@
 static  void set_component_label(int, char *);
 static  void init_component_labels(int, int);
 static  void set_autoconfig(int, int, char *);
 static  void add_hot_spare(int, char *);
-static  void remove_hot_spare(int, char *);
+static  void remove_component(int, char *);
 static  void rebuild_in_place(int, char *);
 static  void check_status(int,int);
 static  void check_parity(int,int, char *);
 static  void do_meter(int, u_long);
@@ -235,9 +239,9 @@
 			do_rewrite = 1;
 			num_options++;
 			break;
 		case 'r':
-			action = RAIDFRAME_REMOVE_HOT_SPARE;
+			action = RAIDFRAME_REMOVE_COMPONENT;
 			get_comp(component, optarg, sizeof(component));
 			num_options++;
 			break;
 		case 'R':
@@ -318,10 +322,10 @@
 	switch (action) {
 	case RAIDFRAME_ADD_HOT_SPARE:
 		add_hot_spare(fd, component);
 		break;
-	case RAIDFRAME_REMOVE_HOT_SPARE:
-		remove_hot_spare(fd, component);
+	case RAIDFRAME_REMOVE_COMPONENT:
+		remove_component(fd, component);
 		break;
 	case RAIDFRAME_CONFIGURE:
 		rf_configure(fd, config_filename, force);
 		break;
@@ -917,24 +921,24 @@
 	    "RAIDFRAME_ADD_HOT_SPARE");
 }
 
 static void
-remove_hot_spare(int fd, char *component)
+remove_component(int fd, char *component)
 {
-	RF_SingleComponent_t hot_spare;
+	RF_SingleComponent_t comp;
 	int component_num;
 	int num_cols;
 
 	get_component_number(fd, component, &component_num, &num_cols);
 
-	hot_spare.row = component_num / num_cols;
-	hot_spare.column = component_num % num_cols;
+	comp.row = component_num / num_cols;
+	comp.column = component_num % num_cols;
 
-	strncpy(hot_spare.component_name, component,
-	    sizeof(hot_spare.component_name));
+	strncpy(comp.component_name, component,
+	    sizeof(comp.component_name));
 
-	do_ioctl( fd, RAIDFRAME_REMOVE_HOT_SPARE, &hot_spare,
-	    "RAIDFRAME_REMOVE_HOT_SPARE");
+	do_ioctl( fd, RAIDFRAME_REMOVE_COMPONENT, &comp,
+	    "RAIDFRAME_REMOVE_COMPONENT");
 }
 
 static void
 rebuild_in_place(int fd, char *component)
Index: sys/dev/raidframe/raidframeio.h
===================================================================
RCS file: /cvsroot/src/sys/dev/raidframe/raidframeio.h,v
retrieving revision 1.11
diff -U4 -r1.11 raidframeio.h
--- sys/dev/raidframe/raidframeio.h	7 Aug 2021 16:19:15 -0000	1.11
+++ sys/dev/raidframe/raidframeio.h	7 Sep 2023 06:23:17 -0000
@@ -109,9 +109,10 @@
 #define RAIDFRAME_SET_COMPONENT_LABEL _IOW ('r', 20, RF_ComponentLabel_t)
 #define RAIDFRAME_INIT_LABELS _IOW ('r', 21, RF_ComponentLabel_t)
 #define RAIDFRAME_ADD_HOT_SPARE _IOW ('r', 22, RF_SingleComponent_t)
-#define RAIDFRAME_REMOVE_HOT_SPARE _IOW ('r', 23, RF_SingleComponent_t)
+#define RAIDFRAME_REMOVE_COMPONENT _IOW ('r', 23, RF_SingleComponent_t)
+#define RAIDFRAME_REMOVE_HOT_SPARE RAIDFRAME_REMOVE_COMPONENT
 #define RAIDFRAME_REBUILD_IN_PLACE _IOW ('r', 24, RF_SingleComponent_t)
 #define RAIDFRAME_CHECK_PARITY _IOWR ('r', 25, int)
 #define RAIDFRAME_CHECK_PARITYREWRITE_STATUS _IOWR ('r', 26, int)
 #define RAIDFRAME_CHECK_COPYBACK_STATUS _IOWR ('r', 27, int)
Index: sys/dev/raidframe/raidframevar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/raidframe/raidframevar.h,v
retrieving revision 1.23
diff -U4 -r1.23 raidframevar.h
--- sys/dev/raidframe/raidframevar.h	7 Aug 2021 16:19:15 -0000	1.23
+++ sys/dev/raidframe/raidframevar.h	7 Sep 2023 06:23:17 -0000
@@ -390,12 +390,15 @@
 				 * yet replaced */
 	rf_ds_spare,		/* an available spare disk */
 	rf_ds_used_spare,	/* a spare which has been used, and hence is
 				 * not available */
-	rf_ds_rebuilding_spare	/* a spare which is being rebuilt to */
+	rf_ds_rebuilding_spare,	/* a spare which is being rebuilt to */
+	rf_ds_absent_spare,	/* removed spare */
 };
 typedef enum RF_DiskStatus_e RF_DiskStatus_t;
 
+#define RF_ABSENT_SPARE(disk) (disk.status == rf_ds_absent_spare)
+
 struct RF_RaidDisk_s {
 	char devname[56];	/* name of device file */
 	RF_DiskStatus_t status;	/* whether it is up or down */
 	RF_RowCol_t spareCol;	/* if in status "spared", this identifies the
@@ -637,6 +640,5 @@
 	struct rf_pmctrs ctrs;
 };
 
-
 #endif /* !_RF_RAIDFRAMEVAR_H_ */
Index: sys/dev/raidframe/rf_diskqueue.c
===================================================================
RCS file: /cvsroot/src/sys/dev/raidframe/rf_diskqueue.c,v
retrieving revision 1.63
diff -U4 -r1.63 rf_diskqueue.c
--- sys/dev/raidframe/rf_diskqueue.c	14 Dec 2021 00:46:43 -0000	1.63
+++ sys/dev/raidframe/rf_diskqueue.c	7 Sep 2023 06:23:17 -0000
@@ -186,8 +186,15 @@
 	rf_ShutdownCreate(listp, rf_ShutdownDiskQueue, diskqueue);
 	return (0);
 }
 
+int
+rf_UpdateDiskQueue(RF_DiskQueue_t *diskqueue, RF_RaidDisk_t *disk)
+{
+	diskqueue->dev = disk->dev;
+	return(0);
+}
+
 static void
 rf_ShutdownDiskQueueSystem(void *arg)
 {
 	RF_Raid_t *raidPtr;
@@ -254,9 +261,9 @@
 			return (rc);
 	}
 
 	spareQueues = &raidPtr->Queues[raidPtr->numCol];
-	for (r = 0; r < raidPtr->numSpare; r++) {
+	for (r = 0; r < raidPtr->maxQueue; r++) {
 		rc = rf_ConfigureDiskQueue(raidPtr, &spareQueues[r],
 					   raidPtr->numCol + r, p,
 					   raidPtr->sectorsPerDisk,
 					   raidPtr->Disks[raidPtr->numCol + r].dev,
Index: sys/dev/raidframe/rf_diskqueue.h
===================================================================
RCS file: /cvsroot/src/sys/dev/raidframe/rf_diskqueue.h,v
retrieving revision 1.29
diff -U4 -r1.29 rf_diskqueue.h
--- sys/dev/raidframe/rf_diskqueue.h	27 Jul 2021 03:01:48 -0000	1.29
+++ sys/dev/raidframe/rf_diskqueue.h	7 Sep 2023 06:23:17 -0000
@@ -148,6 +148,7 @@
 			  RF_RowCol_t, const RF_DiskQueueSW_t *,
 			  RF_SectorCount_t, dev_t, int, RF_ShutdownList_t **,
 			  RF_AllocListElem_t *);
+int rf_UpdateDiskQueue(RF_DiskQueue_t *, RF_RaidDisk_t *);
 
 #endif /* !_RF__RF_DISKQUEUE_H_ */
Index: sys/dev/raidframe/rf_disks.c
===================================================================
RCS file: /cvsroot/src/sys/dev/raidframe/rf_disks.c,v
retrieving revision 1.93
diff -U4 -r1.93 rf_disks.c
--- sys/dev/raidframe/rf_disks.c	10 Aug 2022 01:16:38 -0000	1.93
+++ sys/dev/raidframe/rf_disks.c	7 Sep 2023 06:23:17 -0000
@@ -247,9 +247,9 @@
 	/* The space for the spares should have already been allocated by
 	 * ConfigureDisks() */
 
 	disks = &raidPtr->Disks[raidPtr->numCol];
-	for (i = 0; i < raidPtr->numSpare; i++) {
+	for (i = 0; i < raidPtr->maxSpare; i++) {
 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
 				       &disks[i], raidPtr->numCol + i);
 		if (ret)
 			goto fail;
@@ -269,9 +269,12 @@
 	}
 
 	/* check sizes and block sizes on spare disks */
 	bs = 1 << raidPtr->logBytesPerSector;
-	for (i = 0; i < raidPtr->numSpare; i++) {
+	for (i = 0; i < raidPtr->maxSpare; i++) {
+		if (RF_ABSENT_SPARE(disks[i]))
+			continue;
+
 		if (disks[i].blockSize != bs) {
 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
 			ret = EINVAL;
 			goto fail;
@@ -336,8 +339,15 @@
 		ret = ENOMEM;
 		goto fail;
 	}
 
+	raidPtr->abortRecon = RF_MallocAndAdd(
+	    entries * sizeof(int), raidPtr->cleanupList);
+	if (raidPtr->abortRecon == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+
 	return(0);
 
 fail:
 	rf_UnconfigureVnodes( raidPtr );
@@ -999,9 +1009,14 @@
 	/* the beginning of the spares... */
 	disks = &raidPtr->Disks[raidPtr->numCol];
 
-	spare_number = raidPtr->numSpare;
+	for (spare_number = 0;
+	     spare_number < raidPtr->maxSpare;
+	     spare_number++) {
+		if (RF_ABSENT_SPARE(disks[spare_number]))
+			break;
+	}
 
 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
 			       &disks[spare_number],
 			       raidPtr->numCol + spare_number);
@@ -1053,25 +1068,43 @@
 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
 		}
 	}
 
-	spareQueues = &raidPtr->Queues[raidPtr->numCol];
-	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
-				 raidPtr->numCol + spare_number,
-				 raidPtr->qType,
-				 raidPtr->sectorsPerDisk,
-				 raidPtr->Disks[raidPtr->numCol +
-						  spare_number].dev,
-				 raidPtr->maxOutstanding,
-				 &raidPtr->shutdownList,
-				 raidPtr->cleanupList);
+	/*
+	 * We only grow one initialized diskQueue at a time
+	 * spare_number can be lower than raidPtr->maxQueue (update)
+	 * or they can be equal (initialize new queue)
+	 */
+	RF_ASSERT(spare_number <= raidPtr->maxQueue);
 
-	rf_lock_mutex2(raidPtr->mutex);
-	raidPtr->numSpare++;
-	rf_unlock_mutex2(raidPtr->mutex);
+	spareQueues = &raidPtr->Queues[raidPtr->numCol];
+	if (spare_number == raidPtr->maxQueue) {
+		ret = rf_ConfigureDiskQueue(raidPtr, &spareQueues[spare_number],
+					    raidPtr->numCol + spare_number,
+					    raidPtr->qType,
+					    raidPtr->sectorsPerDisk,
+					    raidPtr->Disks[raidPtr->numCol +
+							   spare_number].dev,
+					    raidPtr->maxOutstanding,
+					    &raidPtr->shutdownList,
+					    raidPtr->cleanupList);
+		if (ret)
+			goto fail;
+		raidPtr->maxQueue++;
+	} else {
+		(void)rf_UpdateDiskQueue(&spareQueues[spare_number],
+		    &disks[spare_number]);
+	}
 
 fail:
 	rf_lock_mutex2(raidPtr->mutex);
+
+	if (ret == 0) {
+		if (spare_number == raidPtr->maxSpare)
+			raidPtr->maxSpare++;
+		raidPtr->numSpare++;
+	}
+
 	raidPtr->adding_hot_spare = 0;
 	rf_signal_cond2(raidPtr->adding_hot_spare_cv);
 	rf_unlock_mutex2(raidPtr->mutex);
@@ -1080,58 +1113,119 @@
 
 int
 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
 {
-#if 0
 	int spare_number;
-#endif
+	int maxspare;
+	RF_RaidDisk_t *disk;
+	RF_RaidDisk_t *disks;
+	struct vnode *vp;
+	int ret = EINVAL;
 
-	if (raidPtr->numSpare==0) {
-		printf("No spares to remove!\n");
-		return(EINVAL);
-	}
+	spare_number = sparePtr->column - raidPtr->numCol;
+	if (spare_number < 0 || spare_number > raidPtr->maxSpare)
+		return(ret);
+
+	rf_lock_mutex2(raidPtr->mutex);
 
-	return(EINVAL); /* XXX not implemented yet */
-#if 0
-	spare_number = sparePtr->column;
+	disk = &raidPtr->Disks[raidPtr->numCol + spare_number];
+	if (disk->status != rf_ds_spare &&
+	    disk->status != rf_ds_failed) {
+		printf("Spare is in use %d\n", disk->status);
+		ret = EBUSY;
+		goto out;
+	}
+
+	vp = raidPtr->raid_cinfo[raidPtr->numCol + spare_number].ci_vp;
+	raidPtr->raid_cinfo[raidPtr->numCol + spare_number].ci_vp = NULL;
+	raidPtr->raid_cinfo[raidPtr->numCol + spare_number].ci_dev = 0;
+
+	/* This component was not automatically configured */
+	disk->auto_configured = 0;
+	disk->dev = 0;
+	disk->numBlocks = 0;
+	disk->status = rf_ds_absent_spare;
+	snprintf(disk->devname, sizeof(disk->devname),
+	    "absent_spare%d", spare_number);
 
+	rf_close_component(raidPtr, vp, 0);
 
-	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
-		return(EINVAL);
+	/* Find new highest configured spare */
+	disks = &raidPtr->Disks[raidPtr->numCol];
+	for (maxspare = raidPtr->maxSpare; maxspare > 0; maxspare--) {
+		if (!RF_ABSENT_SPARE(disks[maxspare - 1]))
+			break;
 	}
+	raidPtr->maxSpare = maxspare;
 
-	/* verify that this spare isn't in use... */
+	raidPtr->numSpare--;
+	ret = 0;
+out:
+	rf_unlock_mutex2(raidPtr->mutex);
+	return(ret);
+}
+
+/*
+ * Delete a non hot spare component
+ */
+int
+rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
+{
+	RF_RaidDisk_t *disk;
+	RF_RowCol_t col = component->column;
+	struct vnode *vp;
+	int ret = EINVAL;
 
+	if (col < 0 || col >= raidPtr->numCol)
+		return(ret);
 
-	/* it's gone.. */
+	rf_lock_mutex2(raidPtr->mutex);
 
-	raidPtr->numSpare--;
+	disk = &raidPtr->Disks[col];
 
-	return(0);
-#endif
-}
+	/* 1. This component must be marked as failed or spared */
+	switch (disk->status) {
+	case rf_ds_failed:
+	case rf_ds_dist_spared:
+	case rf_ds_spared:
+		break;
+	default:
+		ret = EBUSY;
+		goto out;
+	}
+
+	vp = raidPtr->raid_cinfo[col].ci_vp;
+	raidPtr->raid_cinfo[col].ci_vp = NULL;
+	raidPtr->raid_cinfo[col].ci_dev = 0;
+
+	/* This component was not automatically configured */
+	disk->auto_configured = 0;
+	disk->dev = 0;
+	disk->numBlocks = 0;
+	snprintf(disk->devname, sizeof(disk->devname), "component%d", col);
+
+	rf_close_component(raidPtr, vp, 0);
+
+	ret = 0;
+out:
+	rf_unlock_mutex2(raidPtr->mutex);
+
+	return(ret);
+}
 
 int
-rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
+rf_remove_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
 {
-#if 0
-	RF_RaidDisk_t *disks;
-#endif
+	RF_RowCol_t col = component->column;
 
-	if ((component->column < 0) ||
-	    (component->column >= raidPtr->numCol)) {
+	if (col < 0 || col >= raidPtr->numCol + raidPtr->maxSpare)
 		return(EINVAL);
-	}
 
-#if 0
-	disks = &raidPtr->Disks[component->column];
-#endif
-
-	/* 1. This component must be marked as 'failed' */
-
-	return(EINVAL); /* Not implemented yet. */
+	if (col >= raidPtr->numCol)
+		return rf_remove_hot_spare(raidPtr, component);
+	else
+		return rf_delete_component(raidPtr, component);
 }
 
 int
 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
Index: sys/dev/raidframe/rf_disks.h
===================================================================
RCS file: /cvsroot/src/sys/dev/raidframe/rf_disks.h,v
retrieving revision 1.14
diff -U4 -r1.14 rf_disks.h
--- sys/dev/raidframe/rf_disks.h	11 Dec 2005 12:23:37 -0000	1.14
+++ sys/dev/raidframe/rf_disks.h	7 Sep 2023 06:23:17 -0000
@@ -51,7 +51,8 @@
 int rf_CheckLabels(RF_Raid_t *, RF_Config_t *);
 int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
 int rf_remove_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
 int rf_delete_component(RF_Raid_t *r, RF_SingleComponent_t *);
+int rf_remove_component(RF_Raid_t *r, RF_SingleComponent_t *);
 int rf_incorporate_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
 
 #endif /* !_RF__RF_DISKS_H_ */
Index: sys/dev/raidframe/rf_driver.c
===================================================================
RCS file: /cvsroot/src/sys/dev/raidframe/rf_driver.c,v
retrieving revision 1.140
diff -U4 -r1.140 rf_driver.c
--- sys/dev/raidframe/rf_driver.c	10 Aug 2022 01:16:38 -0000	1.140
+++ sys/dev/raidframe/rf_driver.c	7 Sep 2023 06:23:17 -0000
@@ -347,8 +347,10 @@
 	KASSERT(cfgPtr->numSpare >= 0);
 
 	raidPtr->numCol = cfgPtr->numCol;
 	raidPtr->numSpare = cfgPtr->numSpare;
+	raidPtr->maxSpare = cfgPtr->numSpare;
+	raidPtr->maxQueue = cfgPtr->numSpare;
 
 	raidPtr->status = rf_rs_optimal;
 	raidPtr->reconControl = NULL;
Index: sys/dev/raidframe/rf_netbsdkintf.c
===================================================================
RCS file: /cvsroot/src/sys/dev/raidframe/rf_netbsdkintf.c,v
retrieving revision 1.412
diff -U4 -r1.412 rf_netbsdkintf.c
--- sys/dev/raidframe/rf_netbsdkintf.c	15 Jun 2023 09:15:54 -0000	1.412
+++ sys/dev/raidframe/rf_netbsdkintf.c	7 Sep 2023 06:23:17 -0000
@@ -771,9 +771,9 @@
 	   If we didn't find a live ocmponent, we now check to see
 	   if there is a relevant spared component.
 	*/
 
-	for (c = 0; c < raidPtr->numSpare; c++) {
+	for (c = 0; c < raidPtr->maxSpare; c++) {
 		sparecol = raidPtr->numCol + c;
 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
 			/* How about this one? */
 			scol = -1;
@@ -1109,23 +1109,34 @@
 		/* bad column */
 		return EINVAL;
 	}
 
+	if (RF_ABSENT_SPARE(raidPtr->Disks[rr->col]))
+		return EINVAL;
+
 	rf_lock_mutex2(raidPtr->mutex);
 	if (raidPtr->status == rf_rs_reconstructing) {
-		/* you can't fail a disk while we're reconstructing! */
-		/* XXX wrong for RAID6 */
-		goto out;
+		raidPtr->abortRecon[rr->col] = 1;
 	}
 	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
 	    (raidPtr->numFailures > 0)) {
 		/* some other component has failed.  Let's not make
 		   things worse. XXX wrong for RAID6 */
 		goto out;
 	}
 	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
-		/* Can't fail a spared disk! */
-		goto out;
+		int spareCol = raidPtr->Disks[rr->col].spareCol;
+
+		if (spareCol < raidPtr->numCol ||
+		    spareCol >= raidPtr->numCol + raidPtr->maxSpare)
+			goto out;
+
+		/*
+		 * Fail the spare disk so that we can
+		 * reconstruct on another one.
+		 */
+		raidPtr->Disks[spareCol].status = rf_ds_failed;
+
 	}
 	rf_unlock_mutex2(raidPtr->mutex);
 
 	/* make a copy of the recon request so that we don't rely on
@@ -1559,15 +1570,17 @@
 	case RAIDFRAME_ADD_HOT_SPARE:
 		rf_copy_single_component(&component, data);
 		return rf_add_hot_spare(raidPtr, &component);
 
-	case RAIDFRAME_REMOVE_HOT_SPARE:
-		return retcode;
-
+	/* Remove a non hot-spare component, never implemented in userland */
 	case RAIDFRAME_DELETE_COMPONENT:
 		rf_copy_single_component(&component, data);
 		return rf_delete_component(raidPtr, &component);
 
+	case RAIDFRAME_REMOVE_COMPONENT:
+		rf_copy_single_component(&component, data);
+		return rf_remove_component(raidPtr, &component);
+
 	case RAIDFRAME_INCORPORATE_HOT_SPARE:
 		rf_copy_single_component(&component, data);
 		return rf_incorporate_hot_spare(raidPtr, &component);
@@ -2555,9 +2568,9 @@
 			}
 		}
 	}
 
-	for( c = 0; c < raidPtr->numSpare ; c++) {
+	for (c = 0; c < raidPtr->maxSpare ; c++) {
 		sparecol = raidPtr->numCol + c;
 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
 			/*
@@ -2628,9 +2641,9 @@
 		}
 		/* else we don't touch it.. */
 	}
 
-	for( c = 0; c < raidPtr->numSpare ; c++) {
+	for (c = 0; c < raidPtr->maxSpare ; c++) {
 		sparecol = raidPtr->numCol + c;
 		/* Need to ensure that the reconstruct actually completed! */
 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
 			/*
@@ -2704,9 +2717,11 @@
 		raidPtr->raid_cinfo[c].ci_vp = NULL;
 		raidPtr->Disks[c].auto_configured = 0;
 	}
 
-	for (r = 0; r < raidPtr->numSpare; r++) {
+	for (r = 0; r < raidPtr->maxSpare; r++) {
+		if (RF_ABSENT_SPARE(raidPtr->Disks[raidPtr->numCol + r]))
+			continue;
 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
 		rf_close_component(raidPtr, vp, acd);
 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
@@ -3456,9 +3471,9 @@
 			clabel->autoconfigure = new_value;
 			raidflush_component_label(raidPtr, column);
 		}
 	}
-	for(column = 0; column < raidPtr->numSpare ; column++) {
+	for(column = 0; column < raidPtr->maxSpare ; column++) {
 		sparecol = raidPtr->numCol + column;
 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
 			clabel = raidget_component_label(raidPtr, sparecol);
 			clabel->autoconfigure = new_value;
@@ -3482,10 +3497,11 @@
 			clabel->root_partition = new_value;
 			raidflush_component_label(raidPtr, column);
 		}
 	}
-	for(column = 0; column < raidPtr->numSpare ; column++) {
+	for (column = 0; column < raidPtr->maxSpare ; column++) {
 		sparecol = raidPtr->numCol + column;
+
 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
 			clabel = raidget_component_label(raidPtr, sparecol);
 			clabel->root_partition = new_value;
 			raidflush_component_label(raidPtr, sparecol);
@@ -3765,9 +3781,12 @@
 	int c;
 	int error;
 	int dkwhole = 0, dkpart;
 
-	for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
+	for (c = 0; c < raidPtr->numCol + raidPtr->maxSpare; c++) {
+		if (RF_ABSENT_SPARE(raidPtr->Disks[c]))
+			continue;
+
 		/*
 		 * Check any non-dead disk, even when currently being
 		 * reconstructed.
 		 */
@@ -3831,9 +3850,9 @@
 			error = e;
 		}
 	}
 
-	for (c = 0; c < raidPtr->numSpare ; c++) {
+	for (c = 0; c < raidPtr->maxSpare ; c++) {
 		int sparecol = raidPtr->numCol + c;
 		/* Need to ensure that the reconstruct actually completed! */
 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
 			int e = rf_sync_component_cache(raidPtr, sparecol,
@@ -3908,22 +3927,33 @@
 	config->cols = raidPtr->numCol;
 	config->ndevs = raidPtr->numCol;
 	if (config->ndevs >= RF_MAX_DISKS)
 		return ENOMEM;
-	config->nspares = raidPtr->numSpare;
+	config->nspares = raidPtr->maxSpare;
 	if (config->nspares >= RF_MAX_DISKS)
 		return ENOMEM;
 	config->maxqdepth = raidPtr->maxQueueDepth;
 	d = 0;
 	for (j = 0; j < config->cols; j++) {
 		config->devs[d] = raidPtr->Disks[j];
 		d++;
 	}
-	for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
-		config->spares[i] = raidPtr->Disks[j];
-		if (config->spares[i].status == rf_ds_rebuilding_spare) {
-			/* XXX: raidctl(8) expects to see this as a used spare */
+	for (i = 0; i < config->nspares; i++) {
+		config->spares[i] = raidPtr->Disks[raidPtr->numCol + i];
+		/*
+		 * Cope with raidctl(8) expectations. We could add an
+		 * updated ioctl so that raidctl(8) can tell it knows
+		 * about values used by kernel.
+		 */
+		switch (config->spares[i].status) {
+		case rf_ds_rebuilding_spare:
 			config->spares[i].status = rf_ds_used_spare;
+			break;
+		case rf_ds_absent_spare:
+			config->spares[i].status = rf_ds_failed;
+			break;
+		default:
+			break;
 		}
 	}
 	return 0;
 }
@@ -3934,9 +3964,9 @@
 	RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
 	RF_ComponentLabel_t *raid_clabel;
 	int column = clabel->column;
 
-	if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
+	if ((column < 0) || (column >= raidPtr->numCol + raidPtr->maxSpare))
 		return EINVAL;
 	raid_clabel = raidget_component_label(raidPtr, column);
 	memcpy(clabel, raid_clabel, sizeof *clabel);
 	/* Fix-up for userland. */
Index: sys/dev/raidframe/rf_paritymap.c
===================================================================
RCS file: /cvsroot/src/sys/dev/raidframe/rf_paritymap.c,v
retrieving revision 1.10
diff -U4 -r1.10 rf_paritymap.c
--- sys/dev/raidframe/rf_paritymap.c	27 Sep 2020 21:39:08 -0000	1.10
+++ sys/dev/raidframe/rf_paritymap.c	7 Sep 2023 06:23:17 -0000
@@ -375,9 +375,9 @@
 		raidflush_component_label(raidPtr, col);
 	}
 
 	/* handle the spares too... */
-	for (col = 0; col < raidPtr->numSpare; col++) {
+	for (col = 0; col < raidPtr->maxSpare; col++) {
 		if (raidPtr->Disks[raidPtr->numCol+col].status == rf_ds_used_spare) {
 			clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
 			clabel->parity_map_ntick = cooldown;
 			clabel->parity_map_tickms = tickms;
@@ -768,9 +768,9 @@
 		clabel = raidget_component_label(raidPtr, col);
 		if (clabel->parity_map_flags & RF_PMLABEL_DISABLE)
 			dis = 1;
 	}
-	for (col = 0; col < raidPtr->numSpare; col++) {
+	for (col = 0; col < raidPtr->maxSpare; col++) {
 		if (raidPtr->Disks[raidPtr->numCol+col].status != rf_ds_used_spare)
 			continue;
 		clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
 		if (clabel->parity_map_flags & RF_PMLABEL_DISABLE)
@@ -798,9 +798,9 @@
 		raidflush_component_label(raidPtr, col);
 	}
 
 	/* update any used spares as well */
-	for (col = 0; col < raidPtr->numSpare; col++) {
+	for (col = 0; col < raidPtr->maxSpare; col++) {
 		if (raidPtr->Disks[raidPtr->numCol+col].status != rf_ds_used_spare)
 			continue;
 
 		clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
Index: sys/dev/raidframe/rf_raid.h
===================================================================
RCS file: /cvsroot/src/sys/dev/raidframe/rf_raid.h,v
retrieving revision 1.51
diff -U4 -r1.51 rf_raid.h
--- sys/dev/raidframe/rf_raid.h	7 Aug 2021 16:19:15 -0000	1.51
+++ sys/dev/raidframe/rf_raid.h	7 Sep 2023 06:23:17 -0000
@@ -115,9 +115,11 @@
 	RF_int32 sectorMask;	/* mask of bytes-per-sector */
 
 	RF_RaidLayout_t Layout;	/* all information related to layout */
 	RF_RaidDisk_t *Disks;	/* all information related to physical disks */
-	RF_DiskQueue_t *Queues;/* all information related to disk queues */
+	u_int maxSpare;		/* initialized spares in Disks array */
+	RF_DiskQueue_t *Queues;	/* all information related to disk queues */
+	u_int maxQueue;		/* initialized queues in Queues array */
 	const RF_DiskQueueSW_t *qType;/* pointer to the DiskQueueSW used for the
 				       component queues. */
 	/* NOTE:  This is an anchor point via which the queues can be
 	 * accessed, but the enqueue/dequeue routines in diskqueue.c use a
@@ -258,8 +260,10 @@
 	rf_declare_cond2(outstandingCond);
 	int waitShutdown;
 	int nAccOutstanding;
 
+	int *abortRecon;	/* Abort background operations requested */
+
 	RF_DiskId_t **diskids;
 
 	int raidid;
 	void   *softc;
Index: sys/dev/raidframe/rf_reconstruct.c
===================================================================
RCS file: /cvsroot/src/sys/dev/raidframe/rf_reconstruct.c,v
retrieving revision 1.127
diff -U4 -r1.127 rf_reconstruct.c
--- sys/dev/raidframe/rf_reconstruct.c	27 Jul 2021 03:01:48 -0000	1.127
+++ sys/dev/raidframe/rf_reconstruct.c	7 Sep 2023 06:23:17 -0000
@@ -263,9 +263,9 @@
 		}
 		scol = (-1);
 	} else {
 #endif
-		for (scol = raidPtr->numCol; scol < raidPtr->numCol + raidPtr->numSpare; scol++) {
+		for (scol = raidPtr->numCol; scol < raidPtr->numCol + raidPtr->maxSpare; scol++) {
 			if (raidPtr->Disks[scol].status == rf_ds_spare) {
 				spareDiskPtr = &raidPtr->Disks[scol];
 				spareDiskPtr->status = rf_ds_rebuilding_spare;
 				break;
@@ -642,11 +642,17 @@
 
 	done = 0;
 	while (!done) {
 
-		if (raidPtr->waitShutdown) {
-			/* someone is unconfiguring this array... bail on the reconstruct.. */
+		if (raidPtr->waitShutdown ||
+		    raidPtr->abortRecon[col]) {
+			/*
+			 * someone is unconfiguring this array
+			 * or failed a component
+			 *... bail on the reconstruct..
+			 */
 			recon_error = 1;
+			raidPtr->abortRecon[col] = 0;
 			break;
 		}
 
 		num_writes = 0;
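The new failure semantics from the patch above can be exercised in the
same spirit (again a sketch; assumes raid0 has component wd1 and an
available hot spare wd2, both hypothetical):

  raidctl -F /dev/wd1a raid0   # fail wd1 and reconstruct it onto wd2
  raidctl -f /dev/wd1a raid0   # wd1 is now spared by wd2, so this
                               # fails wd2; per the patch this also
                               # works while reconstruction is still
                               # in progress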
Attachment:
rftest.sh
Description: Bourne shell script