At Fri, 20 Jan 2012 12:48:32 +0100, Manuel Bouyer <bouyer%antioche.eu.org@localhost> wrote: Subject: Re: ongoing major problems with NetBSD-5 and LOCKDEBUG on multi-core system (mfi(4) related?) > > On Wed, Jan 18, 2012 at 08:59:47PM -0800, Greg A. Woods wrote: > > [...] > > So if I understand correctly these calls from other subsystems are not > > in an interrupt context and if I remember correctly tsleep() should be > > fine in any normal driver ioctl() function (for a non-MPSAFE driver, if > > indeed its ioctl() function was in the device switch table), but clearly > > grabbing the KERNEL_LOCK() is the wrong thing to do for this case, at > > least without releasing it properly before the tsleep() call. > > It's no problem calling tsleep() with the KERNEL_LOCK held: sleeping > functions will release the lock before sleep, and reaquire it after. Thanks, I finally got around to re-adding the KERNEL_LOCK() hooks to mfi.c, but I did it quite differently than the original two patches which did it, and instead I moved the splbio()/splx() calls into the function where they're really needed, as was done in OpenBSD, since that code can, I think, be called by other than bio(4) and sysmon_envstat(9). I'll attach my current changes, which include the missing bug fixes from OpenBSD, as well as my changes to splbio()/splx() > BTW, I run several dell PE2950 with this code and I don't have > problems. I'm more or less certain now that mfi(4) is not the cause of the problems I have been seeing (the ones first reported in PR# kern/45827) Are you running kernels from the netbsd-5 branch on any of them? Kernels with LOCKDEBUG (and DIAGNOSTIC and DEBUG)? I'm still stuck on how to use GDB to look at the different CPU stack backtraces. -- Greg A. Woods Planix, Inc. <woods%planix.com@localhost> +1 250 762-7675 http://www.planix.com/ --- mfi.c 12 Apr 2010 10:30:24 -0700 1.19.4.4 +++ mfi.c 19 Jan 2012 18:19:46 -0800 @@ -23,6 +23,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/mutex.h> #include <sys/buf.h> #include <sys/ioctl.h> #include <sys/device.h> @@ -91,7 +92,7 @@ uint32_t, uint32_t, uint32_t, void *, uint8_t *); static void mfi_mgmt_done(struct mfi_ccb *); -#if NBIO > 0 +#if NBIO > 0 /* XXX this is bogus -- should allow for /dev entries and normal ioctl(2) as well! */ static int mfi_ioctl(device_t, u_long, void *); static int mfi_ioctl_inq(struct mfi_softc *, struct bioc_inq *); static int mfi_ioctl_vol(struct mfi_softc *, struct bioc_vol *); @@ -103,6 +104,7 @@ static int mfi_ioctl_setstate(struct mfi_softc *, struct bioc_setstate *); static int mfi_bio_hs(struct mfi_softc *, int, int, void *); +/* XXX these next only depend on NBIO because without NBIO we have no ioctl() functions */ static int mfi_create_sensors(struct mfi_softc *); static void mfi_sensor_refresh(struct sysmon_envsys *, envsys_data_t *); @@ -153,15 +155,14 @@ mfi_get_ccb(struct mfi_softc *sc) { struct mfi_ccb *ccb; - int s; - s = splbio(); + mutex_enter(&sc->sc_ccb_mtx); /* OpenBSD 1.97 */ ccb = TAILQ_FIRST(&sc->sc_ccb_freeq); if (ccb) { TAILQ_REMOVE(&sc->sc_ccb_freeq, ccb, ccb_link); ccb->ccb_state = MFI_CCB_READY; } - splx(s); + mutex_exit(&sc->sc_ccb_mtx); /* OpenBSD 1.97 */ DNPRINTF(MFI_D_CCB, "%s: mfi_get_ccb: %p\n", DEVNAME(sc), ccb); @@ -172,11 +173,9 @@ mfi_put_ccb(struct mfi_ccb *ccb) { struct mfi_softc *sc = ccb->ccb_sc; - int s; DNPRINTF(MFI_D_CCB, "%s: mfi_put_ccb: %p\n", DEVNAME(sc), ccb); - s = splbio(); ccb->ccb_state = MFI_CCB_FREE; ccb->ccb_xs = NULL; ccb->ccb_flags = 0; @@ -187,8 +186,10 @@ ccb->ccb_sgl = NULL; ccb->ccb_data = NULL; ccb->ccb_len = 0; + + mutex_enter(&sc->sc_ccb_mtx); /* OpenBSD 1.97 */ TAILQ_INSERT_TAIL(&sc->sc_ccb_freeq, ccb, ccb_link); - splx(s); + mutex_exit(&sc->sc_ccb_mtx); /* OpenBSD 1.97 */ } static int @@ -640,6 +641,7 @@ return 1; TAILQ_INIT(&sc->sc_ccb_freeq); + mutex_init(&sc->sc_ccb_mtx, MUTEX_DEFAULT, IPL_BIO); /* OpenBSD 1.97 */ status = mfi_fw_state(sc); sc->sc_max_cmds = status & MFI_STATE_MAXCMD_MASK; @@ -1237,6 +1239,16 @@ if (mfi_poll(ccb)) goto done; } else { + /* + * revision 1.90 + * date: 2009/03/29 01:02:35; author: dlg; state: Exp; lines: +3 -0 + * fix a small race in mfi_mgmt between the checking of a ccbs completion and + * the sleep waiting for the completion. it is possible to get the interrupt + * completing the command just before the tsleep, which will never get a + * wakeup because the interrupt with the wakeup has already happened. + */ + int s = splbio(); + mfi_post(sc, ccb); DNPRINTF(MFI_D_MISC, "%s: mfi_mgmt_internal sleeping\n", @@ -1244,6 +1256,8 @@ while (ccb->ccb_state != MFI_CCB_DONE) tsleep(ccb, PRIBIO, "mfi_mgmt", 0); + splx(s); + if (ccb->ccb_flags & MFI_CCB_F_ERR) goto done; } @@ -1336,10 +1350,8 @@ { struct mfi_softc *sc = device_private(dev); int error = 0; - int s; KERNEL_LOCK(1, curlwp); - s = splbio(); DNPRINTF(MFI_D_IOCTL, "%s: mfi_ioctl ", DEVNAME(sc)); @@ -1378,7 +1390,7 @@ DNPRINTF(MFI_D_IOCTL, " invalid ioctl\n"); error = EINVAL; } - splx(s); + KERNEL_UNLOCK_ONE(curlwp); DNPRINTF(MFI_D_IOCTL, "%s: mfi_ioctl return %x\n", DEVNAME(sc), error); @@ -1428,6 +1440,13 @@ sizeof(sc->sc_ld_list), &sc->sc_ld_list, NULL)) goto done; + /* OpenBSD 1.86 */ + if (bv->bv_volid >= sc->sc_ld_list.mll_no_ld) { + /* go do hotspares */ + rv = mfi_bio_hs(sc, bv->bv_volid, MFI_MGMT_VD, bv); + goto done; + } + i = bv->bv_volid; mbox[0] = sc->sc_ld_list.mll_list[i].mll_ld.mld_target; DNPRINTF(MFI_D_IOCTL, "%s: mfi_ioctl_vol target %#x\n", @@ -1437,12 +1456,6 @@ sizeof(sc->sc_ld_details), &sc->sc_ld_details, mbox)) goto done; - if (bv->bv_volid >= sc->sc_ld_list.mll_no_ld) { - /* go do hotspares */ - rv = mfi_bio_hs(sc, bv->bv_volid, MFI_MGMT_VD, bv); - goto done; - } - strlcpy(bv->bv_dev, sc->sc_ld[i].ld_dev, sizeof(bv->bv_dev)); switch(sc->sc_ld_list.mll_list[i].mll_state) { @@ -1513,8 +1526,8 @@ struct mfi_pd_details *pd; struct scsipi_inquiry_data *inqbuf; char vend[8+16+4+1]; - int i, rv = EINVAL; - int arr, vol, disk; + int rv = EINVAL; + int arr, vol, disk, span; uint32_t size; uint8_t mbox[MFI_MBOX_SIZE]; @@ -1540,11 +1553,6 @@ ar = cfg->mfc_array; - /* calculate offset to ld structure */ - ld = (struct mfi_ld_cfg *)( - ((uint8_t *)cfg) + offsetof(struct mfi_conf, mfc_array) + - cfg->mfc_array_size * cfg->mfc_no_array); - vol = bd->bd_volid; if (vol >= cfg->mfc_no_ld) { @@ -1553,20 +1561,29 @@ goto freeme; } - /* find corresponding array for ld */ - for (i = 0, arr = 0; i < vol; i++) - arr += ld[i].mlc_parm.mpa_span_depth; + /* OpenBSD 1.85 */ + /* calculate offset to ld structure */ + ld = (struct mfi_ld_cfg *)( + ((uint8_t *)cfg) + offsetof(struct mfi_conf, mfc_array) + + cfg->mfc_array_size * cfg->mfc_no_array); + /* use span 0 only when raid group is not spanned */ + if (ld[vol].mlc_parm.mpa_span_depth > 1) + span = bd->bd_diskid / ld[vol].mlc_parm.mpa_no_drv_per_span; + else + span = 0; + arr = ld[vol].mlc_span[span].mls_index; + /* offset disk into pd list */ disk = bd->bd_diskid % ld[vol].mlc_parm.mpa_no_drv_per_span; - /* offset array index into the next spans */ - arr += bd->bd_diskid / ld[vol].mlc_parm.mpa_no_drv_per_span; - bd->bd_target = ar[arr].pd[disk].mar_enc_slot; + + /* get status */ switch (ar[arr].pd[disk].mar_pd_state){ case MFI_PD_UNCONFIG_GOOD: - bd->bd_status = BIOC_SDUNUSED; + case MFI_PD_FAILED: /* OpenBSD 1.82, OpenBSD PR#5645 */ + bd->bd_status = BIOC_SDFAILED; /* OpenBSD 1.82, OpenBSD PR#5645 */ break; case MFI_PD_HOTSPARE: /* XXX dedicated hotspare part of array? */ @@ -1577,10 +1594,11 @@ bd->bd_status = BIOC_SDOFFLINE; break; +#if 0 /* OpenBSD 1.82, OpenBSD PR#5645 */ case MFI_PD_FAILED: bd->bd_status = BIOC_SDFAILED; break; - +#endif case MFI_PD_REBUILD: bd->bd_status = BIOC_SDREBUILD; break; @@ -1600,8 +1618,11 @@ *((uint16_t *)&mbox) = ar[arr].pd[disk].mar_pd.mfp_id; memset(pd, 0, sizeof(*pd)); if (mfi_mgmt_internal(sc, MR_DCMD_PD_GET_INFO, MFI_DATA_IN, - sizeof *pd, pd, mbox)) + sizeof *pd, pd, mbox)) { + /* disk is missing but succeed command */ + rv = 0; /* OpenBSD 1.82, OpenBSD PR#5645 */ goto freeme; + } bd->bd_size = pd->mpd_size * 512; /* bytes per block */ @@ -1948,7 +1969,6 @@ { struct mfi_softc *sc = sme->sme_cookie; struct bioc_vol bv; - int s; int error; if (edata->sensor >= sc->sc_ld_cnt) @@ -1956,11 +1976,11 @@ bzero(&bv, sizeof(bv)); bv.bv_volid = edata->sensor; + KERNEL_LOCK(1, curlwp); - s = splbio(); error = mfi_ioctl_vol(sc, &bv); - splx(s); KERNEL_UNLOCK_ONE(curlwp); + if (error) return;
Attachment:
pgpwNnMWw9lXz.pgp
Description: PGP signature