Subject: Re: data corruption on aceride
To: Hiroki Sato <hrs@allbsd.org>
From: Manuel Bouyer <bouyer@antioche.eu.org>
List: port-sparc64
Date: 06/22/2005 22:03:12
--n8g4imXOkfNTN/H1
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
On Sun, Jun 19, 2005 at 09:07:20PM +0900, Hiroki Sato wrote:
> I am using netbsd-2-0 as of March, but I have data corruption
> issue on Sun Blade 100 for nearly a year.
> The corruption does not occur immediately, but find(1) performed
> by the /etc/security script triggers the problem gradually
> and it starts reporting "bad file descriptor". Until
> the problem occurs it takes several weeks or months.
>
> After DMA of the aceride driver is disabled by using "flags 0x0002,"
> the symptom disappears and the box becomes stable for these months.
> Should I try the latest current kernel?
Could you please try the attached patch against the netbsd-2 branch ?
I'm testing a similar change to a current kernel and it seems to work, but
as I didn't see the corruption probleme before ...
--
Manuel Bouyer <bouyer@antioche.eu.org>
NetBSD: 26 ans d'experience feront toujours la difference
--
--n8g4imXOkfNTN/H1
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=diff
Index: ic/wdc.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/wdc.c,v
retrieving revision 1.172.2.7.2.2
diff -u -r1.172.2.7.2.2 wdc.c
--- ic/wdc.c 16 Apr 2005 10:59:50 -0000 1.172.2.7.2.2
+++ ic/wdc.c 22 Jun 2005 19:53:59 -0000
@@ -697,23 +697,13 @@
delay(5000);
#endif
- if (wdc != NULL && (wdc->cap & WDC_CAPABILITY_SELECT))
- wdc->select(chp,0);
- bus_space_write_1(chp->cmd_iot, chp->cmd_iohs[wd_sdh], 0, WDSD_IBM);
- delay(10); /* 400ns delay */
- /* assert SRST, wait for reset to complete */
- bus_space_write_1(chp->ctl_iot, chp->ctl_ioh, wd_aux_ctlr,
- WDCTL_RST | WDCTL_IDS | WDCTL_4BIT);
- DELAY(1000);
- bus_space_write_1(chp->ctl_iot, chp->ctl_ioh, wd_aux_ctlr,
- WDCTL_IDS | WDCTL_4BIT);
+ if (wdc != NULL)
+ wdc->reset(chp, RESET_POLL);
+ else
+ wdc_do_reset(chp, RESET_POLL);
DELAY(2000);
(void) bus_space_read_1(chp->cmd_iot, chp->cmd_iohs[wd_error], 0);
bus_space_write_1(chp->ctl_iot, chp->ctl_ioh, wd_aux_ctlr, WDCTL_4BIT);
- delay(10); /* 400ns delay */
- /* ACK interrupt in case there is one pending left (Promise ATA100) */
- if (wdc != NULL && (wdc->cap & WDC_CAPABILITY_IRQACK))
- wdc->irqack(chp);
splx(s);
ret_value = __wdcwait_reset(chp, ret_value, poll);
@@ -782,6 +772,8 @@
callout_init(&chp->ch_callout);
if (wdc->drv_probe == NULL)
wdc->drv_probe = wdc_drvprobe;
+ if (wdc->reset == NULL)
+ wdc->reset = wdc_do_reset;
if (inited == 0) {
/* Initialize the ata_xfer pool. */
pool_init(&wdc_xfer_pool, sizeof(struct ata_xfer), 0,
@@ -1033,9 +1025,33 @@
{
struct wdc_softc *wdc = chp->ch_wdc;
int drv_mask1, drv_mask2;
+
+ wdc->reset(chp, poll);
+
+ drv_mask1 = (chp->ch_drive[0].drive_flags & DRIVE) ? 0x01:0x00;
+ drv_mask1 |= (chp->ch_drive[1].drive_flags & DRIVE) ? 0x02:0x00;
+ drv_mask2 = __wdcwait_reset(chp, drv_mask1,
+ (poll == RESET_SLEEP) ? 0 : 1);
+ if (drv_mask2 != drv_mask1) {
+ printf("%s channel %d: reset failed for",
+ wdc->sc_dev.dv_xname, chp->ch_channel);
+ if ((drv_mask1 & 0x01) != 0 && (drv_mask2 & 0x01) == 0)
+ printf(" drive 0");
+ if ((drv_mask1 & 0x02) != 0 && (drv_mask2 & 0x02) == 0)
+ printf(" drive 1");
+ printf("\n");
+ }
+ bus_space_write_1(chp->ctl_iot, chp->ctl_ioh, wd_aux_ctlr, WDCTL_4BIT);
+ return (drv_mask1 != drv_mask2) ? 1 : 0;
+}
+
+void
+wdc_do_reset(struct wdc_channel *chp, int poll)
+{
+ struct wdc_softc *wdc = chp->ch_wdc;
int s = 0;
- if (wdc->cap & WDC_CAPABILITY_SELECT)
+ if (wdc && wdc->cap & WDC_CAPABILITY_SELECT)
wdc->select(chp,0);
if (poll != RESET_SLEEP)
s = splbio();
@@ -1043,6 +1059,10 @@
bus_space_write_1(chp->cmd_iot, chp->cmd_iohs[wd_sdh], 0, WDSD_IBM);
delay(10); /* 400ns delay */
bus_space_write_1(chp->ctl_iot, chp->ctl_ioh, wd_aux_ctlr,
+ WDCTL_IDS | WDCTL_4BIT);
+ delay(10); /* 400ns delay */
+ /* assert SRST, wait for reset to complete */
+ bus_space_write_1(chp->ctl_iot, chp->ctl_ioh, wd_aux_ctlr,
WDCTL_RST | WDCTL_IDS | WDCTL_4BIT);
delay(2000);
(void) bus_space_read_1(chp->cmd_iot, chp->cmd_iohs[wd_error], 0);
@@ -1050,26 +1070,10 @@
WDCTL_4BIT | WDCTL_IDS);
delay(10); /* 400ns delay */
if (poll != RESET_SLEEP) {
- if (wdc->cap & WDC_CAPABILITY_IRQACK)
+ if (wdc && wdc->cap & WDC_CAPABILITY_IRQACK)
wdc->irqack(chp);
splx(s);
}
-
- drv_mask1 = (chp->ch_drive[0].drive_flags & DRIVE) ? 0x01:0x00;
- drv_mask1 |= (chp->ch_drive[1].drive_flags & DRIVE) ? 0x02:0x00;
- drv_mask2 = __wdcwait_reset(chp, drv_mask1,
- (poll == RESET_SLEEP) ? 0 : 1);
- if (drv_mask2 != drv_mask1) {
- printf("%s channel %d: reset failed for",
- wdc->sc_dev.dv_xname, chp->ch_channel);
- if ((drv_mask1 & 0x01) != 0 && (drv_mask2 & 0x01) == 0)
- printf(" drive 0");
- if ((drv_mask1 & 0x02) != 0 && (drv_mask2 & 0x02) == 0)
- printf(" drive 1");
- printf("\n");
- }
- bus_space_write_1(chp->ctl_iot, chp->ctl_ioh, wd_aux_ctlr, WDCTL_4BIT);
- return (drv_mask1 != drv_mask2) ? 1 : 0;
}
static int
Index: ic/wdcvar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/wdcvar.h,v
retrieving revision 1.55.2.1
diff -u -r1.55.2.1 wdcvar.h
--- ic/wdcvar.h 18 Apr 2004 02:23:41 -0000 1.55.2.1
+++ ic/wdcvar.h 22 Jun 2005 19:53:59 -0000
@@ -165,6 +165,8 @@
/* if WDC_CAPABILITY_IRQACK set in 'cap' */
void (*irqack)(struct wdc_channel *);
+ /* Optional callback to perform a bus reset */
+ void (*reset)(struct wdc_channel *, int);
};
/*
@@ -205,6 +207,7 @@
void wdccommandshort(struct wdc_channel *, int, int);
void wdctimeout(void *arg);
void wdc_reset_channel(struct ata_drive_datas *, int);
+void wdc_do_reset(struct wdc_channel *, int);
int wdc_exec_command(struct ata_drive_datas *, struct wdc_command*);
#define WDC_COMPLETE 0x01
Index: pci/aceride.c
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/aceride.c,v
retrieving revision 1.6
diff -u -r1.6 aceride.c
--- pci/aceride.c 3 Jan 2004 22:56:53 -0000 1.6
+++ pci/aceride.c 22 Jun 2005 19:53:59 -0000
@@ -38,6 +38,8 @@
#include <dev/pci/pciidevar.h>
#include <dev/pci/pciide_acer_reg.h>
+static int acer_pcib_match(struct pci_attach_args *);
+static void acer_do_reset(struct wdc_channel *, int);
static void acer_chip_map(struct pciide_softc*, struct pci_attach_args*);
static void acer_setup_channel(struct wdc_channel*);
static int acer_pci_intr(void *);
@@ -45,7 +47,11 @@
static int aceride_match(struct device *, struct cfdata *, void *);
static void aceride_attach(struct device *, struct device *, void *);
-CFATTACH_DECL(aceride, sizeof(struct pciide_softc),
+struct aceride_softc {
+ struct pciide_softc pciide_sc;
+ struct pci_attach_args pcib_pa;
+};
+CFATTACH_DECL(aceride, sizeof(struct aceride_softc),
aceride_match, aceride_attach, NULL, NULL);
static const struct pciide_product_desc pciide_acer_products[] = {
@@ -86,6 +92,21 @@
}
+static int
+acer_pcib_match(struct pci_attach_args *pa)
+{
+ /*
+ * we need to access the PCI config space of the pcib, see
+ * acer_do_reset()
+ */
+ if (PCI_CLASS(pa->pa_class) == PCI_CLASS_BRIDGE &&
+ PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_BRIDGE_ISA &&
+ PCI_VENDOR(pa->pa_id) == PCI_VENDOR_ALI &&
+ PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_ALI_M1543)
+ return 1;
+ return 0;
+}
+
static void
acer_chip_map(struct pciide_softc *sc, struct pci_attach_args *pa)
{
@@ -94,6 +115,7 @@
pcireg_t cr, interface;
bus_size_t cmdsize, ctlsize;
pcireg_t rev = PCI_REVISION(pa->pa_class);
+ struct aceride_softc *acer_sc = (struct aceride_softc *)sc;
if (pciide_chipen(sc, pa) == 0)
return;
@@ -152,6 +174,15 @@
| ACER_0x4B_CDETECT);
}
+ if (rev == 0xC3) {
+ /* install reset bug workaround */
+ if (pci_find_device(&acer_sc->pcib_pa, acer_pcib_match) == 0) {
+ printf("%s: WARNING: can't find pci-isa bridge\n",
+ sc->sc_wdcdev.sc_dev.dv_xname);
+ } else
+ sc->sc_wdcdev.reset = acer_do_reset;
+ }
+
for (channel = 0; channel < sc->sc_wdcdev.nchannels; channel++) {
cp = &sc->pciide_channels[channel];
if (pciide_chansetup(sc, channel, interface) == 0)
@@ -169,6 +200,31 @@
}
static void
+acer_do_reset(struct wdc_channel *chp, int poll)
+{
+ struct pciide_channel *cp = (struct pciide_channel*)chp;
+ struct pciide_softc *sc = (struct pciide_softc *)cp->wdc_channel.ch_wdc;
+ struct aceride_softc *acer_sc = (struct aceride_softc *)sc;
+ u_int8_t reg;
+
+ /*
+ * From OpenSolaris: after a reset we need to disable/enable the
+ * corresponding channel, or data corruption will occur in
+ * UltraDMA modes
+ */
+
+ wdc_do_reset(chp, poll);
+ reg = pciide_pci_read(acer_sc->pcib_pa.pa_pc, acer_sc->pcib_pa.pa_tag,
+ ACER_PCIB_CTRL);
+ printf("acer_do_reset reg 0x%x\n", reg);
+ pciide_pci_write(acer_sc->pcib_pa.pa_pc, acer_sc->pcib_pa.pa_tag,
+ ACER_PCIB_CTRL, reg & ACER_PCIB_CTRL_ENCHAN(chp->ch_channel));
+ delay(1000);
+ pciide_pci_write(acer_sc->pcib_pa.pa_pc, acer_sc->pcib_pa.pa_tag,
+ ACER_PCIB_CTRL, reg);
+}
+
+static void
acer_setup_channel(struct wdc_channel *chp)
{
struct ata_drive_datas *drvp;
Index: pci/pciide_acer_reg.h
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/pciide_acer_reg.h,v
retrieving revision 1.7
diff -u -r1.7 pciide_acer_reg.h
--- pci/pciide_acer_reg.h 5 Oct 2003 17:48:49 -0000 1.7
+++ pci/pciide_acer_reg.h 22 Jun 2005 19:53:59 -0000
@@ -90,6 +90,10 @@
#define ACER_0x79_REVC2_EN 0x4
#define ACER_0x79_EN 0x2
+/* OpenSolaris: channel enable/disable in the PCI-ISA bridge */
+#define ACER_PCIB_CTRL 0x58
+#define ACER_PCIB_CTRL_ENCHAN(chan) (0x4 << (chan))
+
/*
* IDE bus frequency (1 byte)
* This should be setup by the BIOS - can we rely on this ?
--n8g4imXOkfNTN/H1--