Port-sparc64 archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Revisiting gem(4) resets
Hi all,
Some time ago, we added a workaround for hardware receive lockups in the
gem(4) driver based on discussion in PR 46260 [*]. However, when I load
both CPU and network, I see a number of resets when the receiver isn't
locked up (i.e. in the "else" part of the check).
I don't think that we should reset in the cases where we are still making
progress (read/write pointers changing). I also added a counter for the
overflow and I see that increase. However, looking at the other counters,
also see the > 63 descriptors counter increasing, so I suspect that we do
overflow the receive descriptor ring sometimes. For example:
# uptime; vmstat -e | grep gem
9:22AM up 23:23, 5 users, load averages: 4.50, 5.35, 5.48
gem0 interrupts 298354724 3545 intr
gem0 rx overflow 3 0 intr
gem0 tx interrupts 193911012 2304 intr
gem0 rx interrupts 120872075 1436 intr
gem0 rx 0desc 1541503 18 intr
gem0 rx 1desc 32223639 382 intr
gem0 rx 2desc 23263318 276 intr
gem0 rx 3desc 51706004 614 intr
gem0 rx >3desc 10289047 122 intr
gem0 rx >7desc 1847846 21 intr
gem0 rx >15desc 436 0 intr
gem0 rx >31desc 115 0 intr
gem0 rx >63desc 167 0 intr
where RX overflow has been reported 3 times, and we've processed more than
63 descriptors 167 times.
I would like to commit the attached patch, which still resets in the else
case if the read/write pointers aren't changing.
Regards,
Julian
PS. I'm curious what causes the 0desc rate to increase, but I don't think
that it's relevant here.
[*]
http://cvsweb.netbsd.org/bsdweb.cgi/src/sys/dev/ic/gem.c.diff?r1=1.98&r2=1.99&only_with_tag=MAIN&f=h
http://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=46260
Index: src/sys/dev/ic/gem.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/gem.c,v
retrieving revision 1.132
diff -u -r1.132 gem.c
--- src/sys/dev/ic/gem.c 15 Sep 2020 08:33:40 -0000 1.132
+++ src/sys/dev/ic/gem.c 21 Jan 2021 08:25:05 -0000
@@ -78,6 +78,7 @@
#include <dev/mii/miivar.h>
#include <dev/mii/mii_bitbang.h>
#include <dev/ic/gemreg.h>
#include <dev/ic/gemvar.h>
@@ -165,10 +166,11 @@
#ifdef GEM_COUNTERS
for (i = __arraycount(sc->sc_ev_rxhist); --i >= 0; )
evcnt_detach(&sc->sc_ev_rxhist[i]);
- evcnt_detach(&sc->sc_ev_rxnobuf);
- evcnt_detach(&sc->sc_ev_rxfull);
evcnt_detach(&sc->sc_ev_rxint);
evcnt_detach(&sc->sc_ev_txint);
+ evcnt_detach(&sc->sc_ev_rxfull);
+ evcnt_detach(&sc->sc_ev_rxnobuf);
+ evcnt_detach(&sc->sc_ev_rxoverflow);
#endif
evcnt_detach(&sc->sc_ev_intr);
@@ -588,14 +590,16 @@
evcnt_attach_dynamic(&sc->sc_ev_intr, EVCNT_TYPE_INTR,
NULL, device_xname(sc->sc_dev), "interrupts");
#ifdef GEM_COUNTERS
+ evcnt_attach_dynamic(&sc->sc_ev_rxoverflow, EVCNT_TYPE_INTR,
+ &sc->sc_ev_rxint, device_xname(sc->sc_dev), "rx overflow");
+ evcnt_attach_dynamic(&sc->sc_ev_rxnobuf, EVCNT_TYPE_INTR,
+ &sc->sc_ev_rxint, device_xname(sc->sc_dev), "rx malloc failure");
+ evcnt_attach_dynamic(&sc->sc_ev_rxfull, EVCNT_TYPE_INTR,
+ &sc->sc_ev_rxint, device_xname(sc->sc_dev), "rx ring full");
evcnt_attach_dynamic(&sc->sc_ev_txint, EVCNT_TYPE_INTR,
&sc->sc_ev_intr, device_xname(sc->sc_dev), "tx interrupts");
evcnt_attach_dynamic(&sc->sc_ev_rxint, EVCNT_TYPE_INTR,
&sc->sc_ev_intr, device_xname(sc->sc_dev), "rx interrupts");
- evcnt_attach_dynamic(&sc->sc_ev_rxfull, EVCNT_TYPE_INTR,
- &sc->sc_ev_rxint, device_xname(sc->sc_dev), "rx ring full");
- evcnt_attach_dynamic(&sc->sc_ev_rxnobuf, EVCNT_TYPE_INTR,
- &sc->sc_ev_rxint, device_xname(sc->sc_dev), "rx malloc failure");
evcnt_attach_dynamic(&sc->sc_ev_rxhist[0], EVCNT_TYPE_INTR,
&sc->sc_ev_rxint, device_xname(sc->sc_dev), "rx 0desc");
evcnt_attach_dynamic(&sc->sc_ev_rxhist[1], EVCNT_TYPE_INTR,
@@ -1803,8 +1807,8 @@
if (rxstat & GEM_RD_BAD_CRC) {
if_statinc(ifp, if_ierrors);
- aprint_error_dev(sc->sc_dev,
- "receive error: CRC error\n");
+ DPRINTF(sc, ("%s: receive error: CRC error\n",
+ device_xname(sc->sc_dev)));
GEM_INIT_RXDESC(sc, i);
continue;
}
@@ -2215,8 +2219,11 @@
*/
if (rxstat & GEM_MAC_RX_OVERFLOW) {
if_statinc(ifp, if_ierrors);
+ GEM_COUNTER_INCR(sc, sc_ev_rxoverflow);
+#ifdef GEM_DEBUG
aprint_error_dev(sc->sc_dev,
"receive error: RX overflow sc->rxptr %d, complete %d\n", sc->sc_rxptr, bus_space_read_4(t, h, GEM_RX_COMPLETION));
+#endif
sc->sc_rx_fifo_wr_ptr =
bus_space_read_4(t, h, GEM_RX_FIFO_WR_PTR);
sc->sc_rx_fifo_rd_ptr =
@@ -2258,6 +2265,7 @@
uint32_t rx_fifo_wr_ptr;
uint32_t rx_fifo_rd_ptr;
uint32_t state;
+ int needreset;
if ((ifp->if_flags & IFF_RUNNING) == 0) {
aprint_error_dev(sc->sc_dev, "receiver not running\n");
@@ -2282,25 +2290,33 @@
"receiver stuck in overflow, resetting\n");
gem_init(ifp);
} else {
+ needreset = 1;
if ((state & GEM_MAC_STATE_OVERFLOW) != GEM_MAC_STATE_OVERFLOW) {
- aprint_error_dev(sc->sc_dev,
- "rx_watchdog: not in overflow state: 0x%x\n",
- state);
+ DPRINTF(sc,
+ ("%s: rx_watchdog: not in overflow state: 0x%x\n",
+ device_xname(sc->sc_dev), state));
}
if (rx_fifo_wr_ptr != rx_fifo_rd_ptr) {
- aprint_error_dev(sc->sc_dev,
- "rx_watchdog: wr & rd ptr different\n");
+ DPRINTF(sc,
+ ("%s: rx_watchdog: wr & rd ptr different\n",
+ device_xname(sc->sc_dev), state));
+ needreset = 0;
}
if (sc->sc_rx_fifo_wr_ptr != rx_fifo_wr_ptr) {
- aprint_error_dev(sc->sc_dev,
- "rx_watchdog: wr pointer != saved\n");
+ DPRINTF(sc, ("%s: rx_watchdog: wr pointer != saved\n",
+ device_xname(sc->sc_dev), state));
+ needreset = 0;
}
if (sc->sc_rx_fifo_rd_ptr != rx_fifo_rd_ptr) {
+ DPRINTF(sc, ("%s: rx_watchdog: rd pointer != saved\n",
+ device_xname(sc->sc_dev), state));
+ needreset = 0;
+ }
+ if (needreset) {
aprint_error_dev(sc->sc_dev,
- "rx_watchdog: rd pointer != saved\n");
+ "rx_watchdog: resetting anyway\n");
+ gem_init(ifp);
}
- aprint_error_dev(sc->sc_dev, "resetting anyway\n");
- gem_init(ifp);
}
}
Index: src/sys/dev/ic/gemvar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/gemvar.h,v
retrieving revision 1.27
diff -u -r1.27 gemvar.h
--- src/sys/dev/ic/gemvar.h 1 Mar 2020 05:50:56 -0000 1.27
+++ src/sys/dev/ic/gemvar.h 21 Jan 2021 08:25:06 -0000
@@ -217,10 +217,11 @@
struct evcnt sc_ev_intr;
#ifdef GEM_COUNTERS
- struct evcnt sc_ev_txint;
- struct evcnt sc_ev_rxint;
+ struct evcnt sc_ev_rxoverflow;
struct evcnt sc_ev_rxnobuf;
struct evcnt sc_ev_rxfull;
+ struct evcnt sc_ev_txint;
+ struct evcnt sc_ev_rxint;
struct evcnt sc_ev_rxhist[9];
#endif
Home |
Main Index |
Thread Index |
Old Index