[src/trunk]: src/sys/dev/pci/ixgbe - Fix broken watchdog timer. This change d...

To: source-changes-hg%NetBSD.org@localhost
Subject: [src/trunk]: src/sys/dev/pci/ixgbe - Fix broken watchdog timer. This change d...
From: msaitoh <msaitoh%NetBSD.org@localhost>
Date: Tue, 31 Jul 2018 22:10:20 +0000
details:   https://anonhg.NetBSD.org/src/rev/10a692564223
branches:  trunk
changeset: 361686:10a692564223
user:      msaitoh <msaitoh%NetBSD.org@localhost>
date:      Tue May 08 09:45:54 2018 +0000

description:
- Fix broken watchdog timer. This change detects TX device timeout correctly.
  NOTE: It's supporsed not to be called {ixgbe,ixv}_rearm_queues() in the
  timer. Those are not required if any chip have no bug. In reality,
  ixgbe_rearm_queues() is required on 82599 and newer chip AND other than
  queue 0 to prevent device timeout. When it occured, packet was sent but the
  descriptor's DD bit wasn't set even though IXGBE_TXD_CMD_EOP and
  IXGBE_TXD_CMD_RS were set. After forcing interrupt by writing EICS register
  in ixgbe_rearm_queues(), DD is set. Why? Is this an undocumented errata? It
  might be possible not call rearm_queues on 82598 or queue 0, we call in any
  cases in case the problem occurs. On ixv(4), I have not seen this problem yet
  (though I tested only on X550_X(Xeon D 12xx)'s virtual function), but we
  do rearm in case TX device timeout happen.
- ixv(4): Call callout_stop() earlier in ixv_stop() like ixgbe_stop().
- KNF.

diffstat:

 sys/dev/pci/ixgbe/ix_txrx.c |   42 ++++-------
 sys/dev/pci/ixgbe/ixgbe.c   |  158 +++++++++++++++++++++++++++----------------
 sys/dev/pci/ixgbe/ixgbe.h   |   10 +-
 sys/dev/pci/ixgbe/ixv.c     |  140 ++++++++++++++++++++++++--------------
 4 files changed, 205 insertions(+), 145 deletions(-)

diffs (truncated from 577 to 300 lines):

diff -r 4dd06be731cd -r 10a692564223 sys/dev/pci/ixgbe/ix_txrx.c
--- a/sys/dev/pci/ixgbe/ix_txrx.c       Tue May 08 07:59:56 2018 +0000
+++ b/sys/dev/pci/ixgbe/ix_txrx.c       Tue May 08 09:45:54 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ix_txrx.c,v 1.41 2018/04/25 08:46:19 msaitoh Exp $ */
+/* $NetBSD: ix_txrx.c,v 1.42 2018/05/08 09:45:54 msaitoh Exp $ */
 
 /******************************************************************************
 
@@ -130,9 +130,10 @@
 int
 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
 {
-       int rc;
        struct mbuf    *m_head;
        struct adapter *adapter = txr->adapter;
+       int enqueued = 0;
+       int rc;
 
        IXGBE_TX_LOCK_ASSERT(txr);
 
@@ -158,6 +159,7 @@
                if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
                        break;
                }
+               enqueued++;
                IFQ_DEQUEUE(&ifp->if_snd, m_head);
                if (rc != 0) {
                        m_freem(m_head);
@@ -167,6 +169,10 @@
                /* Send a copy of the frame to the BPF listener */
                bpf_mtap(ifp, m_head);
        }
+       if (enqueued) {
+               txr->lastsent = time_uptime;
+               txr->sending = true;
+       }
 
        return IXGBE_SUCCESS;
 } /* ixgbe_legacy_start_locked */
@@ -313,6 +319,11 @@
                        break;
        }
 
+       if (enqueued) {
+               txr->lastsent = time_uptime;
+               txr->sending = true;
+       }
+
        if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
                ixgbe_txeof(txr);
 
@@ -537,10 +548,6 @@
        if (m_head->m_flags & M_MCAST)
                ifp->if_omcasts++;
 
-       /* Mark queue as having work */
-       if (txr->busy == 0)
-               txr->busy = 1;
-
        return (0);
 } /* ixgbe_xmit */
 
@@ -666,6 +673,7 @@
        /* Free any existing tx buffers. */
        txbuf = txr->tx_buffers;
        for (int i = 0; i < txr->num_desc; i++, txbuf++) {
+               txr->sending = false;
                if (txbuf->m_head != NULL) {
                        bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
                            0, txbuf->m_head->m_pkthdr.len,
@@ -1126,7 +1134,7 @@
 #endif /* DEV_NETMAP */
 
        if (txr->tx_avail == txr->num_desc) {
-               txr->busy = 0;
+               txr->sending = false;
                return false;
        }
 
@@ -1208,26 +1216,6 @@
        work += txr->num_desc;
        txr->next_to_clean = work;
 
-       /*
-        * Queue Hang detection, we know there's
-        * work outstanding or the first return
-        * would have been taken, so increment busy
-        * if nothing managed to get cleaned, then
-        * in local_timer it will be checked and
-        * marked as HUNG if it exceeds a MAX attempt.
-        */
-       if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
-               ++txr->busy;
-       /*
-        * If anything gets cleaned we reset state to 1,
-        * note this will turn off HUNG if its set.
-        */
-       if (processed)
-               txr->busy = 1;
-
-       if (txr->tx_avail == txr->num_desc)
-               txr->busy = 0;
-
        return ((limit > 0) ? false : true);
 } /* ixgbe_txeof */
 
diff -r 4dd06be731cd -r 10a692564223 sys/dev/pci/ixgbe/ixgbe.c
--- a/sys/dev/pci/ixgbe/ixgbe.c Tue May 08 07:59:56 2018 +0000
+++ b/sys/dev/pci/ixgbe/ixgbe.c Tue May 08 09:45:54 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.149 2018/04/19 07:40:12 msaitoh Exp $ */
+/* $NetBSD: ixgbe.c,v 1.150 2018/05/08 09:45:54 msaitoh Exp $ */
 
 /******************************************************************************
 
@@ -184,6 +184,8 @@
 static void    ixgbe_free_pci_resources(struct adapter *);
 static void    ixgbe_local_timer(void *);
 static void    ixgbe_local_timer1(void *);
+static void    ixgbe_watchdog(struct ifnet *);
+static bool    ixgbe_watchdog_txq(struct ifnet *, struct tx_ring *, bool *);
 static int     ixgbe_setup_interface(device_t, struct adapter *);
 static void    ixgbe_config_gpie(struct adapter *);
 static void    ixgbe_config_dmac(struct adapter *);
@@ -4257,11 +4259,8 @@
 ixgbe_local_timer1(void *arg)
 {
        struct adapter  *adapter = arg;
-       device_t        dev = adapter->dev;
        struct ix_queue *que = adapter->queues;
-       u64             queues = 0;
        u64             v0, v1, v2, v3, v4, v5, v6, v7;
-       int             hung = 0;
        int             i;
 
        KASSERT(mutex_owned(&adapter->core_mtx));
@@ -4298,64 +4297,94 @@
        adapter->enomem_tx_dma_setup.ev_count = v6;
        adapter->tso_err.ev_count = v7;
 
-       /*
-        * Check the TX queues status
-        *      - mark hung queues so we don't schedule on them
-        *      - watchdog only if all queues show hung
-        */
-       que = adapter->queues;
-       for (i = 0; i < adapter->num_queues; i++, que++) {
-               /* Keep track of queues with work for soft irq */
-               if (que->txr->busy)
-                       queues |= ((u64)1 << que->me);
-               /*
-                * Each time txeof runs without cleaning, but there
-                * are uncleaned descriptors it increments busy. If
-                * we get to the MAX we declare it hung.
-                */
-               if (que->busy == IXGBE_QUEUE_HUNG) {
-                       ++hung;
-                       /* Mark the queue as inactive */
-                       adapter->active_queues &= ~((u64)1 << que->me);
-                       continue;
-               } else {
-                       /* Check if we've come back from hung */
-                       if ((adapter->active_queues & ((u64)1 << que->me)) == 0)
-                               adapter->active_queues |= ((u64)1 << que->me);
-               }
-               if (que->busy >= IXGBE_MAX_TX_BUSY) {
-                       device_printf(dev,
-                           "Warning queue %d appears to be hung!\n", i);
-                       que->txr->busy = IXGBE_QUEUE_HUNG;
-                       ++hung;
-               }
-       }
-
-       /* Only truely watchdog if all queues show hung */
-       if (hung == adapter->num_queues)
-               goto watchdog;
-       else if (queues != 0) { /* Force an IRQ on queues with work */
-               que = adapter->queues;
-               for (i = 0; i < adapter->num_queues; i++, que++) {
-                       mutex_enter(&que->dc_mtx);
-                       if (que->disabled_count == 0)
-                               ixgbe_rearm_queues(adapter,
-                                   queues & ((u64)1 << i));
-                       mutex_exit(&que->dc_mtx);
-               }
-       }
+       ixgbe_watchdog(adapter->ifp);
 
 out:
        callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
-       return;
-
-watchdog:
-       device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
-       adapter->ifp->if_flags &= ~IFF_RUNNING;
-       adapter->watchdog_events.ev_count++;
-       ixgbe_init_locked(adapter);
 } /* ixgbe_local_timer */
 
+static void
+ixgbe_watchdog(struct ifnet *ifp)
+{
+       struct adapter  *adapter = ifp->if_softc;
+       struct ix_queue *que;
+       struct tx_ring  *txr;
+       u64             queues = 0;
+       bool            hung = false;
+       bool            sending = false;
+       int             i;
+
+       txr = adapter->tx_rings;
+       for (i = 0; i < adapter->num_queues; i++, txr++) {
+               hung = ixgbe_watchdog_txq(ifp, txr, &sending);
+               if (hung)
+                       break;
+               else if (sending)
+                       queues |= ((u64)1 << txr->me);
+       }
+
+       if (hung) {
+               ifp->if_flags &= ~IFF_RUNNING;
+               ifp->if_oerrors++;
+               adapter->watchdog_events.ev_count++;
+               ixgbe_init_locked(adapter);
+       } else if (queues != 0) {
+               /*
+                * Force an IRQ on queues with work.
+                *
+                * It's supporsed not to be called ixgbe_rearm_queues() if
+                * any chips have no bug. In reality, ixgbe_rearm_queues() is
+                * required on 82599 and newer chip AND other than queue 0 to
+                * prevent device timeout. When it occured, packet was sent but
+                * the descriptor's DD bot wasn't set even though
+                * IXGBE_TXD_CMD_EOP and IXGBE_TXD_CMD_RS were set. After
+                * forcing interrupt by writing EICS register in
+                * ixgbe_rearm_queues(), DD is set. Why? Is this an
+                * undocumented errata? It might be possible not call
+                * rearm_queues on 82598 or queue 0, we call in any cases in
+                * case the problem occurs.
+                */
+               que = adapter->queues;
+               for (i = 0; i < adapter->num_queues; i++, que++) {
+                       u64 index = queues & ((u64)1 << i);
+
+                       mutex_enter(&que->dc_mtx);
+                       if ((index != 0) && (que->disabled_count == 0))
+                               ixgbe_rearm_queues(adapter, index);
+                       mutex_exit(&que->dc_mtx);
+               }
+       }
+}
+
+static bool
+ixgbe_watchdog_txq(struct ifnet *ifp, struct tx_ring *txr, bool *sending)
+{
+       struct adapter *adapter = ifp->if_softc;
+       device_t dev = adapter->dev;
+       bool hung = false;
+       bool more = false;
+
+       IXGBE_TX_LOCK(txr);
+       *sending = txr->sending;
+       if (*sending && ((time_uptime - txr->lastsent) > IXGBE_TX_TIMEOUT)) {
+               /*
+                * Since we're using delayed interrupts, sweep up before we
+                * report an error.
+                */
+               do {
+                       more = ixgbe_txeof(txr);
+               } while (more);
+               hung = true;
+               device_printf(dev,
+                   "Watchdog timeout (queue %d%s)-- resetting\n", txr->me,
+                   (txr->tx_avail == txr->num_desc)
+                   ? ", lost interrupt?" : "");
+       }
+       IXGBE_TX_UNLOCK(txr);
+
+       return hung;
+}
+
 /************************************************************************
  * ixgbe_sfp_probe
  *
@@ -4514,6 +4543,8 @@
        struct ifnet    *ifp;
        struct adapter  *adapter = arg;
        struct ixgbe_hw *hw = &adapter->hw;
+       struct tx_ring  *txr;
+       int i;
 
        ifp = adapter->ifp;
 
@@ -4523,6 +4554,13 @@
        ixgbe_disable_intr(adapter);
        callout_stop(&adapter->timer);
 
+       txr = adapter->tx_rings;
+       for (i = 0; i < adapter->num_queues; i++, txr++) {
Prev by Date: [src/trunk]: src/usr.sbin/cpuctl/arch TGran64 indication was actually the opp...
Next by Date: [src/trunk]: src/sys/nfs Use M_MOVE_PKTHDR.
Previous by Thread: [src/trunk]: src/usr.sbin/cpuctl/arch TGran64 indication was actually the opp...
Next by Thread: [src/trunk]: src/sys/nfs Use M_MOVE_PKTHDR.
Indexes:
Home | Main Index | Thread Index | Old Index