Source-Changes-HG archive

[src/trunk]: src/sys/dev/pci/ixgbe Sync ixg(4) up to FreeBSD r230572.



details:   https://anonhg.NetBSD.org/src/rev/c4daed94cf6d
branches:  trunk
changeset: 806923:c4daed94cf6d
user:      msaitoh <msaitoh%NetBSD.org@localhost>
date:      Thu Mar 19 14:22:23 2015 +0000

description:
Sync ixg(4) up to FreeBSD r230572.

 - Fix the interrupt handler to make sure the stack's TX queue is
   processed. (FreeBSD r222588)
 - The maximum read size of incoming packets is configured in 1024-byte
   increments.  The previous code rounded the maximum frame size down
   instead of up, resulting in a read size of 1024 bytes in the
   non-jumbo frame case and splitting packets across multiple mbufs;
   see the first sketch after this list. (FreeBSD r225045)
 - The above problem in turn exposed another issue: when packets were
   split across multiple mbufs, all of the mbufs in the chain had the
   M_PKTHDR flag set. (FreeBSD r225045)
 - Use the correct constant for converting between the interrupt rate
   and EITR register values; the previous values were off by a factor
   of 2.  See the conversion sketch after this list. (FreeBSD r230572)
 - Make dev.ix.N.queueM.interrupt_rate a read/write sysctl variable.
   Changing an individual value affects that queue immediately and
   propagates to all interfaces at the next reinit. (FreeBSD r230572)
 - Add a read-only dev.ix.N.queueM.irqs sysctl to export the actual
   interrupt counts. (FreeBSD r230572)
 - Some netmap-related changes.
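
For context on the second item above, here is a minimal standalone
sketch of rounding a maximum frame size up to the hardware's 1024-byte
receive-buffer granularity.  It is illustrative only, not the driver
code; the function and constant names are made up:

#include <stdio.h>

#define RX_BUF_INCREMENT 1024	/* hypothetical 1024-byte granularity */

/* Round a maximum frame size up to the next 1024-byte increment so a
 * full frame fits in a single receive buffer. */
static unsigned int
rx_read_size(unsigned int max_frame)
{
	return ((max_frame + RX_BUF_INCREMENT - 1) / RX_BUF_INCREMENT) *
	    RX_BUF_INCREMENT;
}

int
main(void)
{
	/* A standard (non-jumbo) Ethernet frame is about 1518 bytes:
	 * rounding down gives a 1024-byte read size and splits the
	 * packet across two mbufs, rounding up gives 2048 bytes and
	 * keeps it in one buffer. */
	printf("read size for 1518-byte frames: %u\n", rx_read_size(1518));
	return 0;
}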

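The EITR constant change is visible in the diff below as the switch
from 8000000 to 4000000 in ixgbe_max_interrupt_rate.  As a hedged
illustration of the conversion, assuming the EITR interval field
occupies bits 3-11 (mask 0x0ff8) and counts 2-microsecond units, so a
field value of ival gives 500000 / ival interrupts per second, the
conversion between an interrupt rate and a raw EITR value could look
roughly like the following sketch (again not the driver's code):

#include <stdio.h>

/* Desired interrupt rate (per second, > 0) -> raw EITR value.
 * 4000000 / rate == (500000 / rate) << 3: the interval in
 * 2-microsecond units placed at bit 3.  Using 8000000 here would
 * program twice the interval and thus half the requested rate. */
static unsigned int
rate_to_eitr(unsigned int rate)
{
	return (4000000 / rate) & 0x0ff8;
}

/* Raw EITR value -> interrupt rate (per second). */
static unsigned int
eitr_to_rate(unsigned int eitr)
{
	unsigned int ival = (eitr & 0x0ff8) >> 3;	/* 2-us units */

	return ival ? 500000 / ival : 0;
}

int
main(void)
{
	unsigned int eitr = rate_to_eitr(31250);

	printf("EITR 0x%x -> %u interrupts/s\n", eitr, eitr_to_rate(eitr));
	return 0;
}

With the sysctl change described above, such a per-queue rate becomes
adjustable at run time through dev.ix.N.queueM.interrupt_rate.
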
diffstat:

 sys/dev/pci/ixgbe/ixgbe.c |  245 +++++++++++++++++++++++++++++++++++++++++----
 sys/dev/pci/ixgbe/ixv.c   |   22 +--
 2 files changed, 231 insertions(+), 36 deletions(-)

diffs (truncated from 563 to 300 lines):

diff -r 3a700f87c7c8 -r c4daed94cf6d sys/dev/pci/ixgbe/ixgbe.c
--- a/sys/dev/pci/ixgbe/ixgbe.c Thu Mar 19 12:22:36 2015 +0000
+++ b/sys/dev/pci/ixgbe/ixgbe.c Thu Mar 19 14:22:23 2015 +0000
@@ -1,6 +1,6 @@
 /******************************************************************************
 
-  Copyright (c) 2001-2011, Intel Corporation 
+  Copyright (c) 2001-2013, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
@@ -59,9 +59,10 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 /*$FreeBSD: src/sys/dev/ixgbe/ixgbe.c,v 1.51 2011/04/25 23:34:21 jfv Exp $*/
-/*$NetBSD: ixgbe.c,v 1.21 2015/02/24 14:49:28 msaitoh Exp $*/
+/*$NetBSD: ixgbe.c,v 1.22 2015/03/19 14:22:23 msaitoh Exp $*/
 
 #include "opt_inet.h"
+#include "opt_inet6.h"
 
 #include "ixgbe.h"
 
@@ -73,7 +74,7 @@
 /*********************************************************************
  *  Driver version
  *********************************************************************/
-char ixgbe_driver_version[] = "2.3.10";
+char ixgbe_driver_version[] = "2.3.11";
 
 /*********************************************************************
  *  PCI Device ID Table
@@ -269,7 +270,7 @@
 #define TUNABLE_INT(__x, __y)
 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
 
-static int ixgbe_max_interrupt_rate = (8000000 / IXGBE_LOW_LATENCY);
+static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
 
 /* How many packets rxeof tries to clean at a time */
@@ -302,7 +303,7 @@
  * it can be a performance win in some workloads, but
  * in others it actually hurts, its off by default. 
  */
-static bool ixgbe_header_split = FALSE;
+static int ixgbe_header_split = FALSE;
 TUNABLE_INT("hw.ixgbe.hdr_split", &ixgbe_header_split);
 
 #if defined(NETBSD_MSI_OR_MSIX)
@@ -352,6 +353,18 @@
 static int fdir_pballoc = 1;
 #endif
 
+#ifdef DEV_NETMAP
+/*
+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
+ * be a reference on how to implement netmap support in a driver.
+ * Additional comments are in ixgbe_netmap.h .
+ *
+ * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
+ * that extend the standard driver.
+ */
+#include <dev/netmap/ixgbe_netmap.h>
+#endif /* DEV_NETMAP */
+
 /*********************************************************************
  *  Device identification routine
  *
@@ -649,6 +662,9 @@
 
        ixgbe_add_hw_stats(adapter);
 
+#ifdef DEV_NETMAP
+       ixgbe_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("ixgbe_attach: end");
        return;
 err_late:
@@ -719,6 +735,9 @@
 
        ether_ifdetach(adapter->ifp);
        callout_halt(&adapter->timer, NULL);
+#ifdef DEV_NETMAP
+       netmap_detach(adapter->ifp);
+#endif /* DEV_NETMAP */
        ixgbe_free_pci_resources(adapter);
 #if 0  /* XXX the NetBSD port is probably missing something here */
        bus_generic_detach(dev);
@@ -1291,6 +1310,31 @@
                                msec_delay(1);
                }
                wmb();
+#ifdef DEV_NETMAP
+               /*
+                * In netmap mode, we must preserve the buffers made
+                * available to userspace before the if_init()
+                * (this is true by default on the TX side, because
+                * init makes all buffers available to userspace).
+                *
+                * netmap_reset() and the device specific routines
+                * (e.g. ixgbe_setup_receive_rings()) map these
+                * buffers at the end of the NIC ring, so here we
+                * must set the RDT (tail) register to make sure
+                * they are not overwritten.
+                *
+                * In this driver the NIC ring starts at RDH = 0,
+                * RDT points to the last slot available for reception (?),
+                * so RDT = num_rx_desc - 1 means the whole ring is available.
+                */
+               if (ifp->if_capenable & IFCAP_NETMAP) {
+                       struct netmap_adapter *na = NA(adapter->ifp);
+                       struct netmap_kring *kring = &na->rx_rings[i];
+                       int t = na->num_rx_desc - 1 - kring->nr_hwavail;
+
+                       IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
+               } else
+#endif /* DEV_NETMAP */
                IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
        }
 
@@ -1536,7 +1580,7 @@
 #if defined(NETBSD_MSI_OR_MSIX)
 /*********************************************************************
  *
- *  MSI Queue Interrupt Service routine
+ *  MSIX Queue Interrupt Service routine
  *
  **********************************************************************/
 void
@@ -1555,6 +1599,17 @@
 
        IXGBE_TX_LOCK(txr);
        more_tx = ixgbe_txeof(txr);
+       /*
+       ** Make certain that if the stack 
+       ** has anything queued the task gets
+       ** scheduled to handle it.
+       */
+#if __FreeBSD_version < 800000
+       if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd))
+#else
+       if (!drbr_empty(adapter->ifp, txr->br))
+#endif
+               more_tx = 1;
        IXGBE_TX_UNLOCK(txr);
 
        /* Do AIM now? */
@@ -1891,11 +1946,7 @@
        txr->next_avail_desc = i;
 
        txbuf->m_head = m_head;
-       /* We exchange the maps instead of copying because otherwise
-        * we end up with many pointers to the same map and we free
-        * one map twice in ixgbe_free_transmit_structures().  Who
-        * knows what other problems this caused.  --dyoung
-        */
+       /* Swap the dma map between the first and last descriptor */
        txr->tx_buffers[first].map = txbuf->map;
        txbuf->map = map;
        bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
@@ -2497,7 +2548,9 @@
 msi:
                msgs = pci_msi_count(dev);
                if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
-                       device_printf(adapter->dev,"Using MSI interrupt\n");
+                       device_printf(adapter->dev,"Using an MSI interrupt\n");
+       else
+                       device_printf(adapter->dev,"Using a Legacy interrupt\n");
        return (msgs);
 #endif
 }
@@ -2635,7 +2688,6 @@
 
        ifp = adapter->ifp = &ec->ec_if;
        strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
-       ifp->if_mtu = ETHERMTU;
        ifp->if_baudrate = 1000000000;
        ifp->if_init = ixgbe_init;
        ifp->if_stop = ixgbe_ifstop;
@@ -2665,19 +2717,20 @@
        ifp->if_capenable = 0;
 
        ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
-       ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING | ETHERCAP_VLAN_MTU;
        ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
+       ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
+                           | ETHERCAP_VLAN_MTU;
        ec->ec_capenable = ec->ec_capabilities;
 
        /* Don't enable LRO by default */
        ifp->if_capabilities |= IFCAP_LRO;
 
        /*
-       ** Dont turn this on by default, if vlans are
+       ** Don't turn this on by default, if vlans are
        ** created on another pseudo device (eg. lagg)
        ** then vlan events are not passed thru, breaking
        ** operation, but with HW FILTER off it works. If
-       ** using vlans directly on the em driver you can
+       ** using vlans directly on the ixgbe driver you can
        ** enable this and get full hardware tag filtering.
        */
        ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
@@ -3052,9 +3105,20 @@
        struct adapter *adapter = txr->adapter;
        struct ixgbe_tx_buf *txbuf;
        int i;
+#ifdef DEV_NETMAP
+       struct netmap_adapter *na = NA(adapter->ifp);
+       struct netmap_slot *slot;
+#endif /* DEV_NETMAP */
 
        /* Clear the old ring contents */
        IXGBE_TX_LOCK(txr);
+#ifdef DEV_NETMAP
+       /*
+        * (under lock): if in netmap mode, do some consistency
+        * checks and set slot to entry 0 of the netmap ring.
+        */
+       slot = netmap_reset(na, NR_TX, txr->me, 0);
+#endif /* DEV_NETMAP */
        bzero((void *)txr->tx_base,
              (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
        /* Reset indices */
@@ -3072,6 +3136,25 @@
                        m_freem(txbuf->m_head);
                        txbuf->m_head = NULL;
                }
+#ifdef DEV_NETMAP
+               /*
+                * In netmap mode, set the map for the packet buffer.
+                * NOTE: Some drivers (not this one) also need to set
+                * the physical buffer address in the NIC ring.
+                * Slots in the netmap ring (indexed by "si") are
+                * kring->nkr_hwofs positions "ahead" wrt the
+                * corresponding slot in the NIC ring. In some drivers
+                * (not here) nkr_hwofs can be negative. When computing
+                * si = i + kring->nkr_hwofs make sure to handle wraparounds.
+                */
+               if (slot) {
+                       int si = i + na->tx_rings[txr->me].nkr_hwofs;
+
+                       if (si >= na->num_tx_desc)
+                               si -= na->num_tx_desc;
+                       netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
+               }
+#endif /* DEV_NETMAP */
                /* Clear the EOP index */
                txbuf->eop_index = -1;
         }
@@ -3549,6 +3632,48 @@
 
        KASSERT(mutex_owned(&txr->tx_mtx));
 
+#ifdef DEV_NETMAP
+       if (ifp->if_capenable & IFCAP_NETMAP) {
+               struct netmap_adapter *na = NA(ifp);
+               struct netmap_kring *kring = &na->tx_rings[txr->me];
+
+               tx_desc = (struct ixgbe_legacy_tx_desc *)txr->tx_base;
+
+               bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+                   BUS_DMASYNC_POSTREAD);
+               /*
+                * In netmap mode, all the work is done in the context
+                * of the client thread. Interrupt handlers only wake up
+                * clients, which may be sleeping on individual rings
+                * or on a global resource for all rings.
+                * To implement tx interrupt mitigation, we wake up the client
+                * thread roughly every half ring, even if the NIC interrupts
+                * more frequently. This is implemented as follows:
+                * - ixgbe_txsync() sets kring->nr_kflags with the index of
+                *   the slot that should wake up the thread (nkr_num_slots
+                *   means the user thread should not be woken up);
+                * - the driver ignores tx interrupts unless netmap_mitigate=0
+                *   or the slot has the DD bit set.
+                *
+                * When the driver has separate locks, we need to
+                * release and re-acquire txlock to avoid deadlocks.
+                * XXX see if we can find a better way.
+                */
+               if (!netmap_mitigate ||
+                   (kring->nr_kflags < kring->nkr_num_slots &&
+                    tx_desc[kring->nr_kflags].upper.fields.status & IXGBE_TXD_STAT_DD)) {
+                       kring->nr_kflags = kring->nkr_num_slots;
+                       selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
+                       IXGBE_TX_UNLOCK(txr);
+                       IXGBE_CORE_LOCK(adapter);
+                       selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
+                       IXGBE_CORE_UNLOCK(adapter);
+                       IXGBE_TX_LOCK(txr);
+               }
+               return FALSE;
+       }
+#endif /* DEV_NETMAP */
+
        if (txr->tx_avail == adapter->num_tx_desc) {
                txr->queue_status = IXGBE_QUEUE_IDLE;
                return false;


