Subject: Re: nfe(4) hardware checksum support
To: None <jmarin@embedtronics.fi>
From: Izumi Tsutsui <tsutsui@ceres.dti.ne.jp>
List: tech-kern
Date: 01/26/2007 00:01:26
jmarin@embedtronics.fi wrote:

> There seems to be no problem under regular use - but when I try to edit
> video files located on a remote machine, the problems begin.

How about the attached patch?
(though I can't test it on so heavy load)
---
Izumi Tsutsui


Index: if_nfe.c
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/if_nfe.c,v
retrieving revision 1.13
diff -u -r1.13 if_nfe.c
--- if_nfe.c	9 Jan 2007 10:29:27 -0000	1.13
+++ if_nfe.c	25 Jan 2007 14:59:48 -0000
@@ -490,36 +490,50 @@
 	struct nfe_softc *sc = arg;
 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
 	uint32_t r;
+	int handled;
 
-	if ((r = NFE_READ(sc, NFE_IRQ_STATUS)) == 0)
-		return 0;	/* not for us */
-	NFE_WRITE(sc, NFE_IRQ_STATUS, r);
+	if ((ifp->if_flags & IFF_UP) == 0)
+		return 0;
 
-	DPRINTFN(5, ("nfe_intr: interrupt register %x\n", r));
+	handled = 0;
 
 	NFE_WRITE(sc, NFE_IRQ_MASK, 0);
 
-	if (r & NFE_IRQ_LINK) {
-		NFE_READ(sc, NFE_PHY_STATUS);
-		NFE_WRITE(sc, NFE_PHY_STATUS, 0xf);
-		DPRINTF(("%s: link state changed\n", sc->sc_dev.dv_xname));
-	}
-
-	if (ifp->if_flags & IFF_RUNNING) {
-		/* check Rx ring */
-		nfe_rxeof(sc);
+	for (;;) {
+		r = NFE_READ(sc, NFE_IRQ_STATUS);
+		if ((r & NFE_IRQ_WANTED) == 0)
+			break;
 
-		/* check Tx ring */
-		nfe_txeof(sc);
+		NFE_WRITE(sc, NFE_IRQ_STATUS, r);
+		handled = 1;
+		DPRINTFN(5, ("nfe_intr: interrupt register %x\n", r));
+
+		if ((r & (NFE_IRQ_RXERR | NFE_IRQ_RX_NOBUF | NFE_IRQ_RX))
+		    != 0) {
+			/* check Rx ring */
+			nfe_rxeof(sc);
+		}
+
+		if ((r & (NFE_IRQ_TXERR | NFE_IRQ_TXERR2 | NFE_IRQ_TX_DONE))
+		    != 0) {
+			/* check Tx ring */
+			nfe_txeof(sc);
+		}
+
+		if ((r & NFE_IRQ_LINK) != 0) {
+			NFE_READ(sc, NFE_PHY_STATUS);
+			NFE_WRITE(sc, NFE_PHY_STATUS, 0xf);
+			DPRINTF(("%s: link state changed\n",
+			    sc->sc_dev.dv_xname));
+		}
 	}
 
 	NFE_WRITE(sc, NFE_IRQ_MASK, NFE_IRQ_WANTED);
 
-	if (ifp->if_flags & IFF_RUNNING &&
-	    !IF_IS_EMPTY(&ifp->if_snd))
+	if (handled && !IF_IS_EMPTY(&ifp->if_snd))
 		nfe_start(ifp);
 
-	return 1;
+	return handled;
 }
 
 int
@@ -692,32 +706,34 @@
 	struct mbuf *m, *mnew;
 	bus_addr_t physaddr;
 	uint16_t flags;
-	int error, len;
+	int error, len, i;
 
 	desc32 = NULL;
 	desc64 = NULL;
-	for (;;) {
-		data = &sc->rxq.data[sc->rxq.cur];
+	for (i = sc->rxq.cur;; i = NFE_RX_NEXTDESC(i)) {
+		data = &sc->rxq.data[i];
 
 		if (sc->sc_flags & NFE_40BIT_ADDR) {
-			desc64 = &sc->rxq.desc64[sc->rxq.cur];
-			nfe_rxdesc64_sync(sc, desc64, BUS_DMASYNC_POSTREAD);
+			desc64 = &sc->rxq.desc64[i];
+			nfe_rxdesc64_sync(sc, desc64,
+			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
 
 			flags = le16toh(desc64->flags);
 			len = le16toh(desc64->length) & 0x3fff;
 		} else {
-			desc32 = &sc->rxq.desc32[sc->rxq.cur];
-			nfe_rxdesc32_sync(sc, desc32, BUS_DMASYNC_POSTREAD);
+			desc32 = &sc->rxq.desc32[i];
+			nfe_rxdesc32_sync(sc, desc32,
+			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
 
 			flags = le16toh(desc32->flags);
 			len = le16toh(desc32->length) & 0x3fff;
 		}
 
-		if (flags & NFE_RX_READY)
+		if ((flags & NFE_RX_READY) != 0)
 			break;
 
 		if ((sc->sc_flags & (NFE_JUMBO_SUP | NFE_40BIT_ADDR)) == 0) {
-			if (!(flags & NFE_RX_VALID_V1))
+			if ((flags & NFE_RX_VALID_V1) == 0)
 				goto skip;
 
 			if ((flags & NFE_RX_FIXME_V1) == NFE_RX_FIXME_V1) {
@@ -725,7 +741,7 @@
 				len--;	/* fix buffer length */
 			}
 		} else {
-			if (!(flags & NFE_RX_VALID_V2))
+			if ((flags & NFE_RX_VALID_V2) == 0)
 				goto skip;
 
 			if ((flags & NFE_RX_FIXME_V2) == NFE_RX_FIXME_V2) {
@@ -767,7 +783,7 @@
 			physaddr = jbuf->physaddr;
 		} else {
 			MCLGET(mnew, M_DONTWAIT);
-			if (!(mnew->m_flags & M_EXT)) {
+			if ((mnew->m_flags & M_EXT) == 0) {
 				m_freem(mnew);
 				ifp->if_ierrors++;
 				goto skip;
@@ -777,15 +793,14 @@
 			    data->map->dm_mapsize, BUS_DMASYNC_POSTREAD);
 			bus_dmamap_unload(sc->sc_dmat, data->map);
 
-			error = bus_dmamap_load(sc->sc_dmat, data->map,
-			    mtod(mnew, void *), MCLBYTES, NULL,
-			    BUS_DMA_READ | BUS_DMA_NOWAIT);
+			error = bus_dmamap_load_mbuf(sc->sc_dmat, data->map,
+			    mnew, BUS_DMA_READ | BUS_DMA_NOWAIT);
 			if (error != 0) {
 				m_freem(mnew);
 
 				/* try to reload the old mbuf */
-				error = bus_dmamap_load(sc->sc_dmat, data->map,
-				    mtod(data->m, void *), MCLBYTES, NULL,
+				error = bus_dmamap_load_mbuf(sc->sc_dmat,
+				    data->map, data->m,
 				    BUS_DMA_READ | BUS_DMA_NOWAIT);
 				if (error != 0) {
 					/* very unlikely that it will fail.. */
@@ -852,20 +867,23 @@
 			desc32->physaddr = htole32(physaddr);
 		}
 
-skip:		if (sc->sc_flags & NFE_40BIT_ADDR) {
+ skip:
+		if (sc->sc_flags & NFE_40BIT_ADDR) {
 			desc64->length = htole16(sc->rxq.bufsz);
 			desc64->flags = htole16(NFE_RX_READY);
 
-			nfe_rxdesc64_sync(sc, desc64, BUS_DMASYNC_PREWRITE);
+			nfe_rxdesc64_sync(sc, desc64,
+			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
 		} else {
 			desc32->length = htole16(sc->rxq.bufsz);
 			desc32->flags = htole16(NFE_RX_READY);
 
-			nfe_rxdesc32_sync(sc, desc32, BUS_DMASYNC_PREWRITE);
+			nfe_rxdesc32_sync(sc, desc32,
+			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
 		}
-
-		sc->rxq.cur = (sc->rxq.cur + 1) % NFE_RX_RING_COUNT;
 	}
+	/* update current RX pointer */
+	sc->rxq.cur = i;
 }
 
 void
@@ -875,29 +893,35 @@
 	struct nfe_desc32 *desc32;
 	struct nfe_desc64 *desc64;
 	struct nfe_tx_data *data = NULL;
+	int i;
 	uint16_t flags;
 
-	while (sc->txq.next != sc->txq.cur) {
+	for (i = sc->txq.next;
+	    sc->txq.queued > 0;
+	    i = NFE_TX_NEXTDESC(i), sc->txq.queued--) {
 		if (sc->sc_flags & NFE_40BIT_ADDR) {
-			desc64 = &sc->txq.desc64[sc->txq.next];
-			nfe_txdesc64_sync(sc, desc64, BUS_DMASYNC_POSTREAD);
+			desc64 = &sc->txq.desc64[i];
+			nfe_txdesc64_sync(sc, desc64,
+			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
 
 			flags = le16toh(desc64->flags);
 		} else {
-			desc32 = &sc->txq.desc32[sc->txq.next];
-			nfe_txdesc32_sync(sc, desc32, BUS_DMASYNC_POSTREAD);
+			desc32 = &sc->txq.desc32[i];
+			nfe_txdesc32_sync(sc, desc32,
+			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
 
 			flags = le16toh(desc32->flags);
 		}
 
-		if (flags & NFE_TX_VALID)
+		if ((flags & NFE_TX_VALID) != 0)
 			break;
 
-		data = &sc->txq.data[sc->txq.next];
+		data = &sc->txq.data[i];
 
 		if ((sc->sc_flags & (NFE_JUMBO_SUP | NFE_40BIT_ADDR)) == 0) {
-			if (!(flags & NFE_TX_LASTFRAG_V1) && data->m == NULL)
-				goto skip;
+			if ((flags & NFE_TX_LASTFRAG_V1) == 0 &&
+			    data->m == NULL)
+				continue;
 
 			if ((flags & NFE_TX_ERROR_V1) != 0) {
 				printf("%s: tx v1 error 0x%04x\n",
@@ -906,8 +930,9 @@
 			} else
 				ifp->if_opackets++;
 		} else {
-			if (!(flags & NFE_TX_LASTFRAG_V2) && data->m == NULL)
-				goto skip;
+			if ((flags & NFE_TX_LASTFRAG_V2) == 0 &&
+			    data->m == NULL)
+				continue;
 
 			if ((flags & NFE_TX_ERROR_V2) != 0) {
 				printf("%s: tx v2 error 0x%04x\n",
@@ -920,7 +945,7 @@
 		if (data->m == NULL) {	/* should not get there */
 			printf("%s: last fragment bit w/o associated mbuf!\n",
 			    sc->sc_dev.dv_xname);
-			goto skip;
+			continue;
 		}
 
 		/* last fragment of the mbuf chain transmitted */
@@ -929,16 +954,18 @@
 		bus_dmamap_unload(sc->sc_dmat, data->active);
 		m_freem(data->m);
 		data->m = NULL;
+	}
 
-		ifp->if_timer = 0;
+	sc->txq.next = i;
 
-skip:		sc->txq.queued--;
-		sc->txq.next = (sc->txq.next + 1) % NFE_TX_RING_COUNT;
+	if (sc->txq.queued < NFE_TX_RING_COUNT) {
+		/* at least one slot freed */
+		ifp->if_flags &= ~IFF_OACTIVE;
 	}
 
-	if (data != NULL) {	/* at least one slot freed */
-		ifp->if_flags &= ~IFF_OACTIVE;
-		nfe_start(ifp);
+	if (sc->txq.queued == 0) {
+		/* all queued packets are sent */
+		ifp->if_timer = 0;
 	}
 }
 
@@ -1019,7 +1046,7 @@
 		flags |= NFE_TX_VALID;
 
 		sc->txq.queued++;
-		sc->txq.cur = (sc->txq.cur + 1) % NFE_TX_RING_COUNT;
+		sc->txq.cur = NFE_TX_NEXTDESC(sc->txq.cur);
 	}
 
 	/* the whole mbuf chain has been setup */
@@ -1064,7 +1091,7 @@
 nfe_start(struct ifnet *ifp)
 {
 	struct nfe_softc *sc = ifp->if_softc;
-	int old = sc->txq.cur;
+	int old = sc->txq.queued;
 	struct mbuf *m0;
 
 	for (;;) {
@@ -1085,21 +1112,23 @@
 			bpf_mtap(ifp->if_bpf, m0);
 #endif
 	}
-	if (sc->txq.cur == old)	/* nothing sent */
-		return;
 
-	if (sc->sc_flags & NFE_40BIT_ADDR)
-		nfe_txdesc64_rsync(sc, old, sc->txq.cur, BUS_DMASYNC_PREWRITE);
-	else
-		nfe_txdesc32_rsync(sc, old, sc->txq.cur, BUS_DMASYNC_PREWRITE);
-
-	/* kick Tx */
-	NFE_WRITE(sc, NFE_RXTX_CTL, NFE_RXTX_KICKTX | sc->rxtxctl);
+	if (sc->txq.queued != old) {
+		/* packets are queued */
+		if (sc->sc_flags & NFE_40BIT_ADDR)
+			nfe_txdesc64_rsync(sc, old, sc->txq.cur,
+			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
+		else
+			nfe_txdesc32_rsync(sc, old, sc->txq.cur,
+			    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
+		/* kick Tx */
+		NFE_WRITE(sc, NFE_RXTX_CTL, NFE_RXTX_KICKTX | sc->rxtxctl);
 
-	/*
-	 * Set a timeout in case the chip goes out to lunch.
-	 */
-	ifp->if_timer = 5;
+		/*
+		 * Set a timeout in case the chip goes out to lunch.
+		 */
+		ifp->if_timer = 5;
+	}
 }
 
 void
Index: if_nfereg.h
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/if_nfereg.h,v
retrieving revision 1.3
diff -u -r1.3 if_nfereg.h
--- if_nfereg.h	9 Jan 2007 10:29:27 -0000	1.3
+++ if_nfereg.h	25 Jan 2007 14:59:48 -0000
@@ -22,6 +22,9 @@
 #define NFE_RX_RING_COUNT	128
 #define NFE_TX_RING_COUNT	64
 
+#define NFE_RX_NEXTDESC(x)	(((x) + 1) & (NFE_RX_RING_COUNT - 1))
+#define NFE_TX_NEXTDESC(x)	(((x) + 1) & (NFE_TX_RING_COUNT - 1))
+
 #define	ETHER_ALIGN		2
 #define NFE_JBYTES		(ETHER_MAX_LEN_JUMBO + ETHER_ALIGN)
 #define NFE_JPOOL_COUNT		(NFE_RX_RING_COUNT + 64)