Subject: ipv6 tx checksum offloading
To: None <tech-net@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-net
Date: 08/06/2005 20:51:05
--NextPart-20050806202421-0170800
Content-Type: Text/Plain; charset=us-ascii

Hi,

The attached diffs implement IPv6 TX checksum offloading.
Could anyone familiar with IPv6 review them?  Thanks.

YAMAMOTO Takashi

--NextPart-20050806202421-0170800
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="a.diff"

Index: netinet/tcp_output.c
===================================================================
--- netinet/tcp_output.c	(revision 1285)
+++ netinet/tcp_output.c	(revision 1286)
@@ -1344,27 +1344,14 @@ send:
 #endif
 #ifdef INET6
 	case AF_INET6:
-		/*
-		 * XXX Actually delaying the checksum is Hard
-		 * XXX (well, maybe not for Itojun, but it is
-		 * XXX for me), but we can still take advantage
-		 * XXX of the cached pseudo-header checksum.
-		 */
-		/* equals to hdrlen + len */
-		m->m_pkthdr.len = sizeof(struct ip6_hdr)
-			+ sizeof(struct tcphdr) + optlen + len;
-#ifdef notyet
-		m->m_pkthdr.csum_flags = M_CSUM_TCPv6;
 		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
-#endif
+		m->m_pkthdr.csum_flags = M_CSUM_TCPv6;
 		if (len + optlen) {
 			/* Fixup the pseudo-header checksum. */
 			/* XXXJRT: Not IPv6 Jumbogram safe. */
 			th->th_sum = in_cksum_addword(th->th_sum,
 			    htons((u_int16_t) (len + optlen)));
 		}
-		th->th_sum = in6_cksum(m, 0, sizeof(struct ip6_hdr),
-		    sizeof(struct tcphdr) + optlen + len);
 		break;
 #endif
 	}
Index: netinet6/ip6_output.c
===================================================================
--- netinet6/ip6_output.c	(revision 1285)
+++ netinet6/ip6_output.c	(revision 1286)
@@ -167,6 +167,8 @@ ip6_output(m0, opt, ro, flags, im6o, so,
 	ip6 = mtod(m, struct ip6_hdr *);
 #endif /* IPSEC */
 
+	M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data, sizeof(struct ip6_hdr));
+
 #define MAKE_EXTHDR(hp, mp)						\
     do {								\
 	if (hp) {							\
@@ -248,6 +250,12 @@ ip6_output(m0, opt, ro, flags, im6o, so,
   skippolicycheck:;
 #endif /* IPSEC */
 
+	if (needipsec &&
+	    (m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
+		in6_delayed_cksum(m);
+		m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
+	}
+
 	/*
 	 * Calculate the total length of the extension header chain.
 	 * Keep the length of the unfragmentable part for fragmentation.
@@ -294,6 +302,7 @@ ip6_output(m0, opt, ro, flags, im6o, so,
 		ip6 = mtod(m, struct ip6_hdr *);
 		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
 			goto freehdrs;
+		optlen += 8; /* XXX JUMBOOPTLEN */
 		ip6->ip6_plen = 0;
 	} else
 		ip6->ip6_plen = htons(plen);
@@ -353,6 +362,9 @@ ip6_output(m0, opt, ro, flags, im6o, so,
 		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
 		    IPPROTO_ROUTING);
 
+		M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data,
+		    sizeof(struct ip6_hdr) + optlen);
+
 #ifdef IPSEC
 		if (!needipsec)
 			goto skip_ipsec2;
@@ -868,6 +880,7 @@ skip_ipsec2:;
 	 */
 	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
 		struct in6_ifaddr *ia6;
+		int sw_csum;
 
 		ip6 = mtod(m, struct ip6_hdr *);
 		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
@@ -879,6 +892,13 @@ skip_ipsec2:;
 		/* clean ipsec history once it goes out of the node */
 		ipsec_delaux(m);
 #endif
+
+		sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
+		if ((sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
+			in6_delayed_cksum(m);
+			m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
+		}
+
 		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
 		goto done;
 	}
@@ -947,6 +967,12 @@ skip_ipsec2:;
 			ip6->ip6_nxt = IPPROTO_FRAGMENT;
 		}
 
+		if ((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6))
+		    != 0) {
+			in6_delayed_cksum(m);
+			m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
+		}
+
 		/*
 		 * Loop through length of segment after first fragment,
 		 * make new header and copy data of each part and link onto
@@ -1089,6 +1115,33 @@ ip6_copyexthdr(mp, hdr, hlen)
 
 	*mp = m;
 	return (0);
+}
+
+/*
+ * Process a delayed payload checksum calculation.
+ */
+void
+in6_delayed_cksum(struct mbuf *m)
+{
+	uint16_t csum, offset;
+
+	KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
+	KASSERT((~m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
+	KASSERT((m->m_pkthdr.csum_flags
+	    & (M_CSUM_UDPv4|M_CSUM_TCPv4|M_CSUM_TSOv4)) == 0);
+
+	offset = M_CSUM_DATA_IPv6_HL(m->m_pkthdr.csum_data);
+	csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset);
+	if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv6) != 0) {
+		csum = 0xffff;
+	}
+
+	offset += M_CSUM_DATA_IPv6_OFFSET(m->m_pkthdr.csum_data);
+	if ((offset + sizeof(csum)) > m->m_len) {
+		m_copyback(m, offset, sizeof(csum), &csum);
+	} else {
+		*(uint16_t *)(mtod(m, caddr_t) + offset) = csum;
+	}
 }
 
 /*
Index: netinet6/udp6_output.c
===================================================================
--- netinet6/udp6_output.c	(revision 1285)
+++ netinet6/udp6_output.c	(revision 1286)
@@ -330,10 +330,10 @@ udp6_output(in6p, m, addr6, control, p)
 		ip6->ip6_src	= *laddr;
 		ip6->ip6_dst	= *faddr;
 
-		if ((udp6->uh_sum = in6_cksum(m, IPPROTO_UDP,
-		    sizeof(struct ip6_hdr), plen)) == 0) {
-			udp6->uh_sum = 0xffff;
-		}
+		udp6->uh_sum = in6_cksum_phdr(laddr, faddr,
+		    htonl(plen), htonl(IPPROTO_UDP));
+		m->m_pkthdr.csum_flags = M_CSUM_UDPv6;
+		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 
 		if (in6p->in6p_flags & IN6P_MINMTU)
 			flags |= IPV6_MINMTU;
Index: netinet6/in6.h
===================================================================
--- netinet6/in6.h	(revision 1285)
+++ netinet6/in6.h	(revision 1286)
@@ -647,6 +647,7 @@ in6_cksum_phdr(const struct in6_addr *sr
 }
 
 int	in6_cksum __P((struct mbuf *, u_int8_t, u_int32_t, u_int32_t));
+void	in6_delayed_cksum __P((struct mbuf *));
 int	in6_localaddr __P((struct in6_addr *));
 int	in6_addrscope __P((struct in6_addr *));
 struct	in6_ifaddr *in6_ifawithscope __P((struct ifnet *, struct in6_addr *));
Index: sys/mbuf.h
===================================================================
--- sys/mbuf.h	(revision 1285)
+++ sys/mbuf.h	(revision 1286)
@@ -189,6 +189,19 @@ struct	pkthdr {
 #define	M_CSUM_DATA_IPv4_OFFSET(x)	((x) & 0xffff)
 
 /*
+ * Macros for M_CSUM_TCPv6 and M_CSUM_UDPv6
+ *
+ * M_CSUM_DATA_IPv6_HL: length of the ip6_hdr plus any extension headers,
+ * i.e. the offset of the UDP/TCP header in the packet.
+ *
+ * M_CSUM_DATA_IPv6_OFFSET: offset of the checksum field in the UDP/TCP header.
+ */
+
+#define	M_CSUM_DATA_IPv6_HL(x)		((x) >> 16)
+#define	M_CSUM_DATA_IPv6_HL_SET(x, v)	(x) = ((x) & 0xffff) | ((v) << 16)
+#define	M_CSUM_DATA_IPv6_OFFSET(x)	((x) & 0xffff)
+
+/*
  * Max # of pages we can attach to m_ext.  This is carefully chosen
  * to be able to handle SOSEND_LOAN_CHUNK with our minimum sized page.
  */

--NextPart-20050806202421-0170800
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="b.diff"

Index: dev/pci/if_wm.c
===================================================================
--- dev/pci/if_wm.c	(revision 1286)
+++ dev/pci/if_wm.c	(revision 1287)
@@ -276,6 +276,7 @@ struct wm_softc {
 	struct evcnt sc_ev_rxtusum;	/* TCP/UDP cksums checked in-bound */
 	struct evcnt sc_ev_txipsum;	/* IP checksums comp. out-bound */
 	struct evcnt sc_ev_txtusum;	/* TCP/UDP cksums comp. out-bound */
+	struct evcnt sc_ev_txtusum6;	/* TCP/UDP v6 cksums comp. out-bound */
 	struct evcnt sc_ev_txtso;	/* TCP seg offload out-bound */
 	struct evcnt sc_ev_txtsopain;	/* painful header manip. for TSO */
 
@@ -1220,7 +1221,9 @@ wm_attach(struct device *parent, struct 
 		ifp->if_capabilities |=
 		    IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_IPv4_Rx |
 		    IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
-		    IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx;
+		    IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx |
+		    IFCAP_CSUM_TCPv6_Tx |
+		    IFCAP_CSUM_UDPv6_Tx;
 
 	/* 
 	 * If we're a i82544 or greater (except i82547), we can do
@@ -1264,6 +1267,8 @@ wm_attach(struct device *parent, struct 
 	    NULL, sc->sc_dev.dv_xname, "txipsum");
 	evcnt_attach_dynamic(&sc->sc_ev_txtusum, EVCNT_TYPE_MISC,
 	    NULL, sc->sc_dev.dv_xname, "txtusum");
+	evcnt_attach_dynamic(&sc->sc_ev_txtusum6, EVCNT_TYPE_MISC,
+	    NULL, sc->sc_dev.dv_xname, "txtusum6");
 
 	evcnt_attach_dynamic(&sc->sc_ev_txtso, EVCNT_TYPE_MISC,
 	    NULL, sc->sc_dev.dv_xname, "txtso");
@@ -1369,6 +1374,7 @@ wm_tx_offload(struct wm_softc *sc, struc
 	eh = mtod(m0, struct ether_header *);
 	switch (htons(eh->ether_type)) {
 	case ETHERTYPE_IP:
+	case ETHERTYPE_IPV6:
 		offset = ETHER_HDR_LEN;
 		break;
 
@@ -1385,7 +1391,12 @@ wm_tx_offload(struct wm_softc *sc, struc
 		return (0);
 	}
 
-	iphl = M_CSUM_DATA_IPv4_IPHL(m0->m_pkthdr.csum_data);
+	if ((m0->m_pkthdr.csum_flags &
+	    (M_CSUM_TSOv4|M_CSUM_UDPv4|M_CSUM_TCPv4)) != 0) {
+		iphl = M_CSUM_DATA_IPv4_IPHL(m0->m_pkthdr.csum_data);
+	} else {
+		iphl = M_CSUM_DATA_IPv6_HL(m0->m_pkthdr.csum_data);
+	}
 
 	cmd = WTX_CMD_DEXT | WTX_DTYP_D;
 	cmdlen = WTX_CMD_DEXT | WTX_DTYP_C | WTX_CMD_IDE;
@@ -1467,8 +1478,17 @@ wm_tx_offload(struct wm_softc *sc, struc
 		WM_EVCNT_INCR(&sc->sc_ev_txtusum);
 		fields |= WTX_TXSM;
 		tucs = WTX_TCPIP_TUCSS(offset) |
-		   WTX_TCPIP_TUCSO(offset + M_CSUM_DATA_IPv4_OFFSET(m0->m_pkthdr.csum_data)) |
-		   WTX_TCPIP_TUCSE(0) /* rest of packet */;
+		    WTX_TCPIP_TUCSO(offset +
+		    M_CSUM_DATA_IPv4_OFFSET(m0->m_pkthdr.csum_data)) |
+		    WTX_TCPIP_TUCSE(0) /* rest of packet */;
+	} else if ((m0->m_pkthdr.csum_flags &
+	    (M_CSUM_TCPv6|M_CSUM_UDPv6)) != 0) {
+		WM_EVCNT_INCR(&sc->sc_ev_txtusum6);
+		fields |= WTX_TXSM;
+		tucs = WTX_TCPIP_TUCSS(offset) |
+		    WTX_TCPIP_TUCSO(offset +
+		    M_CSUM_DATA_IPv6_OFFSET(m0->m_pkthdr.csum_data)) |
+		    WTX_TCPIP_TUCSE(0) /* rest of packet */;
 	} else {
 		/* Just initialize it to a valid TCP context. */
 		tucs = WTX_TCPIP_TUCSS(offset) |
@@ -1779,7 +1799,8 @@ wm_start(struct ifnet *ifp)
 
 		/* Set up offload parameters for this packet. */
 		if (m0->m_pkthdr.csum_flags &
-		    (M_CSUM_IPv4|M_CSUM_TCPv4|M_CSUM_UDPv4)) {
+		    (M_CSUM_IPv4|M_CSUM_TCPv4|M_CSUM_UDPv4|
+		    M_CSUM_TCPv6|M_CSUM_UDPv6)) {
 			if (wm_tx_offload(sc, txs, &cksumcmd,
 					  &cksumfields) != 0) {
 				/* Error message already displayed. */

--NextPart-20050806202421-0170800--