Subject: patch: handle shared/read-only ipv6+icmp6 mbuf storage
To: None <tech-net@netbsd.org>
From: David Young <dyoung@pobox.com>
List: tech-net
Date: 03/30/2006 14:37:03
--a8Wt8u1KmwUX3Y2C
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

I noticed that an AP would send malformed ICMPv6 Echo replies to a client
station that pinged the all-hosts multicast address, ping6 ff02::1%rtw0.
As it turns out, the IPv6 stack was scribbling all over shared mbufs
produced by the AP internal bridge.

The attached patch makes the IPv6 stack copy IPv6 & ICMPv6 headers
in shared/read-only mbufs, such as mbufs shallow-copied by net80211's
internal bridge, before overwriting them.  I want to get some review
before I check these in.

While I was in icmp6_input(), I rewrote the code that copies a shared mbuf
before feeding it to icmp6_reflect() and the sockets.  The old code was
complicated, wrong, and efficient.  I believe the new code is more simple
and more correct, but it may sometimes copy more bytes than necessary.

I am afraid the kernel may be sprinkled throughout with unsafe uses of
mtod() and m_data.  Just off the top of my head, it seems like we could
benefit from a const version of mtod(), constification of mbufs, and
a richer set of mbuf accessors.  Maybe we can borrow/invent a Coverity
model that groks shared/read-only mbufs?

Dave

-- 
David Young             OJC Technologies
dyoung@ojctech.com      Urbana, IL * (217) 278-3933

--a8Wt8u1KmwUX3Y2C
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=ip6-icmp-echo-patch

Index: icmp6.c
===================================================================
RCS file: /cvsroot/src/sys/netinet6/icmp6.c,v
retrieving revision 1.115
diff -u -p -u -p -r1.115 icmp6.c
--- icmp6.c	5 Mar 2006 23:47:08 -0000	1.115
+++ icmp6.c	30 Mar 2006 19:27:16 -0000
@@ -590,72 +590,48 @@ icmp6_input(mp, offp, proto)
 		 * Copy mbuf to send to two data paths: userland socket(s),
 		 * and to the querier (echo reply).
 		 * m: a copy for socket, n: a copy for querier
+		 *
+		 * If the first mbuf is shared, or the first mbuf is too short,
+		 * copy the first part of the data into a fresh mbuf.
+		 * Otherwise, we will wrongly overwrite both copies.
 		 */
 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
 			/* Give up local */
 			n = m;
 			m = NULL;
-			goto deliverecho;
-		}
-		/*
-		 * If the first mbuf is shared, or the first mbuf is too short,
-		 * copy the first part of the data into a fresh mbuf.
-		 * Otherwise, we will wrongly overwrite both copies.
-		 */
-		if ((n->m_flags & M_EXT) != 0 ||
+		} else if (M_READONLY(n) ||
 		    n->m_len < off + sizeof(struct icmp6_hdr)) {
-			struct mbuf *n0 = n;
-			const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
-
-			/*
-			 * Prepare an internal mbuf.  m_pullup() doesn't
-			 * always copy the length we specified.
-			 */
-			if (maxlen >= MCLBYTES) {
+			if (off + sizeof(struct icmp6_hdr) <= MHLEN)
+				n = m_pullup(n, off +sizeof(struct icmp6_hdr));
+			else if (n->m_pkthdr.len >= MCLBYTES) {
 				/* Give up remote */
-				m_freem(n0);
+				m_freem(n);
 				break;
-			}
-			MGETHDR(n, M_DONTWAIT, n0->m_type);
-			if (n && maxlen >= MHLEN) {
-				MCLGET(n, M_DONTWAIT);
-				if ((n->m_flags & M_EXT) == 0) {
-					m_free(n);
-					n = NULL;
+			} else {
+				struct mbuf *n0 = n;
+				if (n0->m_pkthdr.len > MHLEN) {
+					n = m_getcl(M_DONTWAIT, n0->m_type,
+					    n0->m_flags);
+				} else
+					MGETHDR(n, M_DONTWAIT, n0->m_type);
+
+				if (n != NULL) {
+					M_MOVE_PKTHDR(n, n0);
+					m_copydata(n0, 0, n0->m_pkthdr.len,
+					    mtod(n, caddr_t));
 				}
+				m_freem(n0);
 			}
 			if (n == NULL) {
 				/* Give up local */
-				m_freem(n0);
 				n = m;
 				m = NULL;
-				goto deliverecho;
 			}
-			M_MOVE_PKTHDR(n, n0);
-			/*
-			 * Copy IPv6 and ICMPv6 only.
-			 */
-			nip6 = mtod(n, struct ip6_hdr *);
-			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
-			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
-			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
-			noff = sizeof(struct ip6_hdr);
-			n->m_len = noff + sizeof(struct icmp6_hdr);
-			/*
-			 * Adjust mbuf.  ip6_plen will be adjusted in
-			 * ip6_output().
-			 * n->m_pkthdr.len == n0->m_pkthdr.len at this point.
-			 */
-			n->m_pkthdr.len += noff + sizeof(struct icmp6_hdr);
-			n->m_pkthdr.len -= (off + sizeof(struct icmp6_hdr));
-			m_adj(n0, off + sizeof(struct icmp6_hdr));
-			n->m_next = n0;
-		} else {
-	 deliverecho:
-			nip6 = mtod(n, struct ip6_hdr *);
-			nicmp6 = (struct icmp6_hdr *)((caddr_t)nip6 + off);
-			noff = off;
 		}
+		nip6 = mtod(n, struct ip6_hdr *);
+		nicmp6 = (struct icmp6_hdr *)((caddr_t)nip6 + off);
+		noff = off;
+
 		nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
 		nicmp6->icmp6_code = 0;
 		if (n) {
Index: ip6_input.c
===================================================================
RCS file: /cvsroot/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.83
diff -u -p -u -p -r1.83 ip6_input.c
--- ip6_input.c	5 Mar 2006 23:47:08 -0000	1.83
+++ ip6_input.c	30 Mar 2006 19:27:16 -0000
@@ -234,7 +234,7 @@ void
 ip6_input(m)
 	struct mbuf *m;
 {
-	struct ip6_hdr *ip6;
+	struct ip6_hdr *ip6, *tmp;
 	int off = sizeof(struct ip6_hdr), nest;
 	u_int32_t plen;
 	u_int32_t rtalert = ~0;
@@ -305,7 +305,27 @@ ip6_input(m)
 		}
 	}
 
-	ip6 = mtod(m, struct ip6_hdr *);
+        /* If we will embed a scope identifier in either the source or
+         * destination address or both, then make them both writable.
+	 * We have to do this to avoid scribbling over read-only/shared
+	 * mbuf storage.
+	 *
+         * XXX It may be possible to do this nearer to the place
+         * XXX where the src & dst are rewritten, per Manuel Bouyer's
+         * XXX suggestion on tech-net.
+	 */
+	tmp = mtod(m, struct ip6_hdr *);
+	if (!(IN6_IS_SCOPE_EMBEDDABLE(&tmp->ip6_src) ||
+	      IN6_IS_SCOPE_EMBEDDABLE(&tmp->ip6_dst)))
+		ip6 = tmp;
+	else if (m_makewritable(&m, 0, sizeof(struct ip6_hdr),
+	    M_DONTWAIT) != 0) {
+		struct ifnet *inifp = m->m_pkthdr.rcvif;
+		ip6stat.ip6s_toosmall++;	/* XXXDCY new stat */
+		in6_ifstat_inc(inifp, ifs6_in_hdrerr);	/* XXXDCY new stat */
+		goto bad;
+	} else
+		ip6 = mtod(m, struct ip6_hdr *);
 
 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
 		ip6stat.ip6s_badvers++;

--a8Wt8u1KmwUX3Y2C--