Subject: Re: Wart in tcp_output(), IPv6-related?
To: None <itojun@iijlab.net>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: tech-net
Date: 04/26/2002 18:40:09
--d9ADC0YsG2v16Js0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
On Sat, Apr 27, 2002 at 09:07:30AM +0900, itojun@iijlab.net wrote:
> i am happy to remove "max_linkhdr + hdrlen + len <= MCLBYTES" line.
> if the line gets removed the "always-copy" behavior will go away.
Attached is the patch I will commit.
> thanks for analysis.
Also thank Matt Thomas :-)
--
-- Jason R. Thorpe <thorpej@wasabisystems.com>
--d9ADC0YsG2v16Js0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=tcp-patch
Index: tcp_output.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_output.c,v
retrieving revision 1.78
diff -u -r1.78 tcp_output.c
--- tcp_output.c 2002/03/01 22:54:09 1.78
+++ tcp_output.c 2002/04/27 01:36:53
@@ -1,3 +1,4 @@
+#define TCP_OUTPUT_COUNTERS
/* $NetBSD: tcp_output.c,v 1.78 2002/03/01 22:54:09 thorpej Exp $ */
/*
@@ -200,6 +201,21 @@
int tcp_cwm = 1;
int tcp_cwm_burstsize = 4;
+#ifdef TCP_OUTPUT_COUNTERS
+#include <sys/device.h>
+
+extern struct evcnt tcp_output_bigheader;
+extern struct evcnt tcp_output_copysmall;
+extern struct evcnt tcp_output_copybig;
+extern struct evcnt tcp_output_refbig;
+
+#define TCP_OUTPUT_COUNTER_INCR(ev) (ev)->ev_count++
+#else
+
+#define TCP_OUTPUT_COUNTER_INCR(ev) /* nothing */
+
+#endif /* TCP_OUTPUT_COUNTERS */
+
static
#ifndef GPROF
__inline
@@ -384,29 +400,43 @@
m->m_data -= hdrlen;
#else
MGETHDR(m, M_DONTWAIT, MT_HEADER);
- if (m != NULL &&
- (max_linkhdr + hdrlen > MHLEN ||
- max_linkhdr + hdrlen + len <= MCLBYTES)) {
+ if (__predict_false(m == NULL))
+ return (ENOBUFS);
+
+ /*
+ * XXX Because other code assumes headers will fit in
+ * XXX one header mbuf.
+ *
+ * (This code should almost *never* be run.)
+ */
+ if (__predict_false((max_linkhdr + hdrlen) > MHLEN)) {
+ TCP_OUTPUT_COUNTER_INCR(&tcp_output_bigheader);
MCLGET(m, M_DONTWAIT);
if ((m->m_flags & M_EXT) == 0) {
m_freem(m);
- m = NULL;
+ return (ENOBUFS);
}
}
- if (m == NULL)
- return (ENOBUFS);
+
m->m_data += max_linkhdr;
m->m_len = hdrlen;
if (len <= M_TRAILINGSPACE(m)) {
m_copydata(so->so_snd.sb_mb, off, (int) len,
mtod(m, caddr_t) + hdrlen);
m->m_len += len;
+ TCP_OUTPUT_COUNTER_INCR(&tcp_output_copysmall);
} else {
m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
if (m->m_next == NULL) {
m_freem(m);
return (ENOBUFS);
}
+#ifdef TCP_OUTPUT_COUNTERS
+ if (m->m_next->m_flags & M_EXT)
+ TCP_OUTPUT_COUNTER_INCR(&tcp_output_refbig);
+ else
+ TCP_OUTPUT_COUNTER_INCR(&tcp_output_copybig);
+#endif /* TCP_OUTPUT_COUNTERS */
}
#endif
Index: tcp_subr.c
===================================================================
RCS file: /cvsroot/syssrc/sys/netinet/tcp_subr.c,v
retrieving revision 1.124
diff -u -r1.124 tcp_subr.c
--- tcp_subr.c 2002/03/15 09:25:41 1.124
+++ tcp_subr.c 2002/04/27 01:36:54
@@ -1,3 +1,4 @@
+#define TCP_OUTPUT_COUNTERS
/* $NetBSD: tcp_subr.c,v 1.124 2002/03/15 09:25:41 itojun Exp $ */
/*
@@ -227,6 +228,19 @@
NULL, "tcp", "swcsum");
#endif /* TCP_CSUM_COUNTERS */
+#ifdef TCP_OUTPUT_COUNTERS
+#include <sys/device.h>
+
+struct evcnt tcp_output_bigheader = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+ NULL, "tcp", "output big header");
+struct evcnt tcp_output_copysmall = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+ NULL, "tcp", "output copy small");
+struct evcnt tcp_output_copybig = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+ NULL, "tcp", "output copy big");
+struct evcnt tcp_output_refbig = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+ NULL, "tcp", "output reference big");
+#endif /* TCP_OUTPUT_COUNTERS */
+
/*
* Tcp initialization
*/
@@ -271,6 +285,13 @@
evcnt_attach_static(&tcp_hwcsum_data);
evcnt_attach_static(&tcp_swcsum);
#endif /* TCP_CSUM_COUNTERS */
+
+#ifdef TCP_OUTPUT_COUNTERS
+ evcnt_attach_static(&tcp_output_bigheader);
+ evcnt_attach_static(&tcp_output_copysmall);
+ evcnt_attach_static(&tcp_output_copybig);
+ evcnt_attach_static(&tcp_output_refbig);
+#endif /* TCP_OUTPUT_COUNTERS */
}
/*
--d9ADC0YsG2v16Js0--