Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src Port over the TCP_INFO socket option from FreeBSD, originall...
details: https://anonhg.NetBSD.org/src/rev/bda78a1ad8d7
branches: trunk
changeset: 336141:bda78a1ad8d7
user: he <he%NetBSD.org@localhost>
date: Sat Feb 14 12:57:52 2015 +0000
description:
Port over the TCP_INFO socket option from FreeBSD, originally from
the Linux 2.6 TCP API. This permits the caller to query certain information
about a TCP connection, and is used by pkgsrc's net/iperf3 test program
if available.
This extends struct tcpcb with three fields to count retransmits,
out-of-sequence receives and zero window announcements, and will
therefore warrant a kernel revision bump (done separately).
diffstat:
share/man/man4/tcp.4 | 19 +++++++++++-
sys/netinet/tcp.h | 75 +++++++++++++++++++++++++++++++++++++++++++++++-
sys/netinet/tcp_input.c | 5 +-
sys/netinet/tcp_output.c | 8 +++-
sys/netinet/tcp_subr.c | 7 +++-
sys/netinet/tcp_usrreq.c | 69 ++++++++++++++++++++++++++++++++++++++++++-
sys/netinet/tcp_var.h | 7 +++-
7 files changed, 179 insertions(+), 11 deletions(-)
diffs (truncated from 345 to 300 lines):
diff -r d66971c76620 -r bda78a1ad8d7 share/man/man4/tcp.4
--- a/share/man/man4/tcp.4 Sat Feb 14 10:21:29 2015 +0000
+++ b/share/man/man4/tcp.4 Sat Feb 14 12:57:52 2015 +0000
@@ -1,4 +1,4 @@
-.\" $NetBSD: tcp.4,v 1.29 2013/10/10 12:28:10 christos Exp $
+.\" $NetBSD: tcp.4,v 1.30 2015/02/14 12:57:52 he Exp $
.\" $FreeBSD: tcp.4,v 1.11.2.16 2004/02/16 22:21:47 bms Exp $
.\"
.\" Copyright (c) 1983, 1991, 1993
@@ -243,6 +243,23 @@
This option takes an
.Vt "unsigned int"
value, with a value greater than 0.
+.It Dv TCP_INFO
+Information about a socket's underlying TCP session may be retrieved
+by passing the read-only option
+.Dv TCP_INFO
+to
+.Xr getsockopt 2 .
+It accepts a single argument: a pointer to an instance of
+.Vt "struct tcp_info" .
+.Pp
+This API is subject to change; consult the source to determine
+which fields are currently filled out by this option.
+.Nx
+specific additions include
+send window size,
+receive window size,
+and
+bandwidth-controlled window space.
.\" range of 0 to N (where N is the
.\" .Xr sysctl 8
.\" variable
diff -r d66971c76620 -r bda78a1ad8d7 sys/netinet/tcp.h
--- a/sys/netinet/tcp.h Sat Feb 14 10:21:29 2015 +0000
+++ b/sys/netinet/tcp.h Sat Feb 14 12:57:52 2015 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: tcp.h,v 1.30 2012/01/07 20:20:22 christos Exp $ */
+/* $NetBSD: tcp.h,v 1.31 2015/02/14 12:57:53 he Exp $ */
/*
* Copyright (c) 1982, 1986, 1993
@@ -127,7 +127,80 @@
#ifdef notyet
#define TCP_NOOPT 8 /* reserved for FreeBSD compat */
#endif
+#define TCP_INFO 9 /* retrieve tcp_info structure */
#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */
#define TCP_CONGCTL 0x20 /* selected congestion control */
+#define TCPI_OPT_TIMESTAMPS 0x01
+#define TCPI_OPT_SACK 0x02
+#define TCPI_OPT_WSCALE 0x04
+#define TCPI_OPT_ECN 0x08
+#define TCPI_OPT_TOE 0x10
+
+/*
+ * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits
+ * the caller to query certain information about the state of a TCP
+ * connection. We provide an overlapping set of fields with the Linux
+ * implementation, but since this is a fixed size structure, room has been
+ * left for growth. In order to maximize potential future compatibility with
+ * the Linux API, the same variable names and order have been adopted, and
+ * padding left to make room for omitted fields in case they are added later.
+ *
+ * XXX: This is currently an unstable ABI/API, in that it is expected to
+ * change.
+ */
+struct tcp_info {
+ uint8_t tcpi_state; /* TCP FSM state. */
+ uint8_t __tcpi_ca_state;
+ uint8_t __tcpi_retransmits;
+ uint8_t __tcpi_probes;
+ uint8_t __tcpi_backoff;
+ uint8_t tcpi_options; /* Options enabled on conn. */
+ uint8_t tcpi_snd_wscale:4, /* RFC1323 send shift value. */
+ tcpi_rcv_wscale:4; /* RFC1323 recv shift value. */
+
+ uint32_t tcpi_rto; /* Retransmission timeout (usec). */
+ uint32_t __tcpi_ato;
+ uint32_t tcpi_snd_mss; /* Max segment size for send. */
+ uint32_t tcpi_rcv_mss; /* Max segment size for receive. */
+
+ uint32_t __tcpi_unacked;
+ uint32_t __tcpi_sacked;
+ uint32_t __tcpi_lost;
+ uint32_t __tcpi_retrans;
+ uint32_t __tcpi_fackets;
+
+ /* Times; measurements in usecs. */
+ uint32_t __tcpi_last_data_sent;
+ uint32_t __tcpi_last_ack_sent; /* Also unimpl. on Linux? */
+ uint32_t tcpi_last_data_recv; /* Time since last recv data. */
+ uint32_t __tcpi_last_ack_recv;
+
+ /* Metrics; variable units. */
+ uint32_t __tcpi_pmtu;
+ uint32_t __tcpi_rcv_ssthresh;
+ uint32_t tcpi_rtt; /* Smoothed RTT in usecs. */
+ uint32_t tcpi_rttvar; /* RTT variance in usecs. */
+ uint32_t tcpi_snd_ssthresh; /* Slow start threshold. */
+ uint32_t tcpi_snd_cwnd; /* Send congestion window. */
+ uint32_t __tcpi_advmss;
+ uint32_t __tcpi_reordering;
+
+ uint32_t __tcpi_rcv_rtt;
+ uint32_t tcpi_rcv_space; /* Advertised recv window. */
+
+ /* FreeBSD/NetBSD extensions to tcp_info. */
+ uint32_t tcpi_snd_wnd; /* Advertised send window. */
+ uint32_t tcpi_snd_bwnd; /* No longer used. */
+ uint32_t tcpi_snd_nxt; /* Next egress seqno */
+ uint32_t tcpi_rcv_nxt; /* Next ingress seqno */
+ uint32_t tcpi_toe_tid; /* HWTID for TOE endpoints */
+ uint32_t tcpi_snd_rexmitpack; /* Retransmitted packets */
+ uint32_t tcpi_rcv_ooopack; /* Out-of-order packets */
+ uint32_t tcpi_snd_zerowin; /* Zero-sized windows sent */
+
+ /* Padding to grow without breaking ABI. */
+ uint32_t __tcpi_pad[26]; /* Padding. */
+};
+
#endif /* !_NETINET_TCP_H_ */
diff -r d66971c76620 -r bda78a1ad8d7 sys/netinet/tcp_input.c
--- a/sys/netinet/tcp_input.c Sat Feb 14 10:21:29 2015 +0000
+++ b/sys/netinet/tcp_input.c Sat Feb 14 12:57:52 2015 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: tcp_input.c,v 1.335 2014/12/02 20:25:47 christos Exp $ */
+/* $NetBSD: tcp_input.c,v 1.336 2015/02/14 12:57:53 he Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -148,7 +148,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.335 2014/12/02 20:25:47 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.336 2015/02/14 12:57:53 he Exp $");
#include "opt_inet.h"
#include "opt_ipsec.h"
@@ -738,6 +738,7 @@
/*
* Update the counters.
*/
+ tp->t_rcvoopack++;
tcps = TCP_STAT_GETREF();
tcps[TCP_STAT_RCVOOPACK]++;
tcps[TCP_STAT_RCVOOBYTE] += rcvoobyte;
diff -r d66971c76620 -r bda78a1ad8d7 sys/netinet/tcp_output.c
--- a/sys/netinet/tcp_output.c Sat Feb 14 10:21:29 2015 +0000
+++ b/sys/netinet/tcp_output.c Sat Feb 14 12:57:52 2015 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: tcp_output.c,v 1.179 2014/11/10 18:52:51 maxv Exp $ */
+/* $NetBSD: tcp_output.c,v 1.180 2015/02/14 12:57:53 he Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -135,7 +135,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.179 2014/11/10 18:52:51 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.180 2015/02/14 12:57:53 he Exp $");
#include "opt_inet.h"
#include "opt_ipsec.h"
@@ -439,6 +439,7 @@
if (tp->t_force && len == 1)
tcps[TCP_STAT_SNDPROBE]++;
else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+ tp->t_sndrexmitpack++;
tcps[TCP_STAT_SNDREXMITPACK]++;
tcps[TCP_STAT_SNDREXMITBYTE] += len;
} else {
@@ -1401,6 +1402,9 @@
if (win < (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt))
win = (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt);
th->th_win = htons((u_int16_t) (win>>tp->rcv_scale));
+ if (th->th_win == 0) {
+ tp->t_sndzerowin++;
+ }
if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
u_int32_t urp = tp->snd_up - tp->snd_nxt;
if (urp > IP_MAXPACKET)
diff -r d66971c76620 -r bda78a1ad8d7 sys/netinet/tcp_subr.c
--- a/sys/netinet/tcp_subr.c Sat Feb 14 10:21:29 2015 +0000
+++ b/sys/netinet/tcp_subr.c Sat Feb 14 12:57:52 2015 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: tcp_subr.c,v 1.257 2014/11/10 18:52:51 maxv Exp $ */
+/* $NetBSD: tcp_subr.c,v 1.258 2015/02/14 12:57:53 he Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.257 2014/11/10 18:52:51 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.258 2015/02/14 12:57:53 he Exp $");
#include "opt_inet.h"
#include "opt_ipsec.h"
@@ -980,6 +980,9 @@
.t_partialacks = -1,
.t_bytes_acked = 0,
+ .t_sndrexmitpack = 0,
+ .t_rcvoopack = 0,
+ .t_sndzerowin = 0,
};
/*
diff -r d66971c76620 -r bda78a1ad8d7 sys/netinet/tcp_usrreq.c
--- a/sys/netinet/tcp_usrreq.c Sat Feb 14 10:21:29 2015 +0000
+++ b/sys/netinet/tcp_usrreq.c Sat Feb 14 12:57:52 2015 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: tcp_usrreq.c,v 1.202 2014/11/10 18:52:51 maxv Exp $ */
+/* $NetBSD: tcp_usrreq.c,v 1.203 2015/02/14 12:57:53 he Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -99,7 +99,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.202 2014/11/10 18:52:51 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.203 2015/02/14 12:57:53 he Exp $");
#include "opt_inet.h"
#include "opt_ipsec.h"
@@ -119,6 +119,7 @@
#include <sys/domain.h>
#include <sys/sysctl.h>
#include <sys/kauth.h>
+#include <sys/kernel.h>
#include <sys/uidinfo.h>
#include <net/if.h>
@@ -271,6 +272,65 @@
TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle);
}
+/*
+ * Export TCP internal state information via a struct tcp_info, based on the
+ * Linux 2.6 API. Not ABI compatible as our constants are mapped differently
+ * (TCP state machine, etc). We export all information using FreeBSD-native
+ * constants -- for example, the numeric values for tcpi_state will differ
+ * from Linux.
+ */
+static void
+tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
+{
+
+ bzero(ti, sizeof(*ti));
+
+ ti->tcpi_state = tp->t_state;
+ if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
+ ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
+ if (tp->t_flags & TF_SACK_PERMIT)
+ ti->tcpi_options |= TCPI_OPT_SACK;
+ if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
+ ti->tcpi_options |= TCPI_OPT_WSCALE;
+ ti->tcpi_snd_wscale = tp->snd_scale;
+ ti->tcpi_rcv_wscale = tp->rcv_scale;
+ }
+ if (tp->t_flags & TF_ECN_PERMIT) {
+ ti->tcpi_options |= TCPI_OPT_ECN;
+ }
+
+ ti->tcpi_rto = tp->t_rxtcur * tick;
+ ti->tcpi_last_data_recv = (long)(hardclock_ticks -
+ (int)tp->t_rcvtime) * tick;
+ ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
+ ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
+
+ ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
+ /* Linux API wants these in # of segments, apparently */
+ ti->tcpi_snd_cwnd = tp->snd_cwnd / tp->t_segsz;
+ ti->tcpi_snd_wnd = tp->snd_wnd / tp->t_segsz;
+
+ /*
+ * FreeBSD-specific extension fields for tcp_info.
+ */
+ ti->tcpi_rcv_space = tp->rcv_wnd;
+ ti->tcpi_rcv_nxt = tp->rcv_nxt;
+ ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */
+ ti->tcpi_snd_nxt = tp->snd_nxt;
+ ti->tcpi_snd_mss = tp->t_segsz;
+ ti->tcpi_rcv_mss = tp->t_segsz;
+#ifdef TF_TOE
+ if (tp->t_flags & TF_TOE)
+ ti->tcpi_options |= TCPI_OPT_TOE;
+#endif
+ /* From the redundant department of redundancies... */
+ ti->__tcpi_retransmits = ti->__tcpi_retrans =
+ ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
+
+ ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
+ ti->tcpi_snd_zerowin = tp->t_sndzerowin;
Home |
Main Index |
Thread Index |
Old Index