Subject: Re: Appropriate byte counting, revisited.
To: None <tech-net@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-net
Date: 10/17/2006 07:08:22
--NextPart-20061017070648-2015000
Content-Type: Text/Plain; charset=us-ascii
> > Please, just use tcp.abc and be done with it! Not every objection should
> > be taken seriously.
>
> ok, maybe i'll do so...
> if anyone has any serious problem with "tcp.abc" beyond tastes,
> please speak up (again).
Here's a patch.
I omitted the ACK-prediction part of the original patch because
it's a separate change.
YAMAMOTO Takashi
--NextPart-20061017070648-2015000
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="a.diff"
Index: tcp_subr.c
===================================================================
--- tcp_subr.c (revision 1830)
+++ tcp_subr.c (revision 1838)
@@ -202,6 +202,8 @@ int tcp_compat_42 = 0;
int tcp_rst_ppslim = 100; /* 100pps */
int tcp_ackdrop_ppslim = 100; /* 100pps */
int tcp_do_loopback_cksum = 0;
+int tcp_do_abc = 1; /* RFC3465 Appropriate byte counting. */
+int tcp_abc_aggressive = 1; /* 1: L=2*SMSS 0: L=1*SMSS */
int tcp_sack_tp_maxholes = 32;
int tcp_sack_globalmaxholes = 1024;
int tcp_sack_globalholes = 0;
@@ -935,6 +937,7 @@ static struct tcpcb tcpcb_template = {
.snd_numholes = 0,
.t_partialacks = -1,
+ .t_bytes_acked = 0,
};
/*
@@ -1647,8 +1650,10 @@ tcp_quench(struct inpcb *inp, int errno
{
struct tcpcb *tp = intotcpcb(inp);
- if (tp)
+ if (tp) {
tp->snd_cwnd = tp->t_segsz;
+ tp->t_bytes_acked = 0;
+ }
}
#endif
@@ -1658,8 +1663,10 @@ tcp6_quench(struct in6pcb *in6p, int err
{
struct tcpcb *tp = in6totcpcb(in6p);
- if (tp)
+ if (tp) {
tp->snd_cwnd = tp->t_segsz;
+ tp->t_bytes_acked = 0;
+ }
}
#endif
Index: tcp_var.h
===================================================================
--- tcp_var.h (revision 1830)
+++ tcp_var.h (revision 1838)
@@ -290,6 +290,9 @@ struct tcpcb {
u_int32_t ts_timebase; /* our timebase */
tcp_seq last_ack_sent;
+/* RFC 3465 variables */
+ u_long t_bytes_acked; /* ABC "bytes_acked" parameter */
+
/* SACK stuff */
#define TCP_SACK_MAX 3
#define TCPSACK_NONE 0
@@ -754,6 +757,8 @@ extern int tcp_ecn_maxretries; /* Max EC
extern int tcp_sack_tp_maxholes; /* Max holes per connection. */
extern int tcp_sack_globalmaxholes; /* Max holes per system. */
extern int tcp_sack_globalholes; /* Number of holes present. */
+extern int tcp_do_abc; /* RFC3465 ABC enabled/disabled? */
+extern int tcp_abc_aggressive; /* 1: L=2*SMSS 0: L=1*SMSS */
extern int tcp_rst_ppslim;
extern int tcp_ackdrop_ppslim;
Index: tcp_usrreq.c
===================================================================
--- tcp_usrreq.c (revision 1830)
+++ tcp_usrreq.c (revision 1838)
@@ -1431,6 +1431,7 @@ sysctl_net_inet_tcp_setup2(struct sysctl
{
int ecn_node, congctl_node;
const struct sysctlnode *sack_node, *node;
+ const struct sysctlnode *abc_node;
#ifdef TCP_DEBUG
extern struct tcp_debug tcp_debug[TCP_NDEBUG];
extern int tcp_debx;
@@ -1736,6 +1737,23 @@ sysctl_net_inet_tcp_setup2(struct sysctl
CTL_EOL);
#endif
+ /* ABC subtree */
+
+ sysctl_createv(clog, 0, NULL, &abc_node,
+ CTLFLAG_PERMANENT, CTLTYPE_NODE, "abc",
+ SYSCTL_DESCR("RFC3465 Appropriate Byte Counting (ABC)"),
+ NULL, 0, NULL, 0,
+ CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
+ sysctl_createv(clog, 0, &abc_node, NULL,
+ CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
+ CTLTYPE_INT, "enable",
+ SYSCTL_DESCR("Enable RFC3465 Appropriate Byte Counting"),
+ NULL, 0, &tcp_do_abc, 0, CTL_CREATE, CTL_EOL);
+ sysctl_createv(clog, 0, &abc_node, NULL,
+ CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
+ CTLTYPE_INT, "aggressive",
+ SYSCTL_DESCR("1: L=2*SMSS 0: L=1*SMSS"),
+ NULL, 0, &tcp_abc_aggressive, 0, CTL_CREATE, CTL_EOL);
}
/*
Index: tcp_congctl.c
===================================================================
--- tcp_congctl.c (revision 1830)
+++ tcp_congctl.c (revision 1838)
@@ -465,6 +465,7 @@ tcp_reno_slow_retransmit(struct tcpcb *t
tp->snd_ssthresh = win * tp->t_segsz;
tp->t_partialacks = -1;
tp->t_dupacks = 0;
+ tp->t_bytes_acked = 0;
}
static void
@@ -485,6 +486,7 @@ tcp_reno_fast_retransmit_newack(struct t
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_partialacks = -1;
tp->t_dupacks = 0;
+ tp->t_bytes_acked = 0;
}
}
@@ -493,17 +495,53 @@ tcp_reno_newack(struct tcpcb *tp, struct
{
/*
* When new data is acked, open the congestion window.
- * If the window gives us less than ssthresh packets
- * in flight, open exponentially (segsz per packet).
- * Otherwise open linearly: segsz per window
- * (segsz^2 / cwnd per packet).
*/
u_int cw = tp->snd_cwnd;
u_int incr = tp->t_segsz;
- if (cw >= tp->snd_ssthresh)
- incr = incr * incr / cw;
+ if (tcp_do_abc) {
+
+ /*
+ * RFC 3465 Appropriate Byte Counting (ABC)
+ */
+
+ int acked = th->th_ack - tp->snd_una;
+
+ if (cw >= tp->snd_ssthresh) {
+ tp->t_bytes_acked += acked;
+ if (tp->t_bytes_acked >= cw) {
+ /* Time to increase the window. */
+ tp->t_bytes_acked -= cw;
+ } else {
+ /* No need to increase yet. */
+ incr = 0;
+ }
+ } else {
+ /*
+ * use 2*SMSS or 1*SMSS for the "L" param,
+ * depending on sysctl setting.
+ *
+ * (See RFC 3465 2.3 Choosing the Limit)
+ */
+ u_int abc_lim;
+
+ abc_lim = (tcp_abc_aggressive == 0) ? incr : incr * 2;
+ incr = min(acked, abc_lim);
+ }
+ } else {
+
+ /*
+ * If the window gives us less than ssthresh packets
+ * in flight, open exponentially (segsz per packet).
+ * Otherwise open linearly: segsz per window
+ * (segsz^2 / cwnd per packet).
+ */
+
+ if (cw >= tp->snd_ssthresh) {
+ incr = incr * incr / cw;
+ }
+ }
tp->snd_cwnd = min(cw + incr, TCP_MAXWIN << tp->snd_scale);
}
@@ -601,6 +639,7 @@ tcp_newreno_fast_retransmit_newack(struc
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_partialacks = -1;
tp->t_dupacks = 0;
+ tp->t_bytes_acked = 0;
}
}
--NextPart-20061017070648-2015000--