Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/netinet Implement retransmit logic for the SYN cache engine.
details: https://anonhg.NetBSD.org/src/rev/7eb0647b815a
branches: trunk
changeset: 472409:7eb0647b815a
user: thorpej <thorpej%NetBSD.org@localhost>
date: Thu Apr 29 03:54:22 1999 +0000
description:
Implement retransmit logic for the SYN cache engine. Fixes a rare condition
in which one side can think a connection exists while the other side thinks
the connection was never established.
The original problem was first reported by Ty Sarna in PR #5909. The
original fix I made to the code didn't cover all cases. The problem this
fix addresses was reported by Christoph Badura via private e-mail.
Many thanks to Bill Sommerfeld for helping me to test this code, and
for finding a subtle bug.
diffstat:
sys/netinet/in_proto.c | 5 +-
sys/netinet/tcp_input.c | 248 +++++++++++++++++++++++++++++------------------
sys/netinet/tcp_var.h | 23 ++-
3 files changed, 168 insertions(+), 108 deletions(-)
diffs (truncated from 532 to 300 lines):
diff -r cb3cc27e7d04 -r 7eb0647b815a sys/netinet/in_proto.c
--- a/sys/netinet/in_proto.c Thu Apr 29 03:38:39 1999 +0000
+++ b/sys/netinet/in_proto.c Thu Apr 29 03:54:22 1999 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: in_proto.c,v 1.29 1999/01/14 01:16:55 thorpej Exp $ */
+/* $NetBSD: in_proto.c,v 1.30 1999/04/29 03:54:22 thorpej Exp $ */
/*
* Copyright (c) 1982, 1986, 1993
@@ -188,5 +188,4 @@
int tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE;
int tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE;
struct syn_cache_head tcp_syn_cache[TCP_SYN_HASH_SIZE];
-int tcp_syn_cache_interval = 8; /* runs timer every 4 seconds */
-int tcp_syn_cache_timeo = TCPTV_KEEP_INIT;
+int tcp_syn_cache_interval = 1; /* runs timer twice a second */
diff -r cb3cc27e7d04 -r 7eb0647b815a sys/netinet/tcp_input.c
--- a/sys/netinet/tcp_input.c Thu Apr 29 03:38:39 1999 +0000
+++ b/sys/netinet/tcp_input.c Thu Apr 29 03:54:22 1999 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: tcp_input.c,v 1.79 1999/04/22 01:32:30 simonb Exp $ */
+/* $NetBSD: tcp_input.c,v 1.80 1999/04/29 03:54:22 thorpej Exp $ */
/*-
* Copyright (c) 1997, 1998, 1999 The NetBSD Foundation, Inc.
@@ -1846,13 +1846,11 @@
((((sa)->s_addr^syn_hash1)*(((((u_int32_t)(dp))<<16) + \
((u_int32_t)(sp)))^syn_hash2)))
-LIST_HEAD(, syn_cache_head) tcp_syn_cache_queue;
-
-#define SYN_CACHE_RM(sc, scp) \
+#define SYN_CACHE_RM(sc) \
do { \
- TAILQ_REMOVE(&(scp)->sch_queue, (sc), sc_queue); \
- if (--(scp)->sch_length == 0) \
- LIST_REMOVE((scp), sch_headq); \
+ LIST_REMOVE((sc), sc_bucketq); \
+ tcp_syn_cache[(sc)->sc_bucketidx].sch_length--; \
+ TAILQ_REMOVE(&tcp_syn_cache_timeq[(sc)->sc_rxtshift], (sc), sc_timeq); \
syn_cache_count--; \
} while (0)
@@ -1867,17 +1865,33 @@
struct pool syn_cache_pool;
+/*
+ * We don't estimate RTT with SYNs, so each packet starts with the default
+ * RTT and each timer queue has a fixed timeout value. This allows us to
+ * optimize the timer queues somewhat.
+ */
+#define SYN_CACHE_TIMER_ARM(sc) \
+do { \
+ TCPT_RANGESET((sc)->sc_rxtcur, \
+ TCPTV_SRTTDFLT * tcp_backoff[(sc)->sc_rxtshift], TCPTV_MIN, \
+ TCPTV_REXMTMAX); \
+ PRT_SLOW_ARM((sc)->sc_rexmt, (sc)->sc_rxtcur); \
+} while (0)
+
+TAILQ_HEAD(, syn_cache) tcp_syn_cache_timeq[TCP_MAXRXTSHIFT + 1];
+
void
syn_cache_init()
{
int i;
- /* Initialize the hash bucket queues. */
+ /* Initialize the hash buckets. */
for (i = 0; i < tcp_syn_cache_size; i++)
- TAILQ_INIT(&tcp_syn_cache[i].sch_queue);
+ LIST_INIT(&tcp_syn_cache[i].sch_bucket);
- /* Initialize the active hash bucket cache. */
- LIST_INIT(&tcp_syn_cache_queue);
+ /* Initialize the timer queues. */
+ for (i = 0; i <= TCP_MAXRXTSHIFT; i++)
+ TAILQ_INIT(&tcp_syn_cache_timeq[i]);
/* Initialize the syn cache pool. */
pool_init(&syn_cache_pool, sizeof(struct syn_cache), 0, 0, 0,
@@ -1888,9 +1902,9 @@
syn_cache_insert(sc)
struct syn_cache *sc;
{
- struct syn_cache_head *scp, *scp2, *sce;
+ struct syn_cache_head *scp;
struct syn_cache *sc2;
- int s;
+ int s, i;
/*
* If there are no entries in the hash table, reinitialize
@@ -1904,7 +1918,8 @@
}
sc->sc_hash = SYN_HASH(&sc->sc_src, sc->sc_sport, sc->sc_dport);
- scp = &tcp_syn_cache[sc->sc_hash % tcp_syn_cache_size];
+ sc->sc_bucketidx = sc->sc_hash % tcp_syn_cache_size;
+ scp = &tcp_syn_cache[sc->sc_bucketidx];
/*
* Make sure that we don't overflow the per-bucket
@@ -1914,44 +1929,71 @@
if (scp->sch_length >= tcp_syn_bucket_limit) {
tcpstat.tcps_sc_bucketoverflow++;
/*
- * The bucket is full. Toss the first (i.e. oldest)
- * element in this bucket.
+ * The bucket is full. Toss the oldest element in the
+ * bucket. This will be the entry with our bucket
+ * index closest to the front of the timer queue with
+ * the largest timeout value.
+ *
+ * Note: This timer queue traversal may be expensive, so
+ * we hope that this doesn't happen very often. It is
+ * much more likely that we'll overflow the entire
+ * cache, which is much easier to handle; see below.
*/
- sc2 = TAILQ_FIRST(&scp->sch_queue);
- SYN_CACHE_RM(sc2, scp);
- SYN_CACHE_PUT(sc2);
+ for (i = TCP_MAXRXTSHIFT; i >= 0; i--) {
+ for (sc2 = TAILQ_FIRST(&tcp_syn_cache_timeq[i]);
+ sc2 != NULL;
+ sc2 = TAILQ_NEXT(sc2, sc_timeq)) {
+ if (sc2->sc_bucketidx == sc->sc_bucketidx) {
+ SYN_CACHE_RM(sc2);
+ SYN_CACHE_PUT(sc2);
+ goto insert; /* 2 level break */
+ }
+ }
+ }
+#ifdef DIAGNOSTIC
+ /*
+ * This should never happen; we should always find an
+ * entry in our bucket.
+ */
+ panic("syn_cache_insert: bucketoverflow: impossible");
+#endif
} else if (syn_cache_count >= tcp_syn_cache_limit) {
tcpstat.tcps_sc_overflowed++;
/*
- * The cache is full. Toss the first (i.e. oldest)
- * element in the first non-empty bucket we can find.
+ * The cache is full. Toss the oldest entry in the
+ * entire cache. This is the front entry in the
+ * first non-empty timer queue with the largest
+ * timeout value.
*/
- scp2 = scp;
- if (TAILQ_FIRST(&scp2->sch_queue) == NULL) {
- sce = &tcp_syn_cache[tcp_syn_cache_size];
- for (++scp2; scp2 != scp; scp2++) {
- if (scp2 >= sce)
- scp2 = &tcp_syn_cache[0];
- if (TAILQ_FIRST(&scp2->sch_queue) != NULL)
- break;
- }
+ for (i = TCP_MAXRXTSHIFT; i >= 0; i--) {
+ sc2 = TAILQ_FIRST(&tcp_syn_cache_timeq[i]);
+ if (sc2 == NULL)
+ continue;
+ SYN_CACHE_RM(sc2);
+ SYN_CACHE_PUT(sc2);
+ goto insert; /* symmetry with above */
}
- sc2 = TAILQ_FIRST(&scp2->sch_queue);
- if (sc2 == NULL) {
- SYN_CACHE_PUT(sc);
- return;
- }
- SYN_CACHE_RM(sc2, scp2);
- SYN_CACHE_PUT(sc2);
+#ifdef DIAGNOSTIC
+ /*
+ * This should never happen; we should always find an
+ * entry in the cache.
+ */
+ panic("syn_cache_insert: cache overflow: impossible");
+#endif
}
- /* Set entry's timer. */
- PRT_SLOW_ARM(sc->sc_timer, tcp_syn_cache_timeo);
+ insert:
+ /*
+ * Initialize the entry's timer.
+ */
+ sc->sc_rxttot = 0;
+ sc->sc_rxtshift = 0;
+ SYN_CACHE_TIMER_ARM(sc);
+ TAILQ_INSERT_TAIL(&tcp_syn_cache_timeq[sc->sc_rxtshift], sc, sc_timeq);
/* Put it into the bucket. */
- TAILQ_INSERT_TAIL(&scp->sch_queue, sc, sc_queue);
- if (++scp->sch_length == 1)
- LIST_INSERT_HEAD(&tcp_syn_cache_queue, scp, sch_headq);
+ LIST_INSERT_HEAD(&scp->sch_bucket, sc, sc_bucketq);
+ scp->sch_length++;
syn_cache_count++;
tcpstat.tcps_sc_added++;
@@ -1959,31 +2001,64 @@
}
/*
- * Walk down the cache list, looking for expired entries in each bucket.
+ * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
+ * If we have retransmitted an entry the maximum number of times, expire
+ * that entry.
*/
void
syn_cache_timer()
{
- struct syn_cache_head *scp, *nscp;
struct syn_cache *sc, *nsc;
- int s;
+ int i, s;
s = splsoftnet();
- for (scp = LIST_FIRST(&tcp_syn_cache_queue); scp != NULL; scp = nscp) {
-#ifdef DIAGNOSTIC
- if (TAILQ_FIRST(&scp->sch_queue) == NULL)
- panic("syn_cache_timer: queue inconsistency");
-#endif
- nscp = LIST_NEXT(scp, sch_headq);
- for (sc = TAILQ_FIRST(&scp->sch_queue);
- sc != NULL && PRT_SLOW_ISEXPIRED(sc->sc_timer);
+
+ /*
+ * First, get all the entries that need to be retransmitted, or
+ * must be expired due to exceeding the initial keepalive time.
+ */
+ for (i = 0; i < TCP_MAXRXTSHIFT; i++) {
+ for (sc = TAILQ_FIRST(&tcp_syn_cache_timeq[i]);
+ sc != NULL && PRT_SLOW_ISEXPIRED(sc->sc_rexmt);
sc = nsc) {
- nsc = TAILQ_NEXT(sc, sc_queue);
- tcpstat.tcps_sc_timed_out++;
- SYN_CACHE_RM(sc, scp);
- SYN_CACHE_PUT(sc);
+ nsc = TAILQ_NEXT(sc, sc_timeq);
+
+ /*
+ * Compute the total amount of time this entry has
+ * been on a queue. If this entry has been on longer
+ * than the keep alive timer would allow, expire it.
+ */
+ sc->sc_rxttot += sc->sc_rxtcur;
+ if (sc->sc_rxttot >= TCPTV_KEEP_INIT) {
+ tcpstat.tcps_sc_timed_out++;
+ SYN_CACHE_RM(sc);
+ SYN_CACHE_PUT(sc);
+ continue;
+ }
+
+ tcpstat.tcps_sc_retransmitted++;
+ (void) syn_cache_respond(sc, NULL);
+
+ /* Advance this entry onto the next timer queue. */
+ TAILQ_REMOVE(&tcp_syn_cache_timeq[i], sc, sc_timeq);
+ sc->sc_rxtshift = i + 1;
+ SYN_CACHE_TIMER_ARM(sc);
+ TAILQ_INSERT_TAIL(&tcp_syn_cache_timeq[sc->sc_rxtshift],
+ sc, sc_timeq);
}
}
+
+ /*
+ * Now get all the entries that are expired due to too many
+ * retransmissions.
+ */
+ for (sc = TAILQ_FIRST(&tcp_syn_cache_timeq[TCP_MAXRXTSHIFT]);
+ sc != NULL && PRT_SLOW_ISEXPIRED(sc->sc_rexmt);
+ sc = nsc) {
+ tcpstat.tcps_sc_timed_out++;
+ SYN_CACHE_RM(sc);
+ SYN_CACHE_PUT(sc);
+ }
splx(s);
}
@@ -2005,8 +2080,8 @@
scp = &tcp_syn_cache[hash % tcp_syn_cache_size];
*headp = scp;
s = splsoftnet();
- for (sc = TAILQ_FIRST(&scp->sch_queue); sc != NULL;
- sc = TAILQ_NEXT(sc, sc_queue)) {
+ for (sc = LIST_FIRST(&scp->sch_bucket); sc != NULL;
+ sc = LIST_NEXT(sc, sc_bucketq)) {
if (sc->sc_hash != hash)
continue;
if (sc->sc_src.s_addr == ti->ti_src.s_addr &&
@@ -2056,7 +2131,6 @@
register struct tcpiphdr *ti;
struct sockaddr_in *sin;
struct mbuf *am;
- long win;
int s;
Home |
Main Index |
Thread Index |
Old Index