Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/netinet Split tcp_timers() into multiple functions, one ...



details:   https://anonhg.NetBSD.org/src/rev/b00cb53e25f2
branches:  trunk
changeset: 514796:b00cb53e25f2
user:      thorpej <thorpej%NetBSD.org@localhost>
date:      Mon Sep 10 20:15:14 2001 +0000

description:
Split tcp_timers() into multiple functions, one for each timer,
and call them directly from tcp_slowtimo() (via a table) rather
than going through tcp_usrreq().

This will allow us to call TCP timers directly from callouts,
in a future revision.

diffstat:

 sys/netinet/tcp_timer.c  |  568 ++++++++++++++++++++++++++++------------------
 sys/netinet/tcp_timer.h  |    6 +-
 sys/netinet/tcp_usrreq.c |   11 +-
 sys/netinet/tcp_var.h    |    4 +-
 4 files changed, 347 insertions(+), 242 deletions(-)

diffs (truncated from 691 to 300 lines):

diff -r d35cdbeab5bd -r b00cb53e25f2 sys/netinet/tcp_timer.c
--- a/sys/netinet/tcp_timer.c   Mon Sep 10 20:13:17 2001 +0000
+++ b/sys/netinet/tcp_timer.c   Mon Sep 10 20:15:14 2001 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: tcp_timer.c,v 1.50 2001/09/10 15:23:10 thorpej Exp $   */
+/*     $NetBSD: tcp_timer.c,v 1.51 2001/09/10 20:15:14 thorpej Exp $   */
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -102,6 +102,7 @@
  */
 
 #include "opt_inet.h"
+#include "opt_tcp_debug.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -136,6 +137,9 @@
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcpip.h>
+#ifdef TCP_DEBUG
+#include <netinet/tcp_debug.h>
+#endif
 
 int    tcp_keepidle = TCPTV_KEEP_IDLE;
 int    tcp_keepintvl = TCPTV_KEEPINTVL;
@@ -149,6 +153,18 @@
  */
 int    tcp_delack_ticks = 0;
 
+void   tcp_timer_rexmt(void *);
+void   tcp_timer_persist(void *);
+void   tcp_timer_keep(void *);
+void   tcp_timer_2msl(void *);
+
+tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
+       tcp_timer_rexmt,
+       tcp_timer_persist,
+       tcp_timer_keep,
+       tcp_timer_2msl,
+};
+
 /*
  * Callout to process delayed ACKs for a TCPCB.
  */
@@ -209,10 +225,7 @@
                for (i = 0; i < TCPT_NTIMERS; i++) {
                        if (TCP_TIMER_ISEXPIRED(tp, i)) {
                                TCP_TIMER_DISARM(tp, i);
-                               (void) tcp_usrreq(tp->t_inpcb->inp_socket,
-                                   PRU_SLOWTIMO, (struct mbuf *)0,
-                                   (struct mbuf *)i, (struct mbuf *)0,
-                                   (struct proc *)0);
+                               (*(tcp_timer_funcs[i]))(tp);
                                /* XXX NOT MP SAFE */
                                if ((ninp == (void *)&tcbtable.inpt_queue &&
                                    tcbtable.inpt_queue.cqh_last != inp) ||
@@ -239,10 +252,7 @@
                for (i = 0; i < TCPT_NTIMERS; i++) {
                        if (TCP_TIMER_ISEXPIRED(tp, i)) {
                                TCP_TIMER_DISARM(tp, i);
-                               (void) tcp_usrreq(tp->t_in6pcb->in6p_socket,
-                                   PRU_SLOWTIMO, (struct mbuf *)0,
-                                   (struct mbuf *)i, (struct mbuf *)0,
-                                   (struct proc *)0);
+                               (*(tcp_timer_funcs[i]))(tp);
                                /* XXX NOT MP SAFE */
                                if ((nin6p == (void *)&tcb6 &&
                                    tcb6.in6p_prev != in6p) ||
@@ -289,19 +299,321 @@
 /*
  * TCP timer processing.
  */
-struct tcpcb *
-tcp_timers(tp, timer)
-       struct tcpcb *tp;
-       int timer;
+
+void
+tcp_timer_rexmt(void *arg)
 {
-       short   rto;
+       struct tcpcb *tp = arg;
+       uint32_t rto;
+       int s;
+#ifdef TCP_DEBUG
+       struct socket *so;
+       short ostate;
+#endif
+
+       s = splsoftnet();
+
+#ifdef TCP_DEBUG
+#ifdef INET
+       if (tp->t_inpcb)
+               so = tp->t_inpcb->inp_socket;
+#endif
+#ifdef INET6
+       if (tp->t_in6pcb)
+               so = tp->t_in6pcb->in6p_socket;
+#endif
+       ostate = tp->t_state;
+#endif /* TCP_DEBUG */
+
+       /*
+        * Retransmission timer went off.  Message has not
+        * been acked within retransmit interval.  Back off
+        * to a longer retransmit interval and retransmit one segment.
+        */
+
+       if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+               tp->t_rxtshift = TCP_MAXRXTSHIFT;
+               tcpstat.tcps_timeoutdrop++;
+               tp = tcp_drop(tp, tp->t_softerror ?
+                   tp->t_softerror : ETIMEDOUT);
+               goto out;
+       }
+       tcpstat.tcps_rexmttimeo++;
+       rto = TCP_REXMTVAL(tp);
+       if (rto < tp->t_rttmin)
+               rto = tp->t_rttmin;
+       TCPT_RANGESET(tp->t_rxtcur, rto * tcp_backoff[tp->t_rxtshift],
+           tp->t_rttmin, TCPTV_REXMTMAX);
+       TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
+#if 0
+       /* 
+        * If we are losing and we are trying path MTU discovery,
+        * try turning it off.  This will avoid black holes in
+        * the network which suppress or fail to send "packet
+        * too big" ICMP messages.  We should ideally do
+        * lots more sophisticated searching to find the right
+        * value here...
+        */
+       if (ip_mtudisc && tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) {
+               struct rtentry *rt = NULL;
+
+#ifdef INET
+               if (tp->t_inpcb)
+                       rt = in_pcbrtentry(tp->t_inpcb);
+#endif
+#ifdef INET6
+               if (tp->t_in6pcb)
+                       rt = in6_pcbrtentry(tp->t_in6pcb);
+#endif
 
-#ifdef DIAGNOSTIC
-       if (tp->t_inpcb && tp->t_in6pcb)
-               panic("tcp_timers: both t_inpcb and t_in6pcb are set");
+               /* XXX:  Black hole recovery code goes here */
+       }
+#endif /* 0 */
+       /*
+        * If losing, let the lower level know and try for
+        * a better route.  Also, if we backed off this far,
+        * our srtt estimate is probably bogus.  Clobber it
+        * so we'll take the next rtt measurement as our srtt;
+        * move the current srtt into rttvar to keep the current
+        * retransmit times until then.
+        */
+       if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
+#ifdef INET
+               if (tp->t_inpcb)
+                       in_losing(tp->t_inpcb);
+#endif
+#ifdef INET6
+               if (tp->t_in6pcb)
+                       in6_losing(tp->t_in6pcb);
+#endif
+               tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
+               tp->t_srtt = 0;
+       }
+       tp->snd_nxt = tp->snd_una;
+       /*
+        * If timing a segment in this window, stop the timer.
+        */
+       tp->t_rtttime = 0;
+       /*
+        * Remember if we are retransmitting a SYN, because if
+        * we are, the initial congestion window must be set
+        * to 1 segment.
+        */
+       if (tp->t_state == TCPS_SYN_SENT)
+               tp->t_flags |= TF_SYN_REXMT;
+       /*
+        * Close the congestion window down to one segment
+        * (we'll open it by one segment for each ack we get).
+        * Since we probably have a window's worth of unacked
+        * data accumulated, this "slow start" keeps us from
+        * dumping all that data as back-to-back packets (which
+        * might overwhelm an intermediate gateway).
+        *
+        * There are two phases to the opening: Initially we
+        * open by one mss on each ack.  This makes the window
+        * size increase exponentially with time.  If the
+        * window is larger than the path can handle, this
+        * exponential growth results in dropped packet(s)
+        * almost immediately.  To get more time between 
+        * drops but still "push" the network to take advantage
+        * of improving conditions, we switch from exponential
+        * to linear window opening at some threshold size.
+        * For a threshold, we use half the current window
+        * size, truncated to a multiple of the mss.
+        *
+        * (the minimum cwnd that will give us exponential
+        * growth is 2 mss.  We don't allow the threshold
+        * to go below this.)
+        */
+       {
+       u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_segsz;
+       if (win < 2)
+               win = 2;
+       /* Loss Window MUST be one segment. */
+       tp->snd_cwnd = tp->t_segsz;
+       tp->snd_ssthresh = win * tp->t_segsz;
+       tp->t_dupacks = 0;
+       }
+       (void) tcp_output(tp);
+
+ out:
+#ifdef TCP_DEBUG
+       if (tp && so->so_options & SO_DEBUG)
+               tcp_trace(TA_USER, ostate, tp, NULL,
+                   PRU_SLOWTIMO | (TCPT_REXMT << 8));
+#endif
+       splx(s);
+}
+
+void
+tcp_timer_persist(void *arg)
+{
+       struct tcpcb *tp = arg;
+       struct socket *so;
+       uint32_t rto;
+       int s;
+#ifdef TCP_DEBUG
+       short ostate;
 #endif
 
-       switch (timer) {
+       s = splsoftnet();
+
+#ifdef INET
+       if (tp->t_inpcb)
+               so = tp->t_inpcb->inp_socket;
+#endif
+#ifdef INET6
+       if (tp->t_in6pcb)
+               so = tp->t_in6pcb->in6p_socket;
+#endif
+
+#ifdef TCP_DEBUG
+       ostate = tp->t_state;
+#endif
+
+       /*
+        * Persistence timer into zero window.
+        * Force a byte to be output, if possible.
+        */
+
+       /*
+        * Hack: if the peer is dead/unreachable, we do not
+        * time out if the window is closed.  After a full
+        * backoff, drop the connection if the idle time
+        * (no responses to probes) reaches the maximum
+        * backoff that we would use if retransmitting.
+        */
+       rto = TCP_REXMTVAL(tp);
+       if (rto < tp->t_rttmin)
+               rto = tp->t_rttmin;
+       if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
+           ((tcp_now - tp->t_rcvtime) >= tcp_maxpersistidle ||
+           (tcp_now - tp->t_rcvtime) >= rto * tcp_totbackoff)) {
+               tcpstat.tcps_persistdrops++;
+               tp = tcp_drop(tp, ETIMEDOUT);
+               goto out;
+       }
+       tcpstat.tcps_persisttimeo++;
+       tcp_setpersist(tp);
+       tp->t_force = 1;
+       (void) tcp_output(tp);
+       tp->t_force = 0;
+
+ out:
+#ifdef TCP_DEBUG
+       if (tp && so->so_options & SO_DEBUG)
+               tcp_trace(TA_USER, ostate, tp, NULL,
+                   PRU_SLOWTIMO | (TCPT_PERSIST << 8));
+#endif
+       splx(s);
+}
+
+void
+tcp_timer_keep(void *arg)
+{
+       struct tcpcb *tp = arg;
+       struct socket *so;
+       int s;
+#ifdef TCP_DEBUG
+       short ostate;



Home | Main Index | Thread Index | Old Index