Subject: pool/spl cleanup in netinet, netinet6, net
To: None <tech-kern@netbsd.org, tech-net@netbsd.org>
From: Thor Lancelot Simon <tls@rek.tjls.com>
List: tech-kern
Date: 07/19/2006 17:27:26
The patch below attempts to clean up problems in net, netinet, and
netinet6 where pool_get or pool_put may be called from an interrupt
context but where there was no protection with spl*/splx.
I'd very much appreciate review and comments. I am not sure I got the
right spl levels everywhere. In particular, I am suspicious about the
fragment reassembly code, where most allocations _seem_ to be protected
by the IPQ lock (which goes to splvm() to lock) but some frees (and
some removals of elements from datastructures!) are in functions with
just IPQ_CHECK() at the head. I've tried to protect these frees with
splvm but I am not sure that's right -- I'd actually like it if someone
more familiar with the reassembly code would look at its use of pools.
Index: net/if_bridge.c
===================================================================
RCS file: /cvsroot/src/sys/net/if_bridge.c,v
retrieving revision 1.29.2.2
diff -u -r1.29.2.2 if_bridge.c
--- net/if_bridge.c 12 Feb 2006 16:49:50 -0000 1.29.2.2
+++ net/if_bridge.c 19 Jul 2006 21:22:38 -0000
@@ -1621,7 +1621,7 @@
struct ifnet *dst_if, int setflags, uint8_t flags)
{
struct bridge_rtnode *brt;
- int error;
+ int error, s;
/*
* A route for this destination might already exist. If so,
@@ -1636,7 +1636,9 @@
* initialize the expiration time and Ethernet
* address.
*/
+ s = splnet();
brt = pool_get(&bridge_rtnode_pool, PR_NOWAIT);
+ splx(s);
if (brt == NULL)
return (ENOMEM);
@@ -1646,7 +1648,9 @@
memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
+ s = splnet();
pool_put(&bridge_rtnode_pool, brt);
+ splx(s);
return (error);
}
}
@@ -1950,12 +1954,15 @@
void
bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
{
+ int s = splnet();
LIST_REMOVE(brt, brt_hash);
LIST_REMOVE(brt, brt_list);
sc->sc_brtcnt--;
pool_put(&bridge_rtnode_pool, brt);
+
+ splx(s);
}
#if defined(BRIDGE_IPF) && defined(PFIL_HOOKS)
Index: net/route.c
===================================================================
RCS file: /cvsroot/src/sys/net/route.c,v
retrieving revision 1.65
diff -u -r1.65 route.c
--- net/route.c 26 Feb 2005 22:45:09 -0000 1.65
+++ net/route.c 19 Jul 2006 21:22:38 -0000
@@ -614,6 +614,7 @@
senderr(error);
ifa = info->rti_ifa;
makeroute:
+ /* Already at splsoftnet() so pool_get/pool_put are safe */
rt = pool_get(&rtentry_pool, PR_NOWAIT);
if (rt == 0)
senderr(ENOBUFS);
@@ -895,6 +896,7 @@
TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
if (destroy)
RTTIMER_CALLOUT(r);
+ /* we are already at splsoftnet */
pool_put(&rttimer_pool, r);
if (rtq->rtq_count > 0)
rtq->rtq_count--;
@@ -937,6 +939,7 @@
r->rtt_queue->rtq_count--;
else
printf("rt_timer_remove_all: rtq_count reached 0\n");
+ /* we are already at splsoftnet */
pool_put(&rttimer_pool, r);
}
}
@@ -967,12 +970,16 @@
r->rtt_queue->rtq_count--;
else
printf("rt_timer_add: rtq_count reached 0\n");
+ s = splsoftnet();
pool_put(&rttimer_pool, r);
+ splx(s);
break; /* only one per list, so we can quit... */
}
}
+ s = splsoftnet();
r = pool_get(&rttimer_pool, PR_NOWAIT);
+ splx(s);
if (r == NULL)
return (ENOBUFS);
Bzero(r, sizeof(*r));
Index: netinet/igmp.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/igmp.c,v
retrieving revision 1.41
diff -u -r1.41 igmp.c
--- netinet/igmp.c 3 Feb 2005 03:49:01 -0000 1.41
+++ netinet/igmp.c 19 Jul 2006 21:22:38 -0000
@@ -80,6 +80,7 @@
{
struct router_info *rti;
+ /* this function is called at splsoftnet() */
LIST_FOREACH(rti, &rti_head, rti_link) {
if (rti->rti_ifp == inm->inm_ifp) {
inm->inm_rti = rti;
@@ -104,6 +105,7 @@
rti_find(struct ifnet *ifp)
{
struct router_info *rti;
+ int s = splsoftnet();
LIST_FOREACH(rti, &rti_head, rti_link) {
if (rti->rti_ifp == ifp)
@@ -111,16 +113,19 @@
}
rti = pool_get(&igmp_rti_pool, PR_NOWAIT);
- if (rti == NULL)
+ if (rti == NULL) {
+ splx(s);
return NULL;
+ }
rti->rti_ifp = ifp;
rti->rti_type = IGMP_v2_ROUTER;
LIST_INSERT_HEAD(&rti_head, rti, rti_link);
+ splx(s);
return (rti);
}
static void
-rti_delete(struct ifnet *ifp)
+rti_delete(struct ifnet *ifp) /* MUST be called at splsoftnet */
{
struct router_info *rti;
@@ -572,8 +577,7 @@
}
void
-igmp_purgeif(struct ifnet *ifp)
+igmp_purgeif(struct ifnet *ifp) /* MUST be called at splsoftnet() */
{
-
- rti_delete(ifp);
+ rti_delete(ifp); /* manipulates pools */
}
Index: netinet/in.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/in.c,v
retrieving revision 1.104.2.1
diff -u -r1.104.2.1 in.c
--- netinet/in.c 2 Apr 2006 18:06:01 -0000 1.104.2.1
+++ netinet/in.c 19 Jul 2006 21:22:38 -0000
@@ -576,7 +576,7 @@
}
void
-in_purgeif(struct ifnet *ifp)
+in_purgeif(struct ifnet *ifp) /* MUST be called at splsoftnet() */
{
struct ifaddr *ifa, *nifa;
@@ -587,7 +587,7 @@
in_purgeaddr(ifa, ifp);
}
- igmp_purgeif(ifp);
+ igmp_purgeif(ifp); /* manipulates pools */
#ifdef MROUTING
ip_mrouter_detach(ifp);
#endif
Index: netinet/in_pcb.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/in_pcb.c,v
retrieving revision 1.98
diff -u -r1.98 in_pcb.c
--- netinet/in_pcb.c 3 Feb 2005 03:49:01 -0000 1.98
+++ netinet/in_pcb.c 19 Jul 2006 21:22:38 -0000
@@ -184,7 +184,9 @@
int error;
#endif
+ s = splnet();
inp = pool_get(&inpcb_pool, PR_NOWAIT);
+ splx(s);
if (inp == NULL)
return (ENOBUFS);
bzero((caddr_t)inp, sizeof(*inp));
@@ -195,7 +197,9 @@
#if defined(IPSEC) || defined(FAST_IPSEC)
error = ipsec_init_pcbpolicy(so, &inp->inp_sp);
if (error != 0) {
+ s = splnet();
pool_put(&inpcb_pool, inp);
+ splx(s);
return error;
}
#endif
@@ -493,8 +497,8 @@
LIST_REMOVE(&inp->inp_head, inph_lhash);
CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, &inp->inp_head,
inph_queue);
- splx(s);
pool_put(&inpcb_pool, inp);
+ splx(s);
}
void
Index: netinet/ip_flow.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/ip_flow.c,v
retrieving revision 1.29
diff -u -r1.29 ip_flow.c
--- netinet/ip_flow.c 3 Feb 2005 22:43:34 -0000 1.29
+++ netinet/ip_flow.c 19 Jul 2006 21:22:38 -0000
@@ -291,7 +291,9 @@
ipflow_addstats(ipf);
RTFREE(ipf->ipf_ro.ro_rt);
ipflow_inuse--;
+ s = splnet();
pool_put(&ipflow_pool, ipf);
+ splx(s);
}
struct ipflow *
@@ -385,7 +387,9 @@
if (ipflow_inuse >= ip_maxflows) {
ipf = ipflow_reap(1);
} else {
+ s = splnet()
ipf = pool_get(&ipflow_pool, PR_NOWAIT);
+ splx(s);
if (ipf == NULL)
return;
ipflow_inuse++;
Index: netinet/ip_input.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/ip_input.c,v
retrieving revision 1.212.2.2
diff -u -r1.212.2.2 ip_input.c
--- netinet/ip_input.c 6 May 2005 08:40:14 -0000 1.212.2.2
+++ netinet/ip_input.c 19 Jul 2006 21:22:38 -0000
@@ -483,12 +483,13 @@
int downmatch;
int checkif;
int srcrt = 0;
+ int s;
u_int hash;
#ifdef FAST_IPSEC
struct m_tag *mtag;
struct tdb_ident *tdbi;
struct secpolicy *sp;
- int s, error;
+ int error;
#endif /* FAST_IPSEC */
MCLAIM(m, &ip_rx_mowner);
@@ -937,7 +938,9 @@
*/
if (mff || ip->ip_off != htons(0)) {
ipstat.ips_fragments++;
+ s = splvm();
ipqe = pool_get(&ipqent_pool, PR_NOWAIT);
+ splx(s);
if (ipqe == NULL) {
ipstat.ips_rcvmemdrop++;
IPQ_UNLOCK();
@@ -1050,7 +1053,7 @@
struct ip *ip;
struct mbuf *t;
int hlen = ipqe->ipqe_ip->ip_hl << 2;
- int i, next;
+ int i, next, s;
IPQ_LOCK_CHECK();
@@ -1155,7 +1158,9 @@
nq = TAILQ_NEXT(q, ipqe_q);
m_freem(q->ipqe_m);
TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
+ s = splvm();
pool_put(&ipqent_pool, q);
+ splx(s);
fp->ipq_nfrags--;
ip_nfrags--;
}
@@ -1196,11 +1201,15 @@
m->m_next = 0;
m_cat(m, t);
nq = TAILQ_NEXT(q, ipqe_q);
+ s = splvm();
pool_put(&ipqent_pool, q);
+ splx(s);
for (q = nq; q != NULL; q = nq) {
t = q->ipqe_m;
nq = TAILQ_NEXT(q, ipqe_q);
+ s = splvm();
pool_put(&ipqent_pool, q);
+ splx(s);
m_cat(m, t);
}
ip_nfrags -= fp->ipq_nfrags;
@@ -1235,7 +1244,9 @@
ip_nfrags--;
ipstat.ips_fragdropped++;
m_freem(m);
+ s = splvm();
pool_put(&ipqent_pool, ipqe);
+ splx(s);
return (0);
}
@@ -1248,6 +1259,7 @@
{
struct ipqent *q, *p;
u_int nfrags = 0;
+ int s;
IPQ_LOCK_CHECK();
@@ -1256,7 +1268,9 @@
m_freem(q->ipqe_m);
nfrags++;
TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
+ s = splvm();
pool_put(&ipqent_pool, q);
+ splx(s);
}
if (nfrags != fp->ipq_nfrags)
Index: netinet/raw_ip.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/raw_ip.c,v
retrieving revision 1.86
diff -u -r1.86 raw_ip.c
--- netinet/raw_ip.c 11 Mar 2005 06:16:16 -0000 1.86
+++ netinet/raw_ip.c 19 Jul 2006 21:22:38 -0000
@@ -514,6 +514,8 @@
return (in_control(so, (long)m, (caddr_t)nam,
(struct ifnet *)control, p));
+ s = splsoftnet();
+
if (req == PRU_PURGEIF) {
in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control);
in_purgeif((struct ifnet *)control);
@@ -521,7 +523,6 @@
return (0);
}
- s = splsoftnet();
inp = sotoinpcb(so);
#ifdef DIAGNOSTIC
if (req != PRU_SEND && req != PRU_SENDOOB && control)
Index: netinet/tcp_input.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_input.c,v
retrieving revision 1.224.2.2
diff -u -r1.224.2.2 tcp_input.c
--- netinet/tcp_input.c 28 Apr 2005 11:02:08 -0000 1.224.2.2
+++ netinet/tcp_input.c 19 Jul 2006 21:22:38 -0000
@@ -3298,7 +3298,7 @@
panic("syn_cache_insert: bucketoverflow: impossible");
#endif
SYN_CACHE_RM(sc2);
- SYN_CACHE_PUT(sc2);
+ SYN_CACHE_PUT(sc2); /* calls pool_put but see spl above */
} else if (syn_cache_count >= tcp_syn_cache_limit) {
struct syn_cache_head *scp2, *sce;
@@ -3332,7 +3332,7 @@
}
sc2 = TAILQ_FIRST(&scp2->sch_bucket);
SYN_CACHE_RM(sc2);
- SYN_CACHE_PUT(sc2);
+ SYN_CACHE_PUT(sc2); /* calls pool_put but see spl above */
}
/*
@@ -3402,7 +3402,7 @@
dropit:
tcpstat.tcps_sc_timed_out++;
SYN_CACHE_RM(sc);
- SYN_CACHE_PUT(sc);
+ SYN_CACHE_PUT(sc); /* calls pool_put but see spl above */
splx(s);
}
@@ -3427,7 +3427,7 @@
panic("invalid sc_tp in syn_cache_cleanup");
#endif
SYN_CACHE_RM(sc);
- SYN_CACHE_PUT(sc);
+ SYN_CACHE_PUT(sc); /* calls pool_put but see spl above */
}
/* just for safety */
LIST_INIT(&tp->t_sc);
@@ -3772,7 +3772,9 @@
tp->t_dupacks = 0;
tcpstat.tcps_sc_completed++;
+ s = splsoftnet();
SYN_CACHE_PUT(sc);
+ splx(s);
return (so);
resetandabort:
@@ -3780,7 +3782,9 @@
abort:
if (so != NULL)
(void) soabort(so);
+ s = splsoftnet();
SYN_CACHE_PUT(sc);
+ splx(s);
tcpstat.tcps_sc_aborted++;
return ((struct socket *)(-1));
}
@@ -3808,9 +3812,9 @@
return;
}
SYN_CACHE_RM(sc);
- splx(s);
tcpstat.tcps_sc_reset++;
- SYN_CACHE_PUT(sc);
+ SYN_CACHE_PUT(sc); /* calls pool_put but see spl above */
+ splx(s);
}
void
@@ -3847,9 +3851,9 @@
}
SYN_CACHE_RM(sc);
- splx(s);
tcpstat.tcps_sc_unreach++;
- SYN_CACHE_PUT(sc);
+ SYN_CACHE_PUT(sc); /* calls pool_put but see spl above */
+ splx(s);
}
/*
@@ -3877,6 +3881,7 @@
struct syn_cache_head *scp;
struct mbuf *ipopts;
struct tcp_opt_info opti;
+ int s;
tp = sototcpcb(so);
@@ -3948,7 +3953,9 @@
return (1);
}
+ s = splsoftnet();
sc = pool_get(&syn_cache_pool, PR_NOWAIT);
+ splx(s);
if (sc == NULL) {
if (ipopts)
(void) m_free(ipopts);
@@ -4026,7 +4033,9 @@
tcpstat.tcps_sndacks++;
tcpstat.tcps_sndtotal++;
} else {
+ s = splsoftnet();
SYN_CACHE_PUT(sc);
+ splx(s);
tcpstat.tcps_sc_dropped++;
}
return (1);
Index: netinet/tcp_sack.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_sack.c,v
retrieving revision 1.10.2.3
diff -u -r1.10.2.3 tcp_sack.c
--- netinet/tcp_sack.c 11 May 2005 18:08:51 -0000 1.10.2.3
+++ netinet/tcp_sack.c 19 Jul 2006 21:22:38 -0000
@@ -190,7 +190,7 @@
struct sackhole *cur = NULL;
struct sackhole *tmp = NULL;
u_int32_t *lp = (u_int32_t *) (cp + 2);
- int i, j, num_sack_blks;
+ int i, j, num_sack_blks, s;
tcp_seq left, right, acked;
/*
@@ -246,6 +246,9 @@
t_sack_block[j].right = right;
}
+ /* XXX: Investigate making this a bit more fine-grained. */
+ s = splsoftnet();
+
/* Update the scoreboard. */
cur = TAILQ_FIRST(&tp->snd_holes);
for (i = 0; i < num_sack_blks; i++) {
@@ -260,12 +263,14 @@
if (TAILQ_EMPTY(&tp->snd_holes)) {
/* First hole. */
if (tcp_sack_globalholes >= tcp_sack_globalmaxholes) {
+ splx(s);
return;
}
cur = (struct sackhole *)
pool_get(&sackhole_pool, PR_NOWAIT);
if (cur == NULL) {
/* ENOBUFS, bail out*/
+ splx(s);
return;
}
cur->start = th->th_ack;
@@ -328,12 +333,14 @@
tcp_sack_globalmaxholes ||
tp->snd_numholes >=
tcp_sack_tp_maxholes) {
+ splx(s);
return;
}
tmp = (struct sackhole *)
pool_get(&sackhole_pool, PR_NOWAIT);
if (tmp == NULL) {
/* ENOBUFS, bail out. */
+ splx(s);
return;
}
tmp->start = sack->right;
@@ -359,6 +366,7 @@
tcp_sack_globalmaxholes ||
tp->snd_numholes >=
tcp_sack_tp_maxholes) {
+ splx(s);
return;
}
tmp = (struct sackhole *)
@@ -377,6 +385,8 @@
tp->rcv_lastsack = sack->right;
}
}
+
+ splx(s);
}
void
@@ -387,6 +397,9 @@
th->th_ack : tp->snd_una;
struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
struct sackhole *tmp;
+ int s;
+
+ s = splsoftnet();
while (cur) {
if (SEQ_LEQ(cur->end, lastack)) {
@@ -405,12 +418,17 @@
break;
}
+
+ splx(s);
}
void
tcp_free_sackholes(struct tcpcb *tp)
{
struct sackhole *sack;
+ int s;
+
+ s = splsoftnet();
/* Free up the SACK hole list. */
while (!TAILQ_EMPTY(&tp->snd_holes)) {
@@ -421,6 +439,8 @@
}
tp->snd_numholes = 0;
+
+ splx(s);
}
/*
Index: netinet/tcp_subr.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.187.2.3
diff -u -r1.187.2.3 tcp_subr.c
--- netinet/tcp_subr.c 6 May 2005 08:39:52 -0000 1.187.2.3
+++ netinet/tcp_subr.c 19 Jul 2006 21:22:38 -0000
@@ -959,7 +959,7 @@
int i;
/* XXX Consider using a pool_cache for speed. */
- tp = pool_get(&tcpcb_pool, PR_NOWAIT);
+ tp = pool_get(&tcpcb_pool, PR_NOWAIT); /* splsoftnet via tcp_usrreq */
if (tp == NULL)
return (NULL);
memcpy(tp, &tcpcb_template, sizeof(*tp));
@@ -1002,7 +1002,7 @@
}
#endif /* INET6 */
default:
- pool_put(&tcpcb_pool, tp);
+ pool_put(&tcpcb_pool, tp); /* splsoftnet via tcp_usrreq */
return (NULL);
}
@@ -1075,7 +1075,7 @@
/* not quite there yet -- count separately? */
return dead;
tcpstat.tcps_delayed_free++;
- pool_put(&tcpcb_pool, tp);
+ pool_put(&tcpcb_pool, tp); /* splsoftnet via tcp_timer.c */
}
return dead;
}
@@ -1098,6 +1098,7 @@
struct rtentry *rt;
#endif
struct route *ro;
+ int s;
inp = tp->t_inpcb;
#ifdef INET6
@@ -1203,7 +1204,11 @@
if (tcp_timers_invoking(tp))
tp->t_flags |= TF_DEAD;
else
+ {
+ s = splsoftnet(); /* do we trust all callers of tcp_close()? */
pool_put(&tcpcb_pool, tp);
+ splx(s);
+ }
if (inp) {
inp->inp_ppcb = 0;
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.100.2.2
diff -u -r1.100.2.2 tcp_usrreq.c
--- netinet/tcp_usrreq.c 6 May 2005 08:35:27 -0000 1.100.2.2
+++ netinet/tcp_usrreq.c 19 Jul 2006 21:22:38 -0000
@@ -199,6 +199,8 @@
}
}
+ s = splsoftnet();
+
if (req == PRU_PURGEIF) {
switch (family) {
#ifdef INET
@@ -221,7 +223,6 @@
return (0);
}
- s = splsoftnet();
switch (family) {
#ifdef INET
case PF_INET:
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.134.2.6
diff -u -r1.134.2.6 udp_usrreq.c
--- netinet/udp_usrreq.c 29 Dec 2005 16:10:18 -0000 1.134.2.6
+++ netinet/udp_usrreq.c 19 Jul 2006 21:22:38 -0000
@@ -1136,14 +1136,16 @@
return (in_control(so, (long)m, (caddr_t)nam,
(struct ifnet *)control, p));
+ s = splsoftnet();
+
if (req == PRU_PURGEIF) {
in_pcbpurgeif0(&udbtable, (struct ifnet *)control);
in_purgeif((struct ifnet *)control);
in_pcbpurgeif(&udbtable, (struct ifnet *)control);
+ splx(s);
return (0);
}
- s = splsoftnet();
inp = sotoinpcb(so);
#ifdef DIAGNOSTIC
if (req != PRU_SEND && req != PRU_SENDOOB && control)
Index: netinet6/in6_pcb.c
===================================================================
RCS file: /cvsroot/src/sys/netinet6/in6_pcb.c,v
retrieving revision 1.66
diff -u -r1.66 in6_pcb.c
--- netinet6/in6_pcb.c 4 Dec 2004 16:10:25 -0000 1.66
+++ netinet6/in6_pcb.c 19 Jul 2006 21:22:38 -0000
@@ -151,7 +151,9 @@
int error;
#endif
+ s = splnet();
in6p = pool_get(&in6pcb_pool, PR_NOWAIT);
+ splx(s);
if (in6p == NULL)
return (ENOBUFS);
bzero((caddr_t)in6p, sizeof(*in6p));
@@ -163,7 +165,9 @@
#if defined(IPSEC) || defined(FAST_IPSEC)
error = ipsec_init_pcbpolicy(so, &in6p->in6p_sp);
if (error != 0) {
+ s = splnet();
pool_put(&in6pcb_pool, in6p);
+ splx(s);
return error;
}
#endif /* IPSEC */
@@ -516,8 +520,8 @@
LIST_REMOVE(&in6p->in6p_head, inph_lhash);
CIRCLEQ_REMOVE(&in6p->in6p_table->inpt_queue, &in6p->in6p_head,
inph_queue);
- splx(s);
pool_put(&in6pcb_pool, in6p);
+ splx(s);
}
void