tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: fix gif(4)'s softint(9) contract violation [was Re: RFC: gif(4) MP-ify]
Date: Fri, 22 Jan 2016 13:37:23 +0900
From: Kengo NAKAHARA <k-nakahara%iij.ad.jp@localhost>
I implement this processing. Here is the patch.
http://netbsd.org/~knakahara/fix-gif-softint/fix-gif-softint.patch
The patch includes reverting cvs kern_softint.c:r1.42. Furthermore,
the patch fixes the race between encap[46]_lookup(rnh->rnh_matchaddr)
and encap_detach(rnh->rnh_deladdr) keeping packet processing path
lockless.
# I think radix tree must be required lock.
However, this patch doesn't make gif(4) mp-ify yet. I will send gif(4)
mp-ify patch later.
Could you comment this patch?
Your patch has two largely independent functional changes in it:
1. fix the softint disestablish issue by adding a pause operation, and
2. make ip_encap MP-scalable by using a pserialized list instead of a
giant-locked radix tree.
Part (1) can be done a little more easily, I think -- I've attached a
compile-tested patch that does only part (1), by calling encap_detach
before softint_disestablish. It doesn't seem to require any special
cooperation from gif_output or gifintr. I probably overlooked
something, though.
Part (2) makes ip_encap scale well to many cores, but it stops
ip_encap from scaling well to many tunnels, because it removes the
radix tree. Maybe that's OK -- maybe today, scaling to many cores is
more important than scaling to many tunnels. But there's a comment in
the old code suggesting that scaling to many tunnels may be important:
/*
* The code will use radix table for tunnel lookup, for
* tunnels registered with encap_attach() with a addr/mask pair.
* Faster on machines with thousands of tunnel registerations (= interfaces).
...
Do you have a plan for how to address scaling to many tunnels too?
One heavy-handed but easy way that we might keep the radix tree *and*
scale to many cores would be to simply drop encapsulated packets while
anyone is reconfiguring a tunnel and modifying the radix tree.
I think you could do something like the following. I've also added a
reference count to each tunnel in this sketch. I did this because
without the reference count, it's not clear to me how to ensure that
the tunnel will continue to exist after the pserialize_read_exit.
static struct {
kmutex_t lock;
struct lwp *changing;
kcondvar_t cv;
pserialize_t psz;
LIST_HEAD(, encaptab) head;
} encaptab __cacheline_aligned;
encap4_input(m)
{
s = pserialize_read_enter();
if (encaptab.changing) {
/*
* Someone is reconfiguring a tunnel. Drop packets
* until they're done.
*/
pserialize_read_exit(s);
m_freem(m);
return;
}
membar_consumer();
/*
* Radix tree and encaptab list are now stable on this CPU
* until pserialize_read_exit.
*/
match = encap4_lookup(m, off, proto, INBOUND);
pserialize_read_exit(s);
if (match) {
esw = match->esw;
if (esw && esw->encapsw4.pr_input)
(*esw->encapsw4.pr_input)(m, off, proto, match);
else
m_freem(m);
encap_release(match);
return;
}
rip_input(m, off, proto);
}
encap4_lookup(...)
{
...
rn = rnh->rnh_matchaddr(...);
if (rn && (rn->rn_flags & RNF_ROOT) == 0) {
...
}
LIST_FOREACH(ep, &encaptab.head, chain) {
membar_datadep_consumer();
...
}
if (match != NULL)
encap_acquire(match);
return match;
}
encap_acquire(ep)
{
refcount_inc(&ep->refcount);
}
encap_release(ep)
{
refcount_dec_broadcast(&ep->refcount);
}
encap_change_enter()
{
mutex_enter(&encaptab.lock);
while (encaptab.changing)
cv_wait(&encaptab.cv, &encaptab.lock);
encaptab.changing = curlwp;
/* Wait for all CPUs to notice that we're changing. */
pserialize_perform(encaptab.psz);
mutex_exit(&encaptab.lock);
}
encap_change_exit()
{
mutex_enter(&encaptab.lock);
KASSERT(encaptab.changing == curlwp);
encaptab.changing = NULL;
cv_broadcast(&encaptab.cv);
mutex_exit(&encaptab.lock);
}
encap_add(ep)
{
encap_change_enter();
LIST_INSERT_HEAD(&encaptab.head, ep, chain);
... rnh->rnh_addaddr(...) ...
encap_change_exit();
}
encap_remove(ep)
{
encap_change_enter();
LIST_REMOVE(ep, chain);
/* Wait for all CPUs to stop looking up this ep. */
pserialize_perform(encaptab.psz);
... rnh->rnh_deladdr(...) ...
encap_change_exit();
/* Wait for all users in pr_input to finish. */
refcount_dec_drain(&ep->refcount);
}
Index: sys/net/if_gif.c
===================================================================
RCS file: /cvsroot/src/sys/net/if_gif.c,v
retrieving revision 1.105
diff -p -u -r1.105 if_gif.c
--- sys/net/if_gif.c 18 Jan 2016 06:08:26 -0000 1.105
+++ sys/net/if_gif.c 22 Jan 2016 15:34:43 -0000
@@ -99,6 +99,7 @@ static int gif_clone_destroy(struct ifne
static int gif_check_nesting(struct ifnet *, struct mbuf *);
static int gif_encap_attach(struct gif_softc *);
+static int gif_encap_pause(struct gif_softc *);
static int gif_encap_detach(struct gif_softc *);
static struct if_clone gif_cloner =
@@ -750,6 +751,33 @@ gif_encap_attach(struct gif_softc *sc)
}
static int
+gif_encap_pause(struct gif_softc *sc)
+{
+ int error;
+
+ if (sc == NULL || sc->gif_psrc == NULL)
+ return EINVAL;
+
+ switch (sc->gif_psrc->sa_family) {
+#ifdef INET
+ case AF_INET:
+ error = in_gif_pause(sc);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ error = in6_gif_pause(sc);
+ break;
+#endif
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
+
+static int
gif_encap_detach(struct gif_softc *sc)
{
int error;
@@ -815,7 +843,11 @@ gif_set_tunnel(struct ifnet *ifp, struct
return ENOMEM;
}
- /* Firstly, clear old configurations. */
+ /* Prevent further scheduling of the softint. */
+ if (sc->gif_psrc)
+ (void)gif_encap_pause(sc);
+
+ /* Disestablish the softint. */
if (sc->gif_si) {
osrc = sc->gif_psrc;
odst = sc->gif_pdst;
@@ -843,12 +875,14 @@ gif_set_tunnel(struct ifnet *ifp, struct
osrc = NULL;
odst = NULL;
}
+
+ /* Detach encap. */
/* XXX we can detach from both, but be polite just in case */
if (sc->gif_psrc)
(void)gif_encap_detach(sc);
/*
- * Secondly, try to set new configurations.
+ * Try to set new configurations.
* If the setup failed, rollback to old configurations.
*/
do {
Index: sys/netinet/in_gif.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/in_gif.c,v
retrieving revision 1.70
diff -p -u -r1.70 in_gif.c
--- sys/netinet/in_gif.c 22 Jan 2016 05:15:10 -0000 1.70
+++ sys/netinet/in_gif.c 22 Jan 2016 15:34:43 -0000
@@ -385,7 +385,7 @@ in_gif_attach(struct gif_softc *sc)
}
int
-in_gif_detach(struct gif_softc *sc)
+in_gif_pause(struct gif_softc *sc)
{
int error;
@@ -393,7 +393,14 @@ in_gif_detach(struct gif_softc *sc)
if (error == 0)
sc->encap_cookie4 = NULL;
+ return error;
+}
+
+int
+in_gif_detach(struct gif_softc *sc)
+{
+
rtcache_free(&sc->gif_ro);
- return error;
+ return 0;
}
Index: sys/netinet/in_gif.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/in_gif.h,v
retrieving revision 1.14
diff -p -u -r1.14 in_gif.h
--- sys/netinet/in_gif.h 23 Nov 2006 04:07:07 -0000 1.14
+++ sys/netinet/in_gif.h 22 Jan 2016 15:34:43 -0000
@@ -44,6 +44,7 @@ int in_gif_output(struct ifnet *, int, s
int gif_encapcheck4(struct mbuf *, int, int, void *);
#endif
int in_gif_attach(struct gif_softc *);
+int in_gif_pause(struct gif_softc *);
int in_gif_detach(struct gif_softc *);
#endif /* !_NETINET_IN_GIF_H_ */
Index: sys/netinet6/in6_gif.c
===================================================================
RCS file: /cvsroot/src/sys/netinet6/in6_gif.c,v
retrieving revision 1.67
diff -p -u -r1.67 in6_gif.c
--- sys/netinet6/in6_gif.c 22 Jan 2016 05:15:10 -0000 1.67
+++ sys/netinet6/in6_gif.c 22 Jan 2016 15:34:43 -0000
@@ -384,7 +384,7 @@ in6_gif_attach(struct gif_softc *sc)
}
int
-in6_gif_detach(struct gif_softc *sc)
+in6_gif_pause(struct gif_softc *sc)
{
int error;
@@ -392,9 +392,16 @@ in6_gif_detach(struct gif_softc *sc)
if (error == 0)
sc->encap_cookie6 = NULL;
+ return error;
+}
+
+int
+in6_gif_detach(struct gif_softc *sc)
+{
+
rtcache_free(&sc->gif_ro);
- return error;
+ return 0;
}
void *
Index: sys/netinet6/in6_gif.h
===================================================================
RCS file: /cvsroot/src/sys/netinet6/in6_gif.h,v
retrieving revision 1.13
diff -p -u -r1.13 in6_gif.h
--- sys/netinet6/in6_gif.h 24 Apr 2008 11:38:38 -0000 1.13
+++ sys/netinet6/in6_gif.h 22 Jan 2016 15:34:43 -0000
@@ -44,6 +44,7 @@ int in6_gif_output(struct ifnet *, int,
int gif_encapcheck6(struct mbuf *, int, int, void *);
#endif
int in6_gif_attach(struct gif_softc *);
+int in6_gif_pause(struct gif_softc *);
int in6_gif_detach(struct gif_softc *);
void *in6_gif_ctlinput(int, const struct sockaddr *, void *);
Home |
Main Index |
Thread Index |
Old Index