Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/net/npf npf_session_setnat: fix the race condition when ...



details:   https://anonhg.NetBSD.org/src/rev/0b44a0a721c3
branches:  trunk
changeset: 790988:0b44a0a721c3
user:      rmind <rmind%NetBSD.org@localhost>
date:      Tue Oct 29 16:39:10 2013 +0000

description:
npf_session_setnat: fix the race condition when the old connection is still
being expired while a new/duplicate is being created.

diffstat:

 sys/net/npf/npf_impl.h    |    4 +-
 sys/net/npf/npf_nat.c     |    6 +-
 sys/net/npf/npf_session.c |  110 ++++++++++++++++++++++++---------------------
 3 files changed, 63 insertions(+), 57 deletions(-)

diffs (truncated from 302 to 300 lines):

diff -r 92ac87fb3897 -r 0b44a0a721c3 sys/net/npf/npf_impl.h
--- a/sys/net/npf/npf_impl.h    Tue Oct 29 16:19:28 2013 +0000
+++ b/sys/net/npf/npf_impl.h    Tue Oct 29 16:39:10 2013 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: npf_impl.h,v 1.34 2013/10/27 16:22:08 rmind Exp $      */
+/*     $NetBSD: npf_impl.h,v 1.35 2013/10/29 16:39:10 rmind Exp $      */
 
 /*-
  * Copyright (c) 2009-2013 The NetBSD Foundation, Inc.
@@ -290,7 +290,7 @@
 void           npf_session_expire(npf_session_t *);
 bool           npf_session_pass(const npf_session_t *, npf_rproc_t **);
 void           npf_session_setpass(npf_session_t *, npf_rproc_t *);
-int            npf_session_setnat(npf_session_t *, npf_nat_t *, const int);
+int            npf_session_setnat(npf_session_t *, npf_nat_t *, u_int);
 npf_nat_t *    npf_session_retnat(npf_session_t *, const int, bool *);
 
 void           npf_session_load(npf_sehash_t *);
diff -r 92ac87fb3897 -r 0b44a0a721c3 sys/net/npf/npf_nat.c
--- a/sys/net/npf/npf_nat.c     Tue Oct 29 16:19:28 2013 +0000
+++ b/sys/net/npf/npf_nat.c     Tue Oct 29 16:39:10 2013 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: npf_nat.c,v 1.20 2013/06/02 02:20:04 rmind Exp $       */
+/*     $NetBSD: npf_nat.c,v 1.21 2013/10/29 16:39:10 rmind Exp $       */
 
 /*-
  * Copyright (c) 2010-2013 The NetBSD Foundation, Inc.
@@ -76,7 +76,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.20 2013/06/02 02:20:04 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.21 2013/10/29 16:39:10 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -677,7 +677,7 @@
                 * Note: packet now has a translated address in the cache.
                 */
                nt->nt_session = se;
-               error = npf_session_setnat(se, nt, di);
+               error = npf_session_setnat(se, nt, np->n_type);
 out:
                if (error) {
                        /* If session was for NAT only - expire it. */
diff -r 92ac87fb3897 -r 0b44a0a721c3 sys/net/npf/npf_session.c
--- a/sys/net/npf/npf_session.c Tue Oct 29 16:19:28 2013 +0000
+++ b/sys/net/npf/npf_session.c Tue Oct 29 16:39:10 2013 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: npf_session.c,v 1.25 2013/09/26 00:24:36 rmind Exp $   */
+/*     $NetBSD: npf_session.c,v 1.26 2013/10/29 16:39:10 rmind Exp $   */
 
 /*-
  * Copyright (c) 2010-2013 The NetBSD Foundation, Inc.
@@ -92,7 +92,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.25 2013/09/26 00:24:36 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.26 2013/10/29 16:39:10 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -153,7 +153,7 @@
                uint16_t        if_idx;
        } s_common_id;
        /* Flags and the protocol state. */
-       int                     s_flags;
+       u_int                   s_flags;
        npf_state_t             s_state;
        /* Association of rule procedure data. */
        npf_rproc_t *           s_rproc;
@@ -176,18 +176,20 @@
 };
 
 /*
- * Session flags:
- * - PFIL_IN and PFIL_OUT values are reserved for direction.
- * - SE_ACTIVE: session is active i.e. visible on inspection.
- * - SE_PASS: a "pass" session.
- * - SE_EXPIRE: explicitly expire the session.
- * - SE_REMOVING: session is being removed (indicate need to enter G/C list).
+ * Session flags: PFIL_IN and PFIL_OUT values are reserved for direction.
  */
 CTASSERT(PFIL_ALL == (0x001 | 0x002));
-#define        SE_ACTIVE               0x004
-#define        SE_PASS                 0x008
-#define        SE_EXPIRE               0x010
-#define        SE_REMOVING             0x020
+#define        SE_ACTIVE               0x004   /* visible on inspection */
+#define        SE_PASS                 0x008   /* perform implicit passing */
+#define        SE_EXPIRE               0x010   /* explicitly expire */
+
+/*
+ * Flags to indicate removal of forwards/backwards session entries or
+ * completion of session removal itself (i.e. both entries).
+ */
+#define        SE_REMFORW              0x020
+#define        SE_REMBACK              0x040
+#define        SE_REMOVED              (SE_REMFORW | SE_REMBACK)
 
 /*
  * Session tracking state: disabled (off), enabled (on) or flush request.
@@ -488,7 +490,7 @@
        npf_sentry_t senkey, *sen;
        npf_session_t *se;
        npf_sehash_t *sh;
-       int flags;
+       u_int flags;
 
        if (!npf_session_fillent(npc, &senkey)) {
                return NULL;
@@ -699,7 +701,6 @@
 static void
 npf_session_destroy(npf_session_t *se)
 {
-
        if (se->s_nat) {
                /* Release any NAT related structures. */
                npf_nat_expire(se->s_nat);
@@ -723,7 +724,7 @@
  * and re-insert session entry accordingly.
  */
 int
-npf_session_setnat(npf_session_t *se, npf_nat_t *nt, const int di)
+npf_session_setnat(npf_session_t *se, npf_nat_t *nt, u_int ntype)
 {
        npf_sehash_t *sh;
        npf_sentry_t *sen;
@@ -735,56 +736,60 @@
 
        /* First, atomically check and associate NAT entry. */
        if (atomic_cas_ptr(&se->s_nat, NULL, nt) != NULL) {
-               /* Race: see below for description. */
+               /* Race with a duplicate packet. */
                npf_stats_inc(NPF_STAT_RACE_NAT);
                return EISCONN;
        }
 
-       /*
-        * Update, re-hash and re-insert "backwards" entry, according to
-        * the translation.  First, remove the entry from tree.  Note that
-        * a duplicate packet may establish a duplicate session while lock
-        * will be released.  In such case, caller will drop this packet
-        * and structures associated with it.  Such race condition should
-        * never happen in practice, though.
-        */
        sen = &se->s_back_entry;
        sh = sess_hash_bucket(sess_hashtbl, &se->s_common_id, sen);
 
+       /*
+        * Note: once the lock is release, the session might be a G/C
+        * target, therefore keep SE_REMBACK bit set until re-insert.
+        */
        rw_enter(&sh->sh_lock, RW_WRITER);
        rb_tree_remove_node(&sh->sh_tree, sen);
        sh->sh_count--;
        rw_exit(&sh->sh_lock);
 
        /*
-        * New source/destination and hash.  Note that source/destination
-        * are inverted, since we are handling "backwards" entry.
+        * Update the source/destination IDs and rehash.  Note that we are
+        * handling the "backwards" entry, therefore the opposite mapping.
         */
        npf_nat_gettrans(nt, &taddr, &tport);
-       if (di == PFIL_OUT) {
-               /* NPF_NATOUT: source in "forwards" = destination. */
+       switch (ntype) {
+       case NPF_NATOUT:
+               /* Source in "forwards" => destination. */
                memcpy(&sen->se_dst_addr, taddr, sen->se_alen);
-               if (tport) {
+               if (tport)
                        sen->se_dst_id = tport;
-               }
-       } else {
-               /* NPF_NATIN: destination in "forwards" = source. */
+               break;
+       case NPF_NATIN:
+               /* Destination in "forwards" => source. */
                memcpy(&sen->se_src_addr, taddr, sen->se_alen);
-               if (tport) {
+               if (tport)
                        sen->se_src_id = tport;
-               }
+               break;
        }
        sh = sess_hash_bucket(sess_hashtbl, &se->s_common_id, sen);
 
-       /* Insert into the new bucket. */
+       /*
+        * Insert the entry back into a potentially new bucket.
+        *
+        * Note: synchronise with the G/C thread here for a case when the
+        * old session is still being expired while a duplicate is being
+        * created here.  This race condition is rare.
+        */
        rw_enter(&sh->sh_lock, RW_WRITER);
-       ok = (rb_tree_insert_node(&sh->sh_tree, sen) == sen);
+       ok = rb_tree_insert_node(&sh->sh_tree, sen) == sen;
        if (__predict_true(ok)) {
                sh->sh_count++;
                NPF_PRINTF(("NPF: se %p assoc with nat %p\n", se, se->s_nat));
        } else {
-               /* FIXMEgc */
-               printf("npf_session_setnat: Houston, we've had a problem.\n");
+               /* Race: mark a removed entry and explicitly expire. */
+               atomic_or_uint(&se->s_flags, SE_REMBACK | SE_EXPIRE);
+               npf_stats_inc(NPF_STAT_RACE_NAT);
        }
        rw_exit(&sh->sh_lock);
        return ok ? 0 : EISCONN;
@@ -796,7 +801,6 @@
 void
 npf_session_expire(npf_session_t *se)
 {
-
        /* KASSERT(se->s_refcnt > 0); XXX: npf_nat_freepolicy() */
        atomic_or_uint(&se->s_flags, SE_EXPIRE);
 }
@@ -807,7 +811,6 @@
 bool
 npf_session_pass(const npf_session_t *se, npf_rproc_t **rp)
 {
-
        KASSERT(se->s_refcnt > 0);
        if ((se->s_flags & SE_PASS) != 0) {
                *rp = se->s_rproc;
@@ -843,7 +846,6 @@
 void
 npf_session_release(npf_session_t *se)
 {
-
        KASSERT(se->s_refcnt > 0);
        if ((se->s_flags & SE_ACTIVE) == 0) {
                /* Activate: after this point, session is globally visible. */
@@ -859,7 +861,6 @@
 npf_nat_t *
 npf_session_retnat(npf_session_t *se, const int di, bool *forw)
 {
-
        KASSERT(se->s_refcnt > 0);
        *forw = (se->s_flags & PFIL_ALL) == di;
        return se->s_nat;
@@ -904,6 +905,7 @@
                if (sh->sh_count == 0) {
                        continue;
                }
+
                rw_enter(&sh->sh_lock, RW_WRITER);
                /* For each (left -> right) ... */
                sen = rb_tree_iterate(&sh->sh_tree, NULL, RB_DIR_LEFT);
@@ -914,25 +916,27 @@
                        se = sen->se_backptr;
                        nsen = rb_tree_iterate(&sh->sh_tree, sen, RB_DIR_RIGHT);
                        if (!npf_session_expired(se, &tsnow) && !flushall) {
-                               KASSERT((se->s_flags & SE_REMOVING) == 0);
+                               KASSERT((se->s_flags & SE_REMOVED) == 0);
                                sen = nsen;
                                continue;
                        }
 
-                       /* Expired - remove from the tree. */
+                       /* Expired: remove from the tree. */
+                       atomic_or_uint(&se->s_flags, SE_EXPIRE);
                        rb_tree_remove_node(&sh->sh_tree, sen);
                        sh->sh_count--;
 
                        /*
-                        * Set removal bit when the first entry is removed.
-                        * If already set, then second entry has been removed,
-                        * therefore move the session into the G/C list.
+                        * Remove the session and move it to the G/C list,
+                        * if we are removing the forwards entry.  The list
+                        * is protected by its bucket lock.
                         */
-                       if (se->s_flags & SE_REMOVING) {
+                       if (&se->s_forw_entry == sen) {
+                               atomic_or_uint(&se->s_flags, SE_REMFORW);
                                LIST_REMOVE(se, s_list);
                                LIST_INSERT_HEAD(gc_list, se, s_list);
                        } else {
-                               atomic_or_uint(&se->s_flags, SE_REMOVING);
+                               atomic_or_uint(&se->s_flags, SE_REMBACK);
                        }
 
                        /* Next.. */
@@ -972,9 +976,11 @@
         */
        se = LIST_FIRST(&sess_gc_list);
        while (se != NULL) {
+               bool removed = (se->s_flags & SE_REMOVED) == SE_REMOVED;
+
                nse = LIST_NEXT(se, s_list);
-               if (se->s_refcnt == 0) {
-                       /* Destroy only if no references. */
+               if (removed && se->s_refcnt == 0) {
+                       /* Destroy only if removed and no references. */
                        LIST_REMOVE(se, s_list);



Home | Main Index | Thread Index | Old Index