Source-Changes-HG archive


[src/trunk]: src/sys Sleep queues & turnstiles:



details:   https://anonhg.NetBSD.org/src/rev/1a20df986e4c
branches:  trunk
changeset: 461280:1a20df986e4c
user:      ad <ad%NetBSD.org@localhost>
date:      Thu Nov 21 18:56:55 2019 +0000

description:
Sleep queues & turnstiles:

- Avoid false sharing.
- Make the turnstile hash function more suitable.
- Increase turnstile hash table size.
- Make amends by having only one set of system wide sleep queue hash locks.
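
To make the layout concrete, here is a minimal userland C sketch of the
bucketing scheme the change adopts: chain heads and lock pointers live in
two separate arrays, so the read-mostly lock pointers do not share cache
lines with the frequently written chain heads, and the hash discards the
low six address bits so two lock objects sitting in one 64-byte cache line
land in the same bucket (and therefore behind the same lock).  All demo_*
names and the pthread/malloc calls are illustrative stand-ins for the
kernel's tschain_t, kmutex_t and mutex_obj_alloc(); this is not the NetBSD
code itself.

/* Illustrative sketch only; demo_* names are hypothetical. */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

#define DEMO_CACHE_LINE 64
#define DEMO_HASH_SIZE  128                     /* power of two */
#define DEMO_HASH_MASK  (DEMO_HASH_SIZE - 1)
/* Shift by 6: objects in the same 64-byte line share a bucket and a lock. */
#define DEMO_HASH(obj)  (((uintptr_t)(obj) >> 6) & DEMO_HASH_MASK)

struct demo_waiter {
	LIST_ENTRY(demo_waiter) dw_chain;
	const void *dw_obj;                     /* object being waited on */
};
LIST_HEAD(demo_chain, demo_waiter);

/* Frequently written: one chain head per bucket, cache-line aligned. */
static struct demo_chain demo_chains[DEMO_HASH_SIZE]
    __attribute__((aligned(DEMO_CACHE_LINE)));

/* Read-mostly once initialised: lock pointers kept in a separate array
 * from the chain heads, so updates to the chains never dirty these lines. */
static pthread_mutex_t *demo_locks[DEMO_HASH_SIZE];

static void
demo_init(void)
{
	int i;

	for (i = 0; i < DEMO_HASH_SIZE; i++) {
		LIST_INIT(&demo_chains[i]);
		/* Stand-in for mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED). */
		demo_locks[i] = malloc(sizeof(*demo_locks[i]));
		pthread_mutex_init(demo_locks[i], NULL);
	}
}

static void
demo_enqueue(struct demo_waiter *w, const void *obj)
{
	unsigned hash = DEMO_HASH(obj);

	w->dw_obj = obj;
	pthread_mutex_lock(demo_locks[hash]);
	LIST_INSERT_HEAD(&demo_chains[hash], w, dw_chain);
	pthread_mutex_unlock(demo_locks[hash]);
}

int
main(void)
{
	static int lockobj;                     /* stand-in for a lock object */
	struct demo_waiter w;

	demo_init();
	demo_enqueue(&w, &lockobj);
	printf("object %p -> bucket %u\n",
	    (void *)&lockobj, (unsigned)DEMO_HASH(&lockobj));
	return 0;
}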

diffstat:

 sys/kern/kern_sleepq.c    |  30 ++++++++-------
 sys/kern/kern_turnstile.c |  84 +++++++++++++++++++++++++++-------------------
 sys/sys/sleepq.h          |  26 +++++++-------
 3 files changed, 78 insertions(+), 62 deletions(-)

diffs (truncated from 363 to 300 lines):

diff -r f1124ce13efd -r 1a20df986e4c sys/kern/kern_sleepq.c
--- a/sys/kern/kern_sleepq.c    Thu Nov 21 18:46:40 2019 +0000
+++ b/sys/kern/kern_sleepq.c    Thu Nov 21 18:56:55 2019 +0000
@@ -1,7 +1,7 @@
-/*     $NetBSD: kern_sleepq.c,v 1.51 2016/07/03 14:24:58 christos Exp $        */
+/*     $NetBSD: kern_sleepq.c,v 1.52 2019/11/21 18:56:55 ad Exp $      */
 
 /*-
- * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
+ * Copyright (c) 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.51 2016/07/03 14:24:58 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.52 2019/11/21 18:56:55 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -65,7 +65,8 @@
 static int     sleepq_sigtoerror(lwp_t *, int);
 
 /* General purpose sleep table, used by mtsleep() and condition variables. */
-sleeptab_t     sleeptab        __cacheline_aligned;
+sleeptab_t     sleeptab __cacheline_aligned;
+kmutex_t       *sleepq_locks[SLEEPTAB_HASH_SIZE] __read_mostly;
 
 /*
  * sleeptab_init:
@@ -75,14 +76,11 @@
 void
 sleeptab_init(sleeptab_t *st)
 {
-       sleepq_t *sq;
        int i;
 
        for (i = 0; i < SLEEPTAB_HASH_SIZE; i++) {
-               sq = &st->st_queues[i].st_queue;
-               st->st_queues[i].st_mutex =
-                   mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
-               sleepq_init(sq);
+               sleepq_locks[i] = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
+               sleepq_init(&st->st_queue[i]);
        }
 }
 
@@ -266,8 +264,12 @@
                /* The LWP and sleep queue are now unlocked. */
                if (timo) {
                        /*
-                        * Even if the callout appears to have fired, we need to
-                        * stop it in order to synchronise with other CPUs.
+                        * Even if the callout appears to have fired, we
+                        * need to stop it in order to synchronise with
+                        * other CPUs.  It's important that we do this in
+                        * this LWP's context, and not during wakeup, in
+                        * order to keep the callout & its cache lines
+                        * co-located on the CPU with the LWP.
                         */
                        if (callout_halt(&l->l_timeout_ch, NULL))
                                error = EWOULDBLOCK;
@@ -333,10 +335,10 @@
  *
  *     Remove an LWP from its sleep queue and set it runnable again. 
  *     sleepq_unsleep() is called with the LWP's mutex held, and will
- *     always release it.
+ *     release it if "unlock" is true.
  */
 void
-sleepq_unsleep(lwp_t *l, bool cleanup)
+sleepq_unsleep(lwp_t *l, bool unlock)
 {
        sleepq_t *sq = l->l_sleepq;
        kmutex_t *mp = l->l_mutex;
@@ -345,7 +347,7 @@
        KASSERT(l->l_wchan != NULL);
 
        sleepq_remove(sq, l);
-       if (cleanup) {
+       if (unlock) {
                mutex_spin_exit(mp);
        }
 }
diff -r f1124ce13efd -r 1a20df986e4c sys/kern/kern_turnstile.c
--- a/sys/kern/kern_turnstile.c Thu Nov 21 18:46:40 2019 +0000
+++ b/sys/kern/kern_turnstile.c Thu Nov 21 18:56:55 2019 +0000
@@ -1,7 +1,7 @@
-/*     $NetBSD: kern_turnstile.c,v 1.32 2012/06/15 13:51:40 yamt Exp $ */
+/*     $NetBSD: kern_turnstile.c,v 1.33 2019/11/21 18:56:55 ad Exp $   */
 
 /*-
- * Copyright (c) 2002, 2006, 2007, 2009 The NetBSD Foundation, Inc.
+ * Copyright (c) 2002, 2006, 2007, 2009, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -56,11 +56,11 @@
  * grabs a free turnstile off the free list.  Otherwise, it can take back
  * the active turnstile from the lock (thus deactivating the turnstile).
  *
- * Turnstiles are the place to do priority inheritence.
+ * Turnstiles are where we do priority inheritence.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.32 2012/06/15 13:51:40 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.33 2019/11/21 18:56:55 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/lockdebug.h>
@@ -69,17 +69,23 @@
 #include <sys/sleepq.h>
 #include <sys/systm.h>
 
-#define        TS_HASH_SIZE    64
+/*
+ * Shift of 6 aligns to typical cache line size of 64 bytes;  there's no
+ * point having two turnstile locks to back two lock objects that share one
+ * cache line.
+ */
+#define        TS_HASH_SIZE    128
 #define        TS_HASH_MASK    (TS_HASH_SIZE - 1)
-#define        TS_HASH(obj)    (((uintptr_t)(obj) >> 3) & TS_HASH_MASK)
+#define        TS_HASH(obj)    (((uintptr_t)(obj) >> 6) & TS_HASH_MASK)
 
-static tschain_t       turnstile_tab[TS_HASH_SIZE]     __cacheline_aligned;
-pool_cache_t           turnstile_cache                 __read_mostly;
+/* Keep the chains and mutex pointers apart to prevent false sharing. */
+static tschain_t       turnstile_chains[TS_HASH_SIZE] __cacheline_aligned;
+static kmutex_t                *turnstile_locks[TS_HASH_SIZE] __read_mostly;
+pool_cache_t           turnstile_cache __read_mostly;
+extern turnstile_t     turnstile0 __cacheline_aligned;
 
 static int             turnstile_ctor(void *, void *, int);
 
-extern turnstile_t     turnstile0;
-
 /*
  * turnstile_init:
  *
@@ -88,17 +94,15 @@
 void
 turnstile_init(void)
 {
-       tschain_t *tc;
        int i;
 
        for (i = 0; i < TS_HASH_SIZE; i++) {
-               tc = &turnstile_tab[i];
-               LIST_INIT(&tc->tc_chain);
-               tc->tc_mutex = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
+               LIST_INIT(&turnstile_chains[i]);
+               turnstile_locks[i] = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
        }
 
-       turnstile_cache = pool_cache_init(sizeof(turnstile_t), 0, 0, 0,
-           "tstilepl", NULL, IPL_NONE, turnstile_ctor, NULL, NULL);
+       turnstile_cache = pool_cache_init(sizeof(turnstile_t), coherency_unit,
+           0, 0, "tstile", NULL, IPL_NONE, turnstile_ctor, NULL, NULL);
        KASSERT(turnstile_cache != NULL);
 
        (void)turnstile_ctor(NULL, &turnstile0, 0);
@@ -165,11 +169,13 @@
 {
        turnstile_t *ts;
        tschain_t *tc;
+       u_int hash;
 
-       tc = &turnstile_tab[TS_HASH(obj)];
-       mutex_spin_enter(tc->tc_mutex);
+       hash = TS_HASH(obj);
+       tc = &turnstile_chains[hash];
+       mutex_spin_enter(turnstile_locks[hash]);
 
-       LIST_FOREACH(ts, &tc->tc_chain, ts_chain)
+       LIST_FOREACH(ts, tc, ts_chain)
                if (ts->ts_obj == obj)
                        return (ts);
 
@@ -188,10 +194,8 @@
 void
 turnstile_exit(wchan_t obj)
 {
-       tschain_t *tc;
 
-       tc = &turnstile_tab[TS_HASH(obj)];
-       mutex_spin_exit(tc->tc_mutex);
+       mutex_spin_exit(turnstile_locks[TS_HASH(obj)]);
 }
 
 /*
@@ -370,13 +374,17 @@
        lwp_t * const l = curlwp; /* cached curlwp */
        turnstile_t *ots;
        tschain_t *tc;
+       kmutex_t *lock;
        sleepq_t *sq;
        pri_t obase;
+       u_int hash;
 
-       tc = &turnstile_tab[TS_HASH(obj)];
+       hash = TS_HASH(obj);
+       tc = &turnstile_chains[hash];
+       lock = turnstile_locks[hash];
 
        KASSERT(q == TS_READER_Q || q == TS_WRITER_Q);
-       KASSERT(mutex_owned(tc->tc_mutex));
+       KASSERT(mutex_owned(lock));
        KASSERT(l != NULL && l->l_ts != NULL);
 
        if (ts == NULL) {
@@ -390,7 +398,7 @@
                        TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q]));
                ts->ts_obj = obj;
                ts->ts_inheritor = NULL;
-               LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain);
+               LIST_INSERT_HEAD(tc, ts, ts_chain);
        } else {
                /*
                 * Object already has a turnstile.  Put our turnstile
@@ -411,8 +419,8 @@
 
        sq = &ts->ts_sleepq[q];
        ts->ts_waiters[q]++;
-       sleepq_enter(sq, l, tc->tc_mutex);
-       LOCKDEBUG_BARRIER(tc->tc_mutex, 1);
+       sleepq_enter(sq, l, lock);
+       LOCKDEBUG_BARRIER(lock, 1);
        l->l_kpriority = true;
        obase = l->l_kpribase;
        if (obase < PRI_KTHREAD)
@@ -425,7 +433,7 @@
         * to be interrupted while in a state of flux.
         */
        KPREEMPT_DISABLE(l);
-       KASSERT(tc->tc_mutex == l->l_mutex);
+       KASSERT(lock == l->l_mutex);
        turnstile_lendpri(l);
        sleepq_block(0, false);
        l->l_kpribase = obase;
@@ -442,15 +450,17 @@
 turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl)
 {
        sleepq_t *sq;
-       tschain_t *tc;
+       kmutex_t *lock;
+       u_int hash;
        lwp_t *l;
 
-       tc = &turnstile_tab[TS_HASH(ts->ts_obj)];
+       hash = TS_HASH(ts->ts_obj);
+       lock = turnstile_locks[hash];
        sq = &ts->ts_sleepq[q];
 
        KASSERT(q == TS_READER_Q || q == TS_WRITER_Q);
        KASSERT(count > 0 && count <= TS_WAITERS(ts, q));
-       KASSERT(mutex_owned(tc->tc_mutex));
+       KASSERT(mutex_owned(lock));
        KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL);
 
        /*
@@ -478,7 +488,7 @@
                        turnstile_remove(ts, l, q);
                }
        }
-       mutex_spin_exit(tc->tc_mutex);
+       mutex_spin_exit(lock);
 }
 
 /*
@@ -523,15 +533,19 @@
        turnstile_t *ts;
        tschain_t *tc;
        sleepq_t *rsq, *wsq;
+       kmutex_t *lock;
+       u_int hash;
        lwp_t *l;
 
-       tc = &turnstile_tab[TS_HASH(obj)];
+       hash = TS_HASH(obj);
+       tc = &turnstile_chains[hash];
+       lock = turnstile_locks[hash];
 
-       LIST_FOREACH(ts, &tc->tc_chain, ts_chain)
+       LIST_FOREACH(ts, tc, ts_chain)
                if (ts->ts_obj == obj)
                        break;
 
-       (*pr)("Turnstile chain at %p.\n", tc);
+       (*pr)("Turnstile chain at %p with mutex %p.\n", tc, lock);
        if (ts == NULL) {
                (*pr)("=> No active turnstile for this lock.\n");
                return;
diff -r f1124ce13efd -r 1a20df986e4c sys/sys/sleepq.h
--- a/sys/sys/sleepq.h  Thu Nov 21 18:46:40 2019 +0000
+++ b/sys/sys/sleepq.h  Thu Nov 21 18:56:55 2019 +0000
@@ -1,7 +1,7 @@



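The lookup/exit pairing visible in the turnstile hunks above can be
mirrored in the same illustrative terms (demo_* names from the earlier
sketch, still hypothetical, not the kernel API): lookup hashes the object
once and returns with the bucket lock held, and exit only needs to
recompute the hash to find the lock again, so no chain pointer has to be
carried between the two calls.

/* Continues the earlier sketch; names remain illustrative. */
static struct demo_waiter *
demo_lookup(const void *obj)
{
	struct demo_waiter *w;
	unsigned hash = DEMO_HASH(obj);

	/* Return with the bucket lock held, as turnstile_lookup() does. */
	pthread_mutex_lock(demo_locks[hash]);
	LIST_FOREACH(w, &demo_chains[hash], dw_chain)
		if (w->dw_obj == obj)
			return w;
	return NULL;
}

static void
demo_exit(const void *obj)
{
	/* The hash alone locates the lock; cf. turnstile_exit() above. */
	pthread_mutex_unlock(demo_locks[DEMO_HASH(obj)]);
}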