Source-Changes-HG archive
[src/trunk]: src/sys/kern - sched_idle(): if a migration took place, rather t...
details: https://anonhg.NetBSD.org/src/rev/2718340c1404
branches: trunk
changeset: 970830:2718340c1404
user: ad <ad%NetBSD.org@localhost>
date: Sat Apr 04 20:17:58 2020 +0000
description:
- sched_idle(): if a migration took place, rather than going idle briefly
to avoid stealing back the LWP, remember the target CPU and avoid it.
- sched_preempted(): only choose a totally idle CPU; in the interests of
overall throughput it's better to make use of SMT / slow CPUs.
diffstat:
sys/kern/kern_runq.c | 54 +++++++++++++++++++++++++++++----------------------
1 files changed, 31 insertions(+), 23 deletions(-)
diffs (167 lines):
diff -r ddd65bbe236b -r 2718340c1404 sys/kern/kern_runq.c
--- a/sys/kern/kern_runq.c Sat Apr 04 19:50:53 2020 +0000
+++ b/sys/kern/kern_runq.c Sat Apr 04 20:17:58 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_runq.c,v 1.64 2020/03/26 19:25:07 ad Exp $ */
+/* $NetBSD: kern_runq.c,v 1.65 2020/04/04 20:17:58 ad Exp $ */
/*-
* Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
@@ -56,7 +56,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.64 2020/03/26 19:25:07 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.65 2020/04/04 20:17:58 ad Exp $");
#include "opt_dtrace.h"
@@ -340,17 +340,17 @@
}
/*
- * Things start as soon as we touch ci_want_resched: x86 for example
- * has an instruction that monitors the memory cell it's in. We
- * want to drop the schedstate lock in advance, otherwise the remote
- * CPU can awaken and immediately block on the lock.
+ * Things can start as soon as ci_want_resched is touched: x86 has
+ * an instruction that monitors the memory cell it's in. Drop the
+ * schedstate lock in advance, otherwise the remote CPU can awaken
+ * and immediately block on the lock.
*/
if (__predict_true(unlock)) {
spc_unlock(ci);
}
/*
- * The caller will always have a second scheduler lock held: either
+ * The caller almost always has a second scheduler lock held: either
* the running LWP lock (spc_lwplock), or a sleep queue lock. That
* keeps preemption disabled, which among other things ensures all
* LWPs involved won't be freed while we're here (see lwp_dtor()).
@@ -361,8 +361,10 @@
n = atomic_cas_uint(&ci->ci_want_resched, o, o | f);
if (__predict_true(o == n)) {
/*
- * We're the first. If we're in process context on
- * the same CPU, we can avoid the visit to trap().
+ * We're the first to set a resched on the CPU. Try
+ * to avoid causing a needless trip through trap()
+ * to handle an AST fault, if it's known the LWP
+ * will either block or go through userret() soon.
*/
if (l != curlwp || cpu_intr_p()) {
cpu_need_resched(ci, l, f);
@@ -680,9 +682,10 @@
}
/*
- * Called from sched_idle() to handle migration.
+ * Called from sched_idle() to handle migration. Return the CPU that we
+ * pushed the LWP to (may be NULL).
*/
-static void
+static struct cpu_info *
sched_idle_migrate(void)
{
struct cpu_info *ci = curcpu(), *tci = NULL;
@@ -748,13 +751,14 @@
sched_resched_lwp(l, true);
/* tci now unlocked */
spc_unlock(ci);
- return;
+ return tci;
}
if (dlock == true) {
KASSERT(tci != NULL);
spc_unlock(tci);
}
spc_unlock(ci);
+ return NULL;
}
/*
@@ -785,21 +789,22 @@
void
sched_idle(void)
{
- struct cpu_info *ci = curcpu(), *inner, *outer, *first, *tci = NULL;
+ struct cpu_info *ci, *inner, *outer, *first, *tci, *mci;
struct schedstate_percpu *spc, *tspc;
struct lwp *l;
+ ci = curcpu();
spc = &ci->ci_schedstate;
+ tci = NULL;
+ mci = NULL;
/*
* Handle LWP migrations off this CPU to another. If there a is
- * migration to do then go idle afterwards (we'll wake again soon),
- * as we don't want to instantly steal back the LWP we just moved
- * out.
+ * migration to do then remember the CPU the LWP was sent to, and
+ * don't steal the LWP back from that CPU below.
*/
if (spc->spc_migrating != NULL) {
- sched_idle_migrate();
- return;
+ mci = sched_idle_migrate();
}
/* If this CPU is offline, or we have an LWP to run, we're done. */
@@ -812,7 +817,7 @@
/* Try to help our siblings out. */
tci = ci->ci_sibling[CPUREL_CORE];
while (tci != ci) {
- if (sched_steal(ci, tci)) {
+ if (tci != mci && sched_steal(ci, tci)) {
return;
}
tci = tci->ci_sibling[CPUREL_CORE];
@@ -849,7 +854,8 @@
do {
/* Don't hit the locks unless needed. */
tspc = &inner->ci_schedstate;
- if (ci == inner || spc->spc_psid != tspc->spc_psid ||
+ if (ci == inner || ci == mci ||
+ spc->spc_psid != tspc->spc_psid ||
tspc->spc_mcount < min_catch) {
continue;
}
@@ -874,6 +880,7 @@
void
sched_preempted(struct lwp *l)
{
+ const int flags = SPCF_IDLE | SPCF_1STCLASS;
struct schedstate_percpu *tspc;
struct cpu_info *ci, *tci;
@@ -903,7 +910,6 @@
*/
tci = ci->ci_sibling[CPUREL_CORE];
while (tci != ci) {
- const int flags = SPCF_IDLE | SPCF_1STCLASS;
tspc = &tci->ci_schedstate;
if ((tspc->spc_flags & flags) == flags &&
sched_migratable(l, tci)) {
@@ -928,7 +934,9 @@
} else {
/*
* Try to find a better CPU to take it, but don't move to
- * another 2nd class CPU; there's not much point.
+ * another 2nd class CPU, and don't move to a non-idle CPU,
+ * because that would prevent SMT being used to maximise
+ * throughput.
*
* Search in the current CPU package in order to try and
* keep L2/L3 cache locality, but expand to include the
@@ -936,7 +944,7 @@
*/
tci = sched_bestcpu(l, l->l_cpu);
if (tci != ci &&
- (tci->ci_schedstate.spc_flags & SPCF_1STCLASS) != 0) {
+ (tci->ci_schedstate.spc_flags & flags) == flags) {
l->l_target_cpu = tci;
}
}
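[Editorial sketch, not part of the commit] The second bullet amounts to a stricter flag test in sched_preempted(): a candidate CPU must now be both idle and first-class before the preempted LWP is pointed at it. A minimal standalone illustration follows; the flag values are made up for the example (the real SPCF_* definitions live in the kernel headers), and only the bitmask test mirrors the committed code.

#include <stdbool.h>
#include <stdio.h>

#define SPCF_IDLE	0x01	/* illustrative values, not the real ones */
#define SPCF_1STCLASS	0x02

static bool
totally_idle_1stclass(unsigned spc_flags)
{
	const unsigned flags = SPCF_IDLE | SPCF_1STCLASS;

	/* Both bits must be set; a busy or 2nd class CPU is rejected. */
	return (spc_flags & flags) == flags;
}

int
main(void)
{
	printf("%d\n", totally_idle_1stclass(SPCF_IDLE | SPCF_1STCLASS)); /* 1 */
	printf("%d\n", totally_idle_1stclass(SPCF_1STCLASS));             /* 0: busy */
	printf("%d\n", totally_idle_1stclass(SPCF_IDLE));                 /* 0: 2nd class */
	return 0;
}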