Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys - Add some more failsafes to the CPU topology stuff, and...
details: https://anonhg.NetBSD.org/src/rev/5c2bd0dfad2c
branches: trunk
changeset: 1005305:5c2bd0dfad2c
user: ad <ad%NetBSD.org@localhost>
date: Tue Dec 03 22:28:41 2019 +0000
description:
- Add some more failsafes to the CPU topology stuff, and build a 3rd
circular list of peer CPUs in other packages, so we might scroll through
them in the scheduler when looking to distribute or steal jobs.
- Fold the run queue data structure into spc_schedstate. Makes kern_runq.c
a far more pleasant place to work.
- Remove the code in sched_nextlwp() that tries to steal jobs from other
CPUs. It's not needed, because we do the very same thing in the idle LWP
anyway. Outside the VM system this was one of the the main causes of L3
cache misses I saw during builds. On my machine, this change yields a
60%-70% drop in time on the "hackbench" benchmark (there's clearly a bit
more going on here, but basically being less aggressive helps).
diffstat:
sys/kern/kern_cpu.c | 176 +++++++++++++++++++++++++++++----------
sys/kern/kern_runq.c | 225 ++++++++++++++++----------------------------------
sys/sys/cpu_data.h | 21 ++--
sys/sys/sched.h | 18 +++-
4 files changed, 229 insertions(+), 211 deletions(-)
diffs (truncated from 883 to 300 lines):
diff -r 079d0be3e57c -r 5c2bd0dfad2c sys/kern/kern_cpu.c
--- a/sys/kern/kern_cpu.c Tue Dec 03 22:22:36 2019 +0000
+++ b/sys/kern/kern_cpu.c Tue Dec 03 22:28:41 2019 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_cpu.c,v 1.79 2019/12/02 23:22:43 ad Exp $ */
+/* $NetBSD: kern_cpu.c,v 1.80 2019/12/03 22:28:41 ad Exp $ */
/*-
* Copyright (c) 2007, 2008, 2009, 2010, 2012, 2019 The NetBSD Foundation, Inc.
@@ -56,7 +56,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.79 2019/12/02 23:22:43 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.80 2019/12/03 22:28:41 ad Exp $");
#include "opt_cpu_ucode.h"
@@ -595,35 +595,118 @@
void
cpu_topology_set(struct cpu_info *ci, int package_id, int core_id, int smt_id)
{
+ enum cpu_rel rel;
cpu_topology_present = true;
ci->ci_package_id = package_id;
ci->ci_core_id = core_id;
ci->ci_smt_id = smt_id;
- ci->ci_package_cpus = ci;
- ci->ci_npackage_cpus = 1;
- ci->ci_core_cpus = ci;
- ci->ci_ncore_cpus = 1;
+ for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
+ ci->ci_sibling[rel] = ci;
+ ci->ci_nsibling[rel] = 1;
+ }
+}
+
+/*
+ * Link a CPU into the given circular list.
+ */
+static void
+cpu_topology_link(struct cpu_info *ci, struct cpu_info *ci2, enum cpu_rel rel)
+{
+ struct cpu_info *ci3;
+
+ /* Walk to the end of the existing circular list and append. */
+ for (ci3 = ci2;; ci3 = ci3->ci_sibling[rel]) {
+ ci3->ci_nsibling[rel]++;
+ if (ci3->ci_sibling[rel] == ci2) {
+ break;
+ }
+ }
+ ci->ci_sibling[rel] = ci2;
+ ci3->ci_sibling[rel] = ci;
+ ci->ci_nsibling[rel] = ci3->ci_nsibling[rel];
+}
+
+/*
+ * Find peer CPUs in other packages.
+ */
+static void
+cpu_topology_peers(void)
+{
+ CPU_INFO_ITERATOR cii, cii2;
+ struct cpu_info *ci, *ci2;
+
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ if (ci->ci_nsibling[CPUREL_PEER] > 1) {
+ /* Already linked. */
+ continue;
+ }
+ for (CPU_INFO_FOREACH(cii2, ci2)) {
+ if (ci != ci2 &&
+ ci->ci_package_id != ci2->ci_package_id &&
+ ci->ci_core_id == ci2->ci_core_id &&
+ ci->ci_smt_id == ci2->ci_smt_id) {
+ cpu_topology_link(ci, ci2, CPUREL_PEER);
+ break;
+ }
+ }
+ }
+}
+
+/*
+ * Print out the toplogy lists.
+ */
+static void
+cpu_topology_print(void)
+{
+#ifdef DEBUG
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci, *ci2;
+ const char *names[] = { "core", "package", "peer" };
+ enum cpu_rel rel;
+ int i;
+
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
+ printf("%s has %dx %s siblings: ", cpu_name(ci),
+ ci->ci_nsibling[rel], names[rel]);
+ ci2 = ci->ci_sibling[rel];
+ i = 0;
+ do {
+ printf(" %s", cpu_name(ci2));
+ ci2 = ci2->ci_sibling[rel];
+ } while (++i < 64 && ci2 != ci->ci_sibling[rel]);
+ if (i == 64) {
+ printf(" GAVE UP");
+ }
+ printf("\n");
+ }
+ }
+#endif /* DEBUG */
}
/*
* Fake up toplogy info if we have none, or if what we got was bogus.
+ * Don't override ci_package_id, etc, if cpu_topology_present is set.
+ * MD code also uses these.
*/
static void
cpu_topology_fake(void)
{
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
+ enum cpu_rel rel;
for (CPU_INFO_FOREACH(cii, ci)) {
- ci->ci_package_id = cpu_index(ci);
- ci->ci_core_id = 0;
- ci->ci_smt_id = 0;
- ci->ci_ncore_cpus = 1;
- ci->ci_core_cpus = ci;
- ci->ci_package_cpus = ci;
- ci->ci_npackage_cpus = 1;
+ for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
+ ci->ci_sibling[rel] = ci;
+ ci->ci_nsibling[rel] = 1;
+ }
+ if (!cpu_topology_present) {
+ ci->ci_package_id = cpu_index(ci);
+ }
}
+ cpu_topology_print();
}
/*
@@ -634,20 +717,16 @@
cpu_topology_init(void)
{
CPU_INFO_ITERATOR cii, cii2;
- struct cpu_info *ci, *ci2, *ci3;
+ struct cpu_info *ci, *ci2;
+ int ncore, npackage, npeer;
+ bool symmetric;
if (!cpu_topology_present) {
cpu_topology_fake();
return;
}
- for (CPU_INFO_FOREACH(cii, ci)) {
- ci->ci_ncore_cpus = 1;
- ci->ci_core_cpus = ci;
- ci->ci_package_cpus = ci;
- ci->ci_npackage_cpus = 1;
- }
-
+ /* Find siblings in same core and package. */
for (CPU_INFO_FOREACH(cii, ci)) {
for (CPU_INFO_FOREACH(cii2, ci2)) {
/* Avoid bad things happening. */
@@ -664,39 +743,42 @@
ci2->ci_package_id != ci->ci_package_id) {
continue;
}
- /*
- * Find CPUs in the same core. Walk to the end of
- * the existing circular list and append.
- */
- if (ci->ci_ncore_cpus == 1 &&
+ /* Find CPUs in the same core. */
+ if (ci->ci_nsibling[CPUREL_CORE] == 1 &&
ci->ci_core_id == ci2->ci_core_id) {
- for (ci3 = ci2;; ci3 = ci3->ci_core_cpus) {
- ci3->ci_ncore_cpus++;
- if (ci3->ci_core_cpus == ci2) {
- break;
- }
- }
- ci->ci_core_cpus = ci2;
- ci3->ci_core_cpus = ci;
- ci->ci_ncore_cpus = ci3->ci_ncore_cpus;
+ cpu_topology_link(ci, ci2, CPUREL_CORE);
}
- /* Same, but for package. */
- if (ci->ci_npackage_cpus == 1) {
- for (ci3 = ci2;; ci3 = ci3->ci_package_cpus) {
- ci3->ci_npackage_cpus++;
- if (ci3->ci_package_cpus == ci2) {
- break;
- }
- }
- ci->ci_package_cpus = ci2;
- ci3->ci_package_cpus = ci;
- ci->ci_npackage_cpus = ci3->ci_npackage_cpus;
+ /* Find CPUs in the same package. */
+ if (ci->ci_nsibling[CPUREL_PACKAGE] == 1) {
+ cpu_topology_link(ci, ci2, CPUREL_PACKAGE);
}
- if (ci->ci_ncore_cpus > 1 && ci->ci_npackage_cpus > 1) {
+ if (ci->ci_nsibling[CPUREL_CORE] > 1 &&
+ ci->ci_nsibling[CPUREL_PACKAGE] > 1) {
break;
}
}
}
+
+ /* Find peers in other packages. */
+ cpu_topology_peers();
+
+ /* Determine whether the topology is bogus/symmetric. */
+ npackage = curcpu()->ci_nsibling[CPUREL_PACKAGE];
+ ncore = curcpu()->ci_nsibling[CPUREL_CORE];
+ npeer = curcpu()->ci_nsibling[CPUREL_PEER];
+ symmetric = true;
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ if (npackage != ci->ci_nsibling[CPUREL_PACKAGE] ||
+ ncore != ci->ci_nsibling[CPUREL_CORE] ||
+ npeer != ci->ci_nsibling[CPUREL_PEER]) {
+ symmetric = false;
+ }
+ }
+ cpu_topology_print();
+ if (symmetric == false) {
+ printf("cpu_topology_init: not symmetric, faking it\n");
+ cpu_topology_fake();
+ }
}
#ifdef CPU_UCODE
diff -r 079d0be3e57c -r 5c2bd0dfad2c sys/kern/kern_runq.c
--- a/sys/kern/kern_runq.c Tue Dec 03 22:22:36 2019 +0000
+++ b/sys/kern/kern_runq.c Tue Dec 03 22:28:41 2019 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_runq.c,v 1.52 2019/12/01 15:34:46 ad Exp $ */
+/* $NetBSD: kern_runq.c,v 1.53 2019/12/03 22:28:41 ad Exp $ */
/*-
* Copyright (c) 2019 The NetBSD Foundation, Inc.
@@ -56,7 +56,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.52 2019/12/01 15:34:46 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.53 2019/12/03 22:28:41 ad Exp $");
#include "opt_dtrace.h"
@@ -79,15 +79,6 @@
#include <sys/atomic.h>
/*
- * Priority related definitions.
- */
-#define PRI_TS_COUNT (NPRI_USER)
-#define PRI_RT_COUNT (PRI_COUNT - PRI_TS_COUNT)
-#define PRI_HTS_RANGE (PRI_TS_COUNT / 10)
-
-#define PRI_HIGHEST_TS (MAXPRI_USER)
-
-/*
* Bits per map.
*/
#define BITMAP_BITS (32)
@@ -95,34 +86,9 @@
#define BITMAP_MSB (0x80000000U)
#define BITMAP_MASK (BITMAP_BITS - 1)
-/*
- * Structures, runqueue.
- */
-
const int schedppq = 1;
-typedef struct {
- TAILQ_HEAD(, lwp) q_head;
-} queue_t;
-
-typedef struct {
- /* Bitmap */
- uint32_t r_bitmap[PRI_COUNT >> BITMAP_SHIFT];
- /* Counters */
- u_int r_count; /* Count of the threads */
- u_int r_avgcount; /* Average count of threads (* 256) */
- u_int r_mcount; /* Count of migratable threads */
- /* Runqueues */
- queue_t r_rt_queue[PRI_RT_COUNT];
- queue_t r_ts_queue[PRI_TS_COUNT];
Home |
Main Index |
Thread Index |
Old Index