Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/aarch64 Part II of ad's aarch64 performance improvements
details: https://anonhg.NetBSD.org/src/rev/640b7a5ea62e
branches: trunk
changeset: 937228:640b7a5ea62e
user: skrll <skrll%NetBSD.org@localhost>
date: Wed Aug 12 13:19:35 2020 +0000
description:
Part II of ad's aarch64 performance improvements (cpuswitch.S bugs are
all mine)
- Use tpidr_el1 to hold curlwp and not curcpu, because curlwp is accessed
much more often by MI code. It also makes curlwp preemption safe and
allows aarch64_curlwp() to be a const function (curcpu must be volatile).
- Make ASTs operate per-LWP rather than per-CPU, otherwise sometimes LWPs
can see spurious ASTs (which doesn't cause a problem, it just means some
time may be wasted).
- Use plain stores to set/clear ASTs. Make sure ASTs are always set on the
same CPU as the target LWP, and delivered via IPI if posted from a remote
CPU so that they are resolved quickly.
- Add some cache line padding to struct cpu_info, to match x86.
- Add a memory barrier in a couple of places where ci_curlwp is set. This
  is needed whenever an LWP that is resuming on the CPU could hold an
  adaptive mutex. The barrier needs to drain the CPU's store buffer, so
  that the update to ci_curlwp becomes globally visible before the LWP can
  resume and call mutex_exit(). By my reading of the ARM docs it looks like
  the instruction I used (dmb st) will do the right thing, but I'm not
  100% sure.
diffstat:
sys/arch/aarch64/aarch64/copyinout.S | 7 +--
sys/arch/aarch64/aarch64/cpu_machdep.c | 23 ++++++++++++--
sys/arch/aarch64/aarch64/cpuswitch.S | 52 ++++++++++++++++++--------------
sys/arch/aarch64/aarch64/db_machdep.c | 6 +--
sys/arch/aarch64/aarch64/fusu.S | 7 +--
sys/arch/aarch64/aarch64/genassym.cf | 4 +-
sys/arch/aarch64/aarch64/idle_machdep.S | 10 +++--
sys/arch/aarch64/aarch64/locore.S | 25 ++++++++-------
sys/arch/aarch64/aarch64/vectors.S | 5 +-
sys/arch/aarch64/include/cpu.h | 48 +++++++++++++++++++++--------
sys/arch/aarch64/include/proc.h | 3 +-
11 files changed, 115 insertions(+), 75 deletions(-)
diffs (truncated from 545 to 300 lines):
diff -r 4f77423b78b4 -r 640b7a5ea62e sys/arch/aarch64/aarch64/copyinout.S
--- a/sys/arch/aarch64/aarch64/copyinout.S Wed Aug 12 12:59:57 2020 +0000
+++ b/sys/arch/aarch64/aarch64/copyinout.S Wed Aug 12 13:19:35 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: copyinout.S,v 1.14 2020/08/06 06:49:55 ryo Exp $ */
+/* $NetBSD: copyinout.S,v 1.15 2020/08/12 13:19:35 skrll Exp $ */
/*-
* Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -33,7 +33,7 @@
#include <aarch64/asm.h>
#include "assym.h"
-RCSID("$NetBSD: copyinout.S,v 1.14 2020/08/06 06:49:55 ryo Exp $");
+RCSID("$NetBSD: copyinout.S,v 1.15 2020/08/12 13:19:35 skrll Exp $");
#ifdef ARMV81_PAN
#define PAN_ENABLE \
@@ -80,8 +80,7 @@
.macro exit_cpu_onfault
/* curlwp->l_md.md_onfault = NULL */
- mrs x0, tpidr_el1 /* curcpu */
- ldr x0, [x0, #CI_CURLWP] /* x0 = curlwp */
+ mrs x0, tpidr_el1 /* x0 = curlwp */
str xzr, [x0, #L_MD_ONFAULT] /* lwp->l_md_onfault = NULL */
9:
PAN_ENABLE /* enable PAN */
diff -r 4f77423b78b4 -r 640b7a5ea62e sys/arch/aarch64/aarch64/cpu_machdep.c
--- a/sys/arch/aarch64/aarch64/cpu_machdep.c Wed Aug 12 12:59:57 2020 +0000
+++ b/sys/arch/aarch64/aarch64/cpu_machdep.c Wed Aug 12 13:19:35 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu_machdep.c,v 1.10 2020/05/21 05:41:40 ryo Exp $ */
+/* $NetBSD: cpu_machdep.c,v 1.11 2020/08/12 13:19:35 skrll Exp $ */
/*-
* Copyright (c) 2014, 2019 The NetBSD Foundation, Inc.
@@ -31,7 +31,7 @@
#include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: cpu_machdep.c,v 1.10 2020/05/21 05:41:40 ryo Exp $");
+__KERNEL_RCSID(1, "$NetBSD: cpu_machdep.c,v 1.11 2020/08/12 13:19:35 skrll Exp $");
#include "opt_multiprocessor.h"
@@ -261,7 +261,7 @@
intr_ipi_send(ci->ci_kcpuset, IPI_AST);
#endif
} else {
- setsoftast(ci); /* force call to ast() */
+ l->l_md.md_astpending = 1;
}
}
@@ -272,7 +272,22 @@
KASSERT(l->l_cpu == curcpu());
l->l_pflag |= LP_OWEUPC;
- setsoftast(l->l_cpu);
+ l->l_md.md_astpending = 1;
+}
+
+void
+cpu_signotify(struct lwp *l)
+{
+
+ KASSERT(kpreempt_disabled());
+
+ if (l->l_cpu != curcpu()) {
+#ifdef MULTIPROCESSOR
+ intr_ipi_send(l->l_cpu->ci_kcpuset, IPI_AST);
+#endif
+ } else {
+ l->l_md.md_astpending = 1;
+ }
}
#ifdef __HAVE_PREEMPTION
diff -r 4f77423b78b4 -r 640b7a5ea62e sys/arch/aarch64/aarch64/cpuswitch.S
--- a/sys/arch/aarch64/aarch64/cpuswitch.S Wed Aug 12 12:59:57 2020 +0000
+++ b/sys/arch/aarch64/aarch64/cpuswitch.S Wed Aug 12 13:19:35 2020 +0000
@@ -1,7 +1,7 @@
-/* $NetBSD: cpuswitch.S,v 1.24 2020/08/06 06:49:55 ryo Exp $ */
+/* $NetBSD: cpuswitch.S,v 1.25 2020/08/12 13:19:35 skrll Exp $ */
/*-
- * Copyright (c) 2014 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014, 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -38,7 +38,7 @@
#include "opt_ddb.h"
#include "opt_kasan.h"
-RCSID("$NetBSD: cpuswitch.S,v 1.24 2020/08/06 06:49:55 ryo Exp $")
+RCSID("$NetBSD: cpuswitch.S,v 1.25 2020/08/12 13:19:35 skrll Exp $")
ARMV8_DEFINE_OPTIONS
@@ -83,11 +83,9 @@
#endif
ldr x5, [x1, #L_MD_CPACR] /* get cpacr_el1 */
- mrs x3, tpidr_el1
DISABLE_INTERRUPT
mov sp, x4 /* restore stack pointer */
msr cpacr_el1, x5 /* restore cpacr_el1 */
- str x1, [x3, #CI_CURLWP] /* switch curlwp to new lwp */
#ifdef ARMV83_PAC
/* Switch the PAC key. */
@@ -118,6 +116,10 @@
1:
#endif
+ msr tpidr_el1, x1 /* switch curlwp to new lwp */
+ ldr x3, [x1, #L_CPU]
+ str x1, [x3, #CI_CURLWP] /* switch curlwp to new lwp */
+ dmb st /* see comments in kern_mutex.c */
ENABLE_INTERRUPT
/*
@@ -161,8 +163,7 @@
stp x27, x28, [sp, #TF_X27]
stp x29, x2, [sp, #TF_X29] /* tf->lr = softint_cleanup; */
- mrs x20, tpidr_el1 /* x20 := curcpu() */
- ldr x19, [x20, #CI_CURLWP] /* x19 := curcpu()->ci_curlwp */
+ mrs x19, tpidr_el1 /* x19 := curlwp */
mov x4, sp
mrs x5, cpacr_el1
@@ -180,9 +181,13 @@
ldr x4, [x0, #L_MD_UTF]
DISABLE_INTERRUPT
+ ldr x20, [x19, #L_CPU] /* x20 := curlwp->l_cpu */
+
/* onto new stack */
sub sp, x4, #TF_SIZE /* new sp := softlwp->l_md_utf - 1 */
+ msr tpidr_el1, x0 /* curlwp = softlwp; */
str x0, [x20, #CI_CURLWP] /* curcpu()->ci_curlwp = softlwp; */
+ /* no need for memory barrier here */
mov x5, #CPACR_FPEN_NONE
msr cpacr_el1, x5 /* cpacr_el1 = CPACR_FPEN_NONE */
@@ -203,7 +208,6 @@
mov x0, x19 /* x0 := pinned_lwp */
bl _C_LABEL(softint_dispatch)
- mrs x20, tpidr_el1
ldr x6, [x19, #L_PCB] /* x6 = lwp_getpcb(curlwp) */
ldr x4, [x6, #PCB_TF] /* x4 := pinned_lwp->l_addr->pcb_tf */
#ifdef DDB
@@ -212,7 +216,11 @@
ldr x5, [x19, #L_MD_CPACR] /* x5 := pinned_lwp->l_md_cpacr */
DISABLE_INTERRUPT
- str x19, [x20, #CI_CURLWP] /* curcpu()->ci_curlwp := x19 */
+ msr tpidr_el1, x19 /* curlwp = pinned_lwp */
+ ldr x3, [x19, #L_CPU] /* x3 = curlwp->l_cpu */
+ str x19, [x3, #CI_CURLWP] /* curlwp->l_cpu->ci_curlwp := x19 */
+ dmb st /* see comments in kern_mutex.c */
+
mov sp, x4 /* restore pinned_lwp sp */
msr cpacr_el1, x5 /* restore pinned_lwp cpacr */
@@ -249,10 +257,11 @@
ENTRY_NP(softint_cleanup)
mov lr, x20 /* restore original lr */
- mrs x20, tpidr_el1 /* curcpu() */
- ldr w2, [x20, #CI_MTX_COUNT]/* ->ci_mtx_count */
+ mrs x20, tpidr_el1 /* curlwp */
+ ldr x3, [x20, #L_CPU] /* curcpu */
+ ldr w2, [x3, #CI_MTX_COUNT] /* ->ci_mtx_count */
add w2, w2, #1
- str w2, [x20, #CI_MTX_COUNT]
+ str w2, [x3, #CI_MTX_COUNT]
msr daif, x19 /* restore interrupt mask */
ldp x19, x20, [sp], #16 /* restore */
@@ -366,15 +375,13 @@
ENTRY_NP(el0_trap_exit)
DISABLE_INTERRUPT /* make sure I|F marked */
1:
- /* while (curcpu()->ci_astpending & __BIT(0)) { */
+ /* while (curlwp->l_md.md_astpending != 0) { */
mrs x8, tpidr_el1
- ldr w9, [x8, #CI_ASTPENDING]
- tbz w9, #0, 9f
+ ldr w9, [x8, #L_MD_ASTPENDING]
+ cbz w9, 9f
- /* atomic_and_uint(&curcpu()->ci_astpending, ~__BIT(0)); */
- mov w1, #~__BIT(0)
- add x0, x8, #CI_ASTPENDING
- bl _C_LABEL(atomic_and_uint);
+ /* curlwp->l_md.md_astpending = 0; */
+ str xzr, [x8, #L_MD_ASTPENDING]
/* trap_doast(tf); */
ENABLE_INTERRUPT
@@ -384,8 +391,8 @@
b 1b
/* } */
9:
- mrs x8, tpidr_el1
- ldr x9, [x8, #CI_CURLWP]
+
+ mrs x9, tpidr_el1
ldr x23, [x9, #L_MD_CPACR]
msr cpacr_el1, x23 /* FP unit EL0 handover */
isb /* necessary? */
@@ -446,8 +453,7 @@
* int cpu_set_onfault(struct faultbuf *fb)
*/
ENTRY_NP(cpu_set_onfault)
- mrs x3, tpidr_el1
- ldr x2, [x3, #CI_CURLWP] /* curlwp = curcpu()->ci_curlwp */
+ mrs x2, tpidr_el1 /* x2 = curlwp */
str x0, [x2, #L_MD_ONFAULT] /* l_md.md_onfault = fb */
stp x19, x20, [x0, #(FB_X19 * 8)]
diff -r 4f77423b78b4 -r 640b7a5ea62e sys/arch/aarch64/aarch64/db_machdep.c
--- a/sys/arch/aarch64/aarch64/db_machdep.c Wed Aug 12 12:59:57 2020 +0000
+++ b/sys/arch/aarch64/aarch64/db_machdep.c Wed Aug 12 13:19:35 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: db_machdep.c,v 1.25 2020/07/02 11:10:48 jmcneill Exp $ */
+/* $NetBSD: db_machdep.c,v 1.26 2020/08/12 13:19:35 skrll Exp $ */
/*-
* Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: db_machdep.c,v 1.25 2020/07/02 11:10:48 jmcneill Exp $");
+__KERNEL_RCSID(0, "$NetBSD: db_machdep.c,v 1.26 2020/08/12 13:19:35 skrll Exp $");
#ifdef _KERNEL_OPT
#include "opt_compat_netbsd32.h"
@@ -322,8 +322,6 @@
&ci->ci_cpl, cpuid, cpuinfobuf.ci_cpl);
db_printf("%p cpu[%lu].ci_softints = 0x%08x\n",
&ci->ci_softints, cpuid, cpuinfobuf.ci_softints);
- db_printf("%p cpu[%lu].ci_astpending = 0x%08x\n",
- &ci->ci_astpending, cpuid, cpuinfobuf.ci_astpending);
db_printf("%p cpu[%lu].ci_intr_depth = %u\n",
&ci->ci_intr_depth, cpuid, cpuinfobuf.ci_intr_depth);
db_printf("%p cpu[%lu].ci_biglock_count = %u\n",
diff -r 4f77423b78b4 -r 640b7a5ea62e sys/arch/aarch64/aarch64/fusu.S
--- a/sys/arch/aarch64/aarch64/fusu.S Wed Aug 12 12:59:57 2020 +0000
+++ b/sys/arch/aarch64/aarch64/fusu.S Wed Aug 12 13:19:35 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: fusu.S,v 1.9 2020/08/06 06:49:55 ryo Exp $ */
+/* $NetBSD: fusu.S,v 1.10 2020/08/12 13:19:35 skrll Exp $ */
/*-
* Copyright (c) 2014, 2019 The NetBSD Foundation, Inc.
@@ -32,7 +32,7 @@
#include <aarch64/asm.h>
#include "assym.h"
-RCSID("$NetBSD: fusu.S,v 1.9 2020/08/06 06:49:55 ryo Exp $");
+RCSID("$NetBSD: fusu.S,v 1.10 2020/08/12 13:19:35 skrll Exp $");
#ifdef ARMV81_PAN
#define PAN_ENABLE \
@@ -73,8 +73,7 @@
.macro exit_cpu_onfault
/* curlwp->l_md.md_onfault = NULL */
- mrs x1, tpidr_el1 /* curcpu */
- ldr x1, [x1, #CI_CURLWP] /* x1 = curlwp */
+ mrs x1, tpidr_el1 /* x1 = curlwp */
str xzr, [x1, #L_MD_ONFAULT] /* lwp->l_md_onfault = NULL */
9:
PAN_ENABLE /* enable PAN */
diff -r 4f77423b78b4 -r 640b7a5ea62e sys/arch/aarch64/aarch64/genassym.cf
--- a/sys/arch/aarch64/aarch64/genassym.cf Wed Aug 12 12:59:57 2020 +0000
+++ b/sys/arch/aarch64/aarch64/genassym.cf Wed Aug 12 13:19:35 2020 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: genassym.cf,v 1.29 2020/08/06 06:49:55 ryo Exp $
+# $NetBSD: genassym.cf,v 1.30 2020/08/12 13:19:35 skrll Exp $
#-
# Copyright (c) 2014 The NetBSD Foundation, Inc.
# All rights reserved.
@@ -154,6 +154,7 @@
define L_MD_UTF offsetof(struct lwp, l_md.md_utf)
define L_MD_CPACR offsetof(struct lwp, l_md.md_cpacr)
define L_MD_ONFAULT offsetof(struct lwp, l_md.md_onfault)
+define L_MD_ASTPENDING offsetof(struct lwp, l_md.md_astpending)
define L_MD_IA_KERN offsetof(struct lwp, l_md.md_ia_kern)
define L_MD_IA_USER offsetof(struct lwp, l_md.md_ia_user)
define L_MD_IB_USER offsetof(struct lwp, l_md.md_ib_user)
@@ -288,7 +289,6 @@
define CI_CPUID offsetof(struct cpu_info, ci_cpuid)
Home |
Main Index |
Thread Index |
Old Index