Source-Changes-HG archive
[src/trunk]: src/sys/arch x86: Expand on comments on ordering around stores to ci_curlwp
details: https://anonhg.NetBSD.org/src/rev/76744682b320
branches: trunk
changeset: 373794:76744682b320
user: riastradh <riastradh%NetBSD.org@localhost>
date: Wed Mar 01 08:38:50 2023 +0000
description:
x86: Expand on comments on ordering around stores to ci_curlwp.
No functional change intended.
PR kern/57240
diffstat:
sys/arch/amd64/amd64/locore.S | 30 +++++++++++++++++++++++++--
sys/arch/amd64/amd64/spl.S | 44 +++++++++++++++++++++++++++++++++++++---
sys/arch/i386/i386/locore.S | 32 ++++++++++++++++++++++++++---
sys/arch/i386/i386/spl.S | 46 ++++++++++++++++++++++++++++++++++++++----
4 files changed, 136 insertions(+), 16 deletions(-)
diffs (236 lines):
diff -r ec68e520a95a -r 76744682b320 sys/arch/amd64/amd64/locore.S
--- a/sys/arch/amd64/amd64/locore.S Wed Mar 01 08:18:39 2023 +0000
+++ b/sys/arch/amd64/amd64/locore.S Wed Mar 01 08:38:50 2023 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: locore.S,v 1.216 2023/02/25 18:04:42 riastradh Exp $ */
+/* $NetBSD: locore.S,v 1.217 2023/03/01 08:38:50 riastradh Exp $ */
/*
* Copyright-o-rama!
@@ -1177,8 +1177,32 @@
movq PCB_RBP(%r14),%rbp
/*
- * Set curlwp. This must be globally visible in order to permit
- * non-interlocked mutex release.
+ * Issue XCHG, rather than MOV, to set ci_curlwp := newlwp in
+ * order to coordinate mutex_exit on this CPU with
+ * mutex_vector_enter on another CPU.
+ *
+ * 1. Any prior mutex_exit by oldlwp must be visible to other
+ * CPUs before we set ci_curlwp := newlwp on this one,
+ * requiring a store-before-store barrier.
+ *
+ * (This is always guaranteed by the x86 memory model, TSO,
+ * but other architectures require an explicit barrier before
+ * the store to ci->ci_curlwp.)
+ *
+ * 2. ci_curlwp := newlwp must be visible on all other CPUs
+ * before any subsequent mutex_exit by newlwp can even test
+ * whether there might be waiters, requiring a
+ * store-before-load barrier.
+ *
+ * (This is the only ordering x86 TSO ever requires any kind
+ * of barrier for -- in this case, we take advantage of the
+ * sequential consistency implied by XCHG to obviate the
+ * need for MFENCE or something.)
+ *
+ * See kern_mutex.c for details -- this is necessary for
+ * adaptive mutexes to detect whether the lwp is on the CPU in
+ * order to safely block without requiring atomic r/m/w in
+ * mutex_exit.
*/
movq %r12,%rcx
xchgq %rcx,CPUVAR(CURLWP)
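
For reference, the protocol these comments describe lives in sys/kern/kern_mutex.c together with the MD cpu_switchto. Below is a rough, self-contained C model of how the pieces interact. It is illustrative only: the model_* names, the owner/waiters fields, and the use of C11 atomics are inventions of this sketch, not the NetBSD implementation; only ci_curlwp, l_cpu, and the roles of cpu_switchto, mutex_exit, and mutex_vector_enter come from the commit.

/*
 * Illustrative model only -- not NetBSD code.  The model_* names and
 * the owner/waiters fields are invented for this sketch; ci_curlwp,
 * l_cpu, and the function roles are the ones the commit refers to.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct lwp;
struct cpu_info { _Atomic(struct lwp *) ci_curlwp; };
struct lwp { struct cpu_info *l_cpu; };

struct model_mutex {
	_Atomic(struct lwp *) owner;	/* released without LOCK/CAS */
	atomic_bool waiters;		/* set by contended acquirers */
};

/*
 * cpu_switchto: publish the new curlwp.  The seq_cst exchange plays
 * the role XCHG plays in the assembly: it orders oldlwp's earlier
 * mutex_exit stores before the curlwp update (store-before-store) and
 * the curlwp update before newlwp's later waiters check
 * (store-before-load).
 */
void
model_switchto(struct cpu_info *ci, struct lwp *newlwp)
{
	(void)atomic_exchange(&ci->ci_curlwp, newlwp);
}

/*
 * mutex_exit fast path: non-interlocked release of the owner field,
 * followed by a plain load to see whether anyone needs waking.
 */
void
model_mutex_exit(struct model_mutex *mtx)
{
	atomic_store(&mtx->owner, NULL);
	if (atomic_load(&mtx->waiters)) {
		/* wake any blocked acquirers (omitted) */
	}
}

/*
 * mutex_vector_enter's blocking decision: after announcing itself as a
 * waiter, an acquirer may block only if the owner is no longer the lwp
 * running on its CPU.  This test is why the visibility of the
 * ci_curlwp store matters.
 */
bool
model_safe_to_block(struct model_mutex *mtx)
{
	struct lwp *o = atomic_load(&mtx->owner);

	if (o == NULL)
		return false;		/* lock released; retry acquire */
	atomic_store(&mtx->waiters, true);
	return atomic_load(&o->l_cpu->ci_curlwp) != o;
}

The model uses sequentially consistent atomics throughout; the real amd64 code gets the store-before-store edge for free from TSO and relies on XCHG solely for the store-before-load edge described in point 2.
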
diff -r ec68e520a95a -r 76744682b320 sys/arch/amd64/amd64/spl.S
--- a/sys/arch/amd64/amd64/spl.S Wed Mar 01 08:18:39 2023 +0000
+++ b/sys/arch/amd64/amd64/spl.S Wed Mar 01 08:38:50 2023 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: spl.S,v 1.48 2022/09/07 00:40:18 knakahara Exp $ */
+/* $NetBSD: spl.S,v 1.49 2023/03/01 08:38:50 riastradh Exp $ */
/*
* Copyright (c) 2003 Wasabi Systems, Inc.
@@ -116,6 +116,20 @@
movq IS_LWP(%rax),%rdi /* switch to handler LWP */
movq L_PCB(%rdi),%rdx
movq L_PCB(%r15),%rcx
+ /*
+ * Simple MOV to set curlwp to softlwp. See below on ordering
+ * required to restore softlwp like cpu_switchto.
+ *
+ * 1. Don't need store-before-store barrier because x86 is TSO.
+ *
+ * 2. Don't need store-before-load barrier because when we
+ * enter a softint lwp, it can't be holding any mutexes, so
+ * it can't release any until after it has acquired them, so
+ * we need not participate in the protocol with
+ * mutex_vector_enter barriers here.
+ *
+ * Hence no need for XCHG or barriers around MOV.
+ */
movq %rdi,CPUVAR(CURLWP)
#ifdef KASAN
@@ -158,9 +172,31 @@
movq PCB_RSP(%rcx),%rsp
/*
- * for non-interlocked mutex release to work safely the change
- * to ci_curlwp must not languish in the store buffer. therefore
- * we use XCHG and not MOV here. see kern_mutex.c.
+ * Use XCHG, not MOV, to coordinate mutex_exit on this CPU with
+ * mutex_vector_enter on another CPU.
+ *
+ * 1. Any prior mutex_exit by the softint must be visible to
+ * other CPUs before we restore curlwp on this one,
+ * requiring store-before-store ordering.
+ *
+ * (This is always guaranteed by the x86 memory model, TSO,
+ * but other architectures require an explicit barrier before
+ * the store to ci->ci_curlwp.)
+ *
+ * 2. Restoring curlwp must be visible on all other CPUs before
+ * any subsequent mutex_exit on this one can even test
+ * whether there might be waiters, requiring
+ * store-before-load ordering.
+ *
+ * (This is the only ordering x86 TSO ever requires any kind
+ * of barrier for -- in this case, we take advantage of the
+ * sequential consistency implied by XCHG to obviate the
+ * need for MFENCE or something.)
+ *
+ * See kern_mutex.c for details -- this is necessary for
+ * adaptive mutexes to detect whether the lwp is on the CPU in
+ * order to safely block without requiring atomic r/m/w in
+ * mutex_exit. See also cpu_switchto.
*/
xchgq %r15,CPUVAR(CURLWP) /* restore curlwp */
popq %r15 /* unwind switchframe */
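
The parentheticals above note that non-TSO architectures need explicit barriers around this store. A hypothetical C rendering of the same update for a weakly ordered port might look like the sketch below; this is an assumption about how such a port could be written, not code from any actual NetBSD architecture, although membar_producer() and membar_sync() are the real machine-independent primitives from <sys/atomic.h>.

/*
 * Hypothetical sketch for a weakly ordered CPU -- not taken from any
 * real NetBSD port.  The structure layouts are elided; only the
 * barrier placement is the point.
 */
#include <sys/atomic.h>

struct lwp;
struct cpu_info {
	struct lwp *volatile ci_curlwp;
};

void
sketch_set_curlwp(struct cpu_info *ci, struct lwp *newlwp)
{
	/*
	 * 1. store-before-store: oldlwp's mutex_exit stores must be
	 *    visible before ci_curlwp changes.  Free under x86 TSO,
	 *    explicit on weaker memory models.
	 */
	membar_producer();

	ci->ci_curlwp = newlwp;

	/*
	 * 2. store-before-load: the new ci_curlwp must be visible
	 *    before newlwp's next mutex_exit can load the waiters
	 *    bit.  XCHG provides this on x86; a full barrier (or an
	 *    atomic swap that implies one) is needed elsewhere.
	 */
	membar_sync();
}

Whether a port would use a full barrier after a plain store, as here, or an atomic swap that already implies one, is a per-architecture choice; the comments' point is only that both orderings must be provided by something.
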
diff -r ec68e520a95a -r 76744682b320 sys/arch/i386/i386/locore.S
--- a/sys/arch/i386/i386/locore.S Wed Mar 01 08:18:39 2023 +0000
+++ b/sys/arch/i386/i386/locore.S Wed Mar 01 08:38:50 2023 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: locore.S,v 1.192 2023/02/25 18:35:54 riastradh Exp $ */
+/* $NetBSD: locore.S,v 1.193 2023/03/01 08:38:50 riastradh Exp $ */
/*
* Copyright-o-rama!
@@ -128,7 +128,7 @@
*/
#include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.192 2023/02/25 18:35:54 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.193 2023/03/01 08:38:50 riastradh Exp $");
#include "opt_copy_symtab.h"
#include "opt_ddb.h"
@@ -1401,8 +1401,32 @@
movl PCB_ESP(%ebx),%esp
/*
- * Set curlwp. This must be globally visible in order to permit
- * non-interlocked mutex release.
+ * Issue XCHG, rather than MOV, to set ci_curlwp := newlwp in
+ * order to coordinate mutex_exit on this CPU with
+ * mutex_vector_enter on another CPU.
+ *
+ * 1. Any prior mutex_exit by oldlwp must be visible to other
+ * CPUs before we set ci_curlwp := newlwp on this one,
+ * requiring a store-before-store barrier.
+ *
+ * (This is always guaranteed by the x86 memory model, TSO,
+ * but other architectures require an explicit barrier before
+ * the store to ci->ci_curlwp.)
+ *
+ * 2. ci_curlwp := newlwp must be visible on all other CPUs
+ * before any subsequent mutex_exit by newlwp can even test
+ * whether there might be waiters, requiring a
+ * store-before-load barrier.
+ *
+ * (This is the only ordering x86 TSO ever requires any kind
+ * of barrier for -- in this case, we take advantage of the
+ * sequential consistency implied by XCHG to obviate the
+ * need for MFENCE or something.)
+ *
+ * See kern_mutex.c for details -- this is necessary for
+ * adaptive mutexes to detect whether the lwp is on the CPU in
+ * order to safely block without requiring atomic r/m/w in
+ * mutex_exit.
*/
movl %edi,%ecx
xchgl %ecx,CPUVAR(CURLWP)
diff -r ec68e520a95a -r 76744682b320 sys/arch/i386/i386/spl.S
--- a/sys/arch/i386/i386/spl.S Wed Mar 01 08:18:39 2023 +0000
+++ b/sys/arch/i386/i386/spl.S Wed Mar 01 08:38:50 2023 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: spl.S,v 1.57 2022/09/08 06:57:44 knakahara Exp $ */
+/* $NetBSD: spl.S,v 1.58 2023/03/01 08:38:50 riastradh Exp $ */
/*
* Copyright (c) 1998, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
*/
#include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.57 2022/09/08 06:57:44 knakahara Exp $");
+__KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.58 2023/03/01 08:38:50 riastradh Exp $");
#include "opt_ddb.h"
#include "opt_spldebug.h"
@@ -384,6 +384,20 @@
STI(%esi)
movl CPUVAR(CURLWP),%esi
movl IS_LWP(%eax),%edi /* switch to handler LWP */
+ /*
+ * Simple MOV to set curlwp to softlwp. See below on ordering
+ * required to restore softlwp like cpu_switchto.
+ *
+ * 1. Don't need store-before-store barrier because x86 is TSO.
+ *
+ * 2. Don't need store-before-load barrier because when we
+ * enter a softint lwp, it can't be holding any mutexes, so
+ * it can't release any until after it has acquired them, so
+ * we need not participate in the protocol with
+ * mutex_vector_enter barriers here.
+ *
+ * Hence no need for XCHG or barriers around MOV.
+ */
movl %edi,CPUVAR(CURLWP)
movl L_PCB(%edi),%edx
movl L_PCB(%esi),%ecx
@@ -399,9 +413,31 @@
movl PCB_ESP(%ecx),%esp
/*
- * for non-interlocked mutex release to work safely the change
- * to ci_curlwp must not languish in the store buffer. therefore
- * we use XCHG and not MOV here. see kern_mutex.c.
+ * Use XCHG, not MOV, to coordinate mutex_exit on this CPU with
+ * mutex_vector_enter on another CPU.
+ *
+ * 1. Any prior mutex_exit by the softint must be visible to
+ * other CPUs before we restore curlwp on this one,
+ * requiring store-before-store ordering.
+ *
+ * (This is always guaranteed by the x86 memory model, TSO,
+ * but other architectures require an explicit barrier before
+ * the store to ci->ci_curlwp.)
+ *
+ * 2. Restoring curlwp must be visible on all other CPUs before
+ * any subsequent mutex_exit on this one can even test
+ * whether there might be waiters, requiring
+ * store-before-load ordering.
+ *
+ * (This is the only ordering x86 TSO ever requires any kind
+ * of barrier for -- in this case, we take advantage of the
+ * sequential consistency implied by XCHG to obviate the
+ * need for MFENCE or something.)
+ *
+ * See kern_mutex.c for details -- this is necessary for
+ * adaptive mutexes to detect whether the lwp is on the CPU in
+ * order to safely block without requiring atomic r/m/w in
+ * mutex_exit. See also cpu_switchto.
*/
xchgl %esi,CPUVAR(CURLWP) /* restore ci_curlwp */
popl %edi /* unwind switchframe */
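
Point 2 in these comments is the classic store-buffering pattern: a store followed by a load of a different location is the one reordering x86 TSO permits. The standalone C11 program below is purely a demonstration, unrelated to the NetBSD sources above; given enough iterations it may observe both loads returning zero when the stores compile to plain MOVs, and that outcome becomes impossible if the stores are replaced by sequentially consistent exchanges, which is the job XCHG does for ci_curlwp.

/*
 * Store-buffering litmus test -- a standalone demonstration, not part
 * of the NetBSD sources above.  Thread 0 stores x then loads y;
 * thread 1 stores y then loads x.  The stores may sit in the store
 * buffer past the loads, so r0 == r1 == 0 is possible.  Replacing the
 * two stores with atomic_exchange(..., 1) (seq_cst, i.e. what XCHG
 * gives the kernel) forbids that outcome.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int x, y;
static int r0, r1;

static void *
thread0(void *arg)
{
	(void)arg;
	atomic_store_explicit(&x, 1, memory_order_release);
	r0 = atomic_load_explicit(&y, memory_order_acquire);
	return NULL;
}

static void *
thread1(void *arg)
{
	(void)arg;
	atomic_store_explicit(&y, 1, memory_order_release);
	r1 = atomic_load_explicit(&x, memory_order_acquire);
	return NULL;
}

int
main(void)
{
	int seen = 0;

	for (int i = 0; i < 100000 && !seen; i++) {
		pthread_t a, b;

		atomic_store(&x, 0);
		atomic_store(&y, 0);
		pthread_create(&a, NULL, thread0, NULL);
		pthread_create(&b, NULL, thread1, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		seen = (r0 == 0 && r1 == 0);
	}
	printf("r0 == r1 == 0 was %sobserved\n", seen ? "" : "not ");
	return 0;
}
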