Source-Changes-HG archive


[src/trunk]: src/sys/arch x86: Expand on comments on ordering around stores t...



details:   https://anonhg.NetBSD.org/src/rev/76744682b320
branches:  trunk
changeset: 373794:76744682b320
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Wed Mar 01 08:38:50 2023 +0000

description:
x86: Expand on comments on ordering around stores to ci_curlwp.

No functional change intended.

PR kern/57240
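
The ordering requirement spelled out in the new comments is the classic
store-buffering pattern.  The following is a minimal, self-contained sketch
in C11 atomics, not the kernel's own code: thread 0 stands in for
cpu_switchto followed by a mutex_exit on the new lwp (publish ci_curlwp,
then test for waiters), and thread 1 stands in for mutex_vector_enter on
another CPU (record a waiter, then check whether the owner is still on a
CPU).  All names are illustrative.  With sequentially consistent ordering
(what the XCHG provides on x86), at least one side must observe the other's
store; with plain stores and loads, both could miss, and a waiter could go
to sleep with nobody left to wake it.

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>

/* Illustrative stand-ins, not kernel data structures. */
static _Atomic int ci_curlwp_is_newlwp; /* "ci_curlwp := newlwp is visible" */
static _Atomic int mtx_has_waiters;     /* "a waiter has queued itself" */
static int exit_saw_waiter, enter_saw_owner_on_cpu;

static void *switch_then_exit(void *arg)
{
    /* cpu_switchto: publish the new curlwp (the XCHG in this patch). */
    atomic_store_explicit(&ci_curlwp_is_newlwp, 1, memory_order_seq_cst);
    /* mutex_exit by the new lwp: is it safe to skip the wakeup path? */
    exit_saw_waiter =
        atomic_load_explicit(&mtx_has_waiters, memory_order_seq_cst);
    return arg;
}

static void *vector_enter(void *arg)
{
    /* mutex_vector_enter on another CPU: queue as a waiter... */
    atomic_store_explicit(&mtx_has_waiters, 1, memory_order_seq_cst);
    /* ...then decide whether to keep spinning or to sleep. */
    enter_saw_owner_on_cpu =
        atomic_load_explicit(&ci_curlwp_is_newlwp, memory_order_seq_cst);
    return arg;
}

int main(void)
{
    pthread_t t0, t1;

    pthread_create(&t0, NULL, switch_then_exit, NULL);
    pthread_create(&t1, NULL, vector_enter, NULL);
    pthread_join(t0, NULL);
    pthread_join(t1, NULL);

    /* The outcome seq_cst forbids: both sides reading the stale 0. */
    assert(exit_saw_waiter || enter_saw_owner_on_cpu);
    return 0;
}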

diffstat:

 sys/arch/amd64/amd64/locore.S |  30 +++++++++++++++++++++++++--
 sys/arch/amd64/amd64/spl.S    |  44 +++++++++++++++++++++++++++++++++++++---
 sys/arch/i386/i386/locore.S   |  32 ++++++++++++++++++++++++++---
 sys/arch/i386/i386/spl.S      |  46 ++++++++++++++++++++++++++++++++++++++----
 4 files changed, 136 insertions(+), 16 deletions(-)

diffs (236 lines):

diff -r ec68e520a95a -r 76744682b320 sys/arch/amd64/amd64/locore.S
--- a/sys/arch/amd64/amd64/locore.S     Wed Mar 01 08:18:39 2023 +0000
+++ b/sys/arch/amd64/amd64/locore.S     Wed Mar 01 08:38:50 2023 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: locore.S,v 1.216 2023/02/25 18:04:42 riastradh Exp $   */
+/*     $NetBSD: locore.S,v 1.217 2023/03/01 08:38:50 riastradh Exp $   */
 
 /*
  * Copyright-o-rama!
@@ -1177,8 +1177,32 @@
        movq    PCB_RBP(%r14),%rbp
 
        /*
-        * Set curlwp.  This must be globally visible in order to permit
-        * non-interlocked mutex release.
+        * Issue XCHG, rather than MOV, to set ci_curlwp := newlwp in
+        * order to coordinate mutex_exit on this CPU with
+        * mutex_vector_enter on another CPU.
+        *
+        * 1. Any prior mutex_exit by oldlwp must be visible to other
+        *    CPUs before we set ci_curlwp := newlwp on this one,
+        *    requiring a store-before-store barrier.
+        *
+        *    (This is always guaranteed by the x86 memory model, TSO,
+        *    but other architectures require an explicit barrier before
+        *    the store to ci->ci_curlwp.)
+        *
+        * 2. ci_curlwp := newlwp must be visible on all other CPUs
+        *    before any subsequent mutex_exit by newlwp can even test
+        *    whether there might be waiters, requiring a
+        *    store-before-load barrier.
+        *
+        *    (This is the only ordering x86 TSO ever requires any kind
+        *    of barrier for -- in this case, we take advantage of the
+        *    sequential consistency implied by XCHG to obviate the
+        *    need for MFENCE or something.)
+        *
+        * See kern_mutex.c for details -- this is necessary for
+        * adaptive mutexes to detect whether the lwp is on the CPU in
+        * order to safely block without requiring atomic r/m/w in
+        * mutex_exit.
         */
        movq    %r12,%rcx
        xchgq   %rcx,CPUVAR(CURLWP)
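
The parenthetical above leans on the fact that the memory-operand form of
XCHG carries an implicit LOCK prefix on x86, so the exchange also acts as a
full barrier and keeps later loads from passing the store.  A rough sketch
of the two equivalent ways to get that store-before-load guarantee, in
GCC-style inline assembly with made-up helper names:

#include <stdint.h>

/*
 * Store v to *p with full-barrier semantics: the memory-operand XCHG is
 * implicitly locked, so later loads cannot be reordered ahead of the store.
 */
static inline void store_curlwp_xchg(volatile uint64_t *p, uint64_t v)
{
    __asm__ __volatile__("xchgq %0, %1"
        : "+r"(v), "+m"(*p)
        :
        : "memory");
}

/*
 * The heavier alternative the comment alludes to: a plain store followed
 * by MFENCE also orders the store before later loads.
 */
static inline void store_curlwp_mfence(volatile uint64_t *p, uint64_t v)
{
    *p = v;
    __asm__ __volatile__("mfence" ::: "memory");
}
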
diff -r ec68e520a95a -r 76744682b320 sys/arch/amd64/amd64/spl.S
--- a/sys/arch/amd64/amd64/spl.S        Wed Mar 01 08:18:39 2023 +0000
+++ b/sys/arch/amd64/amd64/spl.S        Wed Mar 01 08:38:50 2023 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: spl.S,v 1.48 2022/09/07 00:40:18 knakahara Exp $       */
+/*     $NetBSD: spl.S,v 1.49 2023/03/01 08:38:50 riastradh Exp $       */
 
 /*
  * Copyright (c) 2003 Wasabi Systems, Inc.
@@ -116,6 +116,20 @@
        movq    IS_LWP(%rax),%rdi       /* switch to handler LWP */
        movq    L_PCB(%rdi),%rdx
        movq    L_PCB(%r15),%rcx
+       /*
+        * Simple MOV to set curlwp to softlwp.  See below on ordering
+        * required to restore softlwp like cpu_switchto.
+        *
+        * 1. Don't need store-before-store barrier because x86 is TSO.
+        *
+        * 2. Don't need store-before-load barrier because when we
+        *    enter a softint lwp, it can't be holding any mutexes, so
+        *    it can't release any until after it has acquired them, so
+        *    we need not participate in the protocol with
+        *    mutex_vector_enter barriers here.
+        *
+        * Hence no need for XCHG or barriers around MOV.
+        */
        movq    %rdi,CPUVAR(CURLWP)
 
 #ifdef KASAN
@@ -158,9 +172,31 @@
        movq    PCB_RSP(%rcx),%rsp
 
        /*
-        * for non-interlocked mutex release to work safely the change
-        * to ci_curlwp must not languish in the store buffer. therefore
-        * we use XCHG and not MOV here.  see kern_mutex.c.
+        * Use XCHG, not MOV, to coordinate mutex_exit on this CPU with
+        * mutex_vector_enter on another CPU.
+        *
+        * 1. Any prior mutex_exit by the softint must be visible to
+        *    other CPUs before we restore curlwp on this one,
+        *    requiring store-before-store ordering.
+        *
+        *    (This is always guaranteed by the x86 memory model, TSO,
+        *    but other architectures require an explicit barrier before
+        *    the store to ci->ci_curlwp.)
+        *
+        * 2. Restoring curlwp must be visible on all other CPUs before
+        *    any subsequent mutex_exit on this one can even test
+        *    whether there might be waiters, requiring
+        *    store-before-load ordering.
+        *
+        *    (This is the only ordering x86 TSO ever requires any kind
+        *    of barrier for -- in this case, we take advantage of the
+        *    sequential consistency implied by XCHG to obviate the
+        *    need for MFENCE or something.)
+        *
+        * See kern_mutex.c for details -- this is necessary for
+        * adaptive mutexes to detect whether the lwp is on the CPU in
+        * order to safely block without requiring atomic r/m/w in
+        * mutex_exit.  See also cpu_switchto.
         */
        xchgq   %r15,CPUVAR(CURLWP)     /* restore curlwp */
        popq    %r15                    /* unwind switchframe */
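
In C11 atomics terms, the asymmetry in this file between the plain MOV on
softint entry and the XCHG on return might be sketched roughly as follows;
the types and the function here are illustrative, not the kernel's:

#include <stdatomic.h>

struct lwp;                             /* illustrative forward declaration */
struct cpu_info { _Atomic(struct lwp *) ci_curlwp; };

void softint_dispatch_sketch(struct cpu_info *ci, struct lwp *pinned,
    struct lwp *softlwp)
{
    /*
     * Entry: the softint lwp holds no mutexes yet, so no mutex_exit can
     * precede its first acquisition; a plain store (the MOV) needs no
     * barrier.
     */
    atomic_store_explicit(&ci->ci_curlwp, softlwp, memory_order_relaxed);

    /* ... run the soft interrupt handler here ... */

    /*
     * Return: the softint may have released mutexes, and the pinned lwp
     * may call mutex_exit immediately, so restoring curlwp needs both
     * store-before-store and store-before-load ordering, which a
     * sequentially consistent exchange (the XCHG) provides.
     */
    (void)atomic_exchange_explicit(&ci->ci_curlwp, pinned,
        memory_order_seq_cst);
}
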
diff -r ec68e520a95a -r 76744682b320 sys/arch/i386/i386/locore.S
--- a/sys/arch/i386/i386/locore.S       Wed Mar 01 08:18:39 2023 +0000
+++ b/sys/arch/i386/i386/locore.S       Wed Mar 01 08:38:50 2023 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: locore.S,v 1.192 2023/02/25 18:35:54 riastradh Exp $   */
+/*     $NetBSD: locore.S,v 1.193 2023/03/01 08:38:50 riastradh Exp $   */
 
 /*
  * Copyright-o-rama!
@@ -128,7 +128,7 @@
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.192 2023/02/25 18:35:54 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.193 2023/03/01 08:38:50 riastradh Exp $");
 
 #include "opt_copy_symtab.h"
 #include "opt_ddb.h"
@@ -1401,8 +1401,32 @@
        movl    PCB_ESP(%ebx),%esp
 
        /*
-        * Set curlwp.  This must be globally visible in order to permit
-        * non-interlocked mutex release.
+        * Issue XCHG, rather than MOV, to set ci_curlwp := newlwp in
+        * order to coordinate mutex_exit on this CPU with
+        * mutex_vector_enter on another CPU.
+        *
+        * 1. Any prior mutex_exit by oldlwp must be visible to other
+        *    CPUs before we set ci_curlwp := newlwp on this one,
+        *    requiring a store-before-store barrier.
+        *
+        *    (This is always guaranteed by the x86 memory model, TSO,
+        *    but other architectures require an explicit barrier before
+        *    the store to ci->ci_curlwp.)
+        *
+        * 2. ci_curlwp := newlwp must be visible on all other CPUs
+        *    before any subsequent mutex_exit by newlwp can even test
+        *    whether there might be waiters, requiring a
+        *    store-before-load barrier.
+        *
+        *    (This is the only ordering x86 TSO ever requires any kind
+        *    of barrier for -- in this case, we take advantage of the
+        *    sequential consistency implied by XCHG to obviate the
+        *    need for MFENCE or something.)
+        *
+        * See kern_mutex.c for details -- this is necessary for
+        * adaptive mutexes to detect whether the lwp is on the CPU in
+        * order to safely block without requiring atomic r/m/w in
+        * mutex_exit.
         */
        movl    %edi,%ecx
        xchgl   %ecx,CPUVAR(CURLWP)
diff -r ec68e520a95a -r 76744682b320 sys/arch/i386/i386/spl.S
--- a/sys/arch/i386/i386/spl.S  Wed Mar 01 08:18:39 2023 +0000
+++ b/sys/arch/i386/i386/spl.S  Wed Mar 01 08:38:50 2023 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: spl.S,v 1.57 2022/09/08 06:57:44 knakahara Exp $       */
+/*     $NetBSD: spl.S,v 1.58 2023/03/01 08:38:50 riastradh Exp $       */
 
 /*
  * Copyright (c) 1998, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.57 2022/09/08 06:57:44 knakahara Exp $");
+__KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.58 2023/03/01 08:38:50 riastradh Exp $");
 
 #include "opt_ddb.h"
 #include "opt_spldebug.h"
@@ -384,6 +384,20 @@
        STI(%esi)
        movl    CPUVAR(CURLWP),%esi
        movl    IS_LWP(%eax),%edi       /* switch to handler LWP */
+       /*
+        * Simple MOV to set curlwp to softlwp.  See below on ordering
+        * required to restore softlwp like cpu_switchto.
+        *
+        * 1. Don't need store-before-store barrier because x86 is TSO.
+        *
+        * 2. Don't need store-before-load barrier because when we
+        *    enter a softint lwp, it can't be holding any mutexes, so
+        *    it can't release any until after it has acquired them, so
+        *    we need not participate in the protocol with
+        *    mutex_vector_enter barriers here.
+        *
+        * Hence no need for XCHG or barriers around MOV.
+        */
        movl    %edi,CPUVAR(CURLWP)
        movl    L_PCB(%edi),%edx
        movl    L_PCB(%esi),%ecx
@@ -399,9 +413,31 @@
        movl    PCB_ESP(%ecx),%esp
 
        /*
-        * for non-interlocked mutex release to work safely the change
-        * to ci_curlwp must not languish in the store buffer. therefore
-        * we use XCHG and not MOV here.  see kern_mutex.c.
+        * Use XCHG, not MOV, to coordinate mutex_exit on this CPU with
+        * mutex_vector_enter on another CPU.
+        *
+        * 1. Any prior mutex_exit by the softint must be visible to
+        *    other CPUs before we restore curlwp on this one,
+        *    requiring store-before-store ordering.
+        *
+        *    (This is always guaranteed by the x86 memory model, TSO,
+        *    but other architectures require an explicit barrier before
+        *    the store to ci->ci_curlwp.)
+        *
+        * 2. Restoring curlwp must be visible on all other CPUs before
+        *    any subsequent mutex_exit on this one can even test
+        *    whether there might be waiters, requiring
+        *    store-before-load ordering.
+        *
+        *    (This is the only ordering x86 TSO ever requires any kind
+        *    of barrier for -- in this case, we take advantage of the
+        *    sequential consistency implied by XCHG to obviate the
+        *    need for MFENCE or something.)
+        *
+        * See kern_mutex.c for details -- this is necessary for
+        * adaptive mutexes to detect whether the lwp is on the CPU in
+        * order to safely block without requiring atomic r/m/w in
+        * mutex_exit.  See also cpu_switchto.
         */
        xchgl   %esi,CPUVAR(CURLWP)     /* restore ci_curlwp */
        popl    %edi                    /* unwind switchframe */
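
The new comments in all four files make the same two-part point: the
store-before-store half of the ordering comes for free from TSO on x86,
while the store-before-load half is what the implicitly locked XCHG buys.
On a hypothetical non-TSO port both would have to be spelled out.  A rough
C11 sketch of what the comments describe, using standard fences rather than
the kernel's own membar primitives, with illustrative names only:

#include <stdatomic.h>

struct lwp;                             /* illustrative forward declaration */
struct cpu_info { _Atomic(struct lwp *) ci_curlwp; };

void set_curlwp_sketch(struct cpu_info *ci, struct lwp *newlwp)
{
    /*
     * 1. Store-before-store: any prior mutex_exit stores by the old lwp
     *    must be visible before ci_curlwp changes.  Free under TSO; an
     *    explicit release fence elsewhere.
     */
    atomic_thread_fence(memory_order_release);
    atomic_store_explicit(&ci->ci_curlwp, newlwp, memory_order_relaxed);

    /*
     * 2. Store-before-load: the new ci_curlwp must be visible before the
     *    new lwp's next mutex_exit tests for waiters.  On x86 the XCHG
     *    provides this; here, a full sequentially consistent fence.
     */
    atomic_thread_fence(memory_order_seq_cst);
}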


