NetBSD-Bugs archive
Re: port-xen/57199: Pure PVH i386 guests hang on disk activity
> Date: Mon, 15 Jul 2024 17:33:17 +0000
> From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
>
> 2. This is a single-(v)CPU system which has patched out the lock
> prefix in membar_sync.
> [...]
> If my hypothesis about (2) is correct, the right thing is probably
> either to make xen_mb be an assembly stub that does
>
>	lock
>	addq $0,-8(%rsp)
>
> (without the membar_sync hotpatching), or to make xen_mb be inline asm
> to do the same.
The attached patch implements this approach without issuing any
unnecessary mfence/lfence/sfence instructions, and leaves extensive
comments explaining what's going on. Can you try it out?
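
(For reference, the inline asm variant mentioned above would look
roughly like this on amd64 -- a sketch only, not what the attached
patch does, and the exact spelling of the asm is my guess:

	/*
	 * Hypothetical inline-asm xen_mb; i386 would use
	 * addl $0,-4(%%esp) instead, matching membar_sync there.
	 */
	static inline void
	xen_mb(void)
	{
		/* LOCK ADD just below the stack pointer: store-before-load */
		__asm __volatile("lock; addq $0,-8(%%rsp)" ::: "memory", "cc");
	}

The patch goes with the assembly stub form instead.)
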
Under my hypothesis, the domU kernel certainly needs this change. And
the dom0 kernel may also need it, because I believe it uses
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY and RING_FINAL_CHECK_FOR_REQUESTS,
which rely on xen_mb too.
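
(To make the store-before-load edge concrete: those push-and-check-notify
macros boil down to publishing a producer index and then reading the
peer's event index. Roughly -- paraphrasing the RESPONSES variant, with
declarations and macro plumbing elided; the REQUESTS variant is
symmetric:

	old = sring->rsp_prod;
	new = ring->rsp_prod_pvt;
	xen_wmb();		/* peer sees responses before rsp_prod */
	sring->rsp_prod = new;	/* STORE: publish the new producer index */
	xen_mb();		/* STORE above must be visible before... */
	notify = ((RING_IDX)(new - sring->rsp_event) <	/* ...this LOAD */
	    (RING_IDX)(new - old));

The peer does the mirror image -- store its event index, then load the
producer index -- so if the LOCK is hotpatched away, the store can
still be sitting in the store buffer when the load happens, both sides
can read stale values, no event gets sent, and the ring stalls, which
is consistent with the reported hang.)
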
# HG changeset patch
# User Taylor R Campbell <riastradh%NetBSD.org@localhost>
# Date 1721067262 0
# Mon Jul 15 18:14:22 2024 +0000
# Branch trunk
# Node ID 7ecbeb4758c5d1bd73ce8492ada9c88cb4a3f4b3
# Parent b61cb6f1e4ce13fd55dc3584c62bcd23b9a9d2dd
# EXP-Topic riastradh-pr57199-xenmbuniproc
xen: Don't hotpatch away LOCK prefix in xen_mb, even on UP boots.
Both xen_mb and membar_sync are designed to provide store-before-load
ordering, but xen_mb has to provide it when synchronizing the guest
with the hypervisor, while membar_sync only has to provide it when
synchronizing one (guest) CPU with another (guest) CPU.
It is safe to hotpatch away the LOCK prefix in membar_sync on a
uniprocessor boot because membar_sync is only designed to coordinate
between normal memory on multiple CPUs, and is never necessary when
there's only one CPU involved.
But xen_mb is used to coordinate between the guest and the `device'
implemented by a hypervisor, which might be running on another
_physical_ CPU even if the NetBSD guest only sees one `CPU', i.e.,
one _virtual_ CPU. So even on `uniprocessor' boots, xen_mb must
still issue an instruction that provides store-before-load ordering
on multiprocessor systems, such as a LOCK ADD (or MFENCE, but MFENCE
is costlier for no benefit here).
No need to change xen_wmb (release ordering, load/store-before-store)
or xen_rmb (acquire ordering, load-before-load/store) because every
x86 store is a store-release and every x86 load is a load-acquire,
even on multiprocessor systems, so there's no hotpatching involved
anyway.
PR kern/57199
diff -r b61cb6f1e4ce -r 7ecbeb4758c5 common/lib/libc/arch/i386/atomic/atomic.S
--- a/common/lib/libc/arch/i386/atomic/atomic.S Mon Jul 15 01:57:23 2024 +0000
+++ b/common/lib/libc/arch/i386/atomic/atomic.S Mon Jul 15 18:14:22 2024 +0000
@@ -211,6 +211,8 @@ ENTRY(_membar_sync)
* https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
* https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
* https://www.agner.org/optimize/instruction_tables.pdf
+ *
+ * Sync with xen_mb in sys/arch/i386/i386/cpufunc.S.
*/
LOCK
addl $0, -4(%esp)
diff -r b61cb6f1e4ce -r 7ecbeb4758c5 common/lib/libc/arch/x86_64/atomic/atomic.S
--- a/common/lib/libc/arch/x86_64/atomic/atomic.S Mon Jul 15 01:57:23 2024 +0000
+++ b/common/lib/libc/arch/x86_64/atomic/atomic.S Mon Jul 15 18:14:22 2024 +0000
@@ -279,6 +279,8 @@ ENTRY(_membar_sync)
* https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
* https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
* https://www.agner.org/optimize/instruction_tables.pdf
+ *
+ * Sync with xen_mb in sys/arch/amd64/amd64/cpufunc.S.
*/
LOCK
addq $0, -8(%rsp)
diff -r b61cb6f1e4ce -r 7ecbeb4758c5 sys/arch/amd64/amd64/cpufunc.S
--- a/sys/arch/amd64/amd64/cpufunc.S Mon Jul 15 01:57:23 2024 +0000
+++ b/sys/arch/amd64/amd64/cpufunc.S Mon Jul 15 18:14:22 2024 +0000
@@ -61,6 +61,28 @@ ENTRY(x86_mfence)
ret
END(x86_mfence)
+#ifdef XEN
+ENTRY(xen_mb)
+ /*
+ * Store-before-load ordering with respect to matching logic
+ * on the hypervisor side.
+ *
+ * This is the same as membar_sync, but without hotpatching
+ * away the LOCK prefix on uniprocessor boots -- because under
+ * Xen, we still have to coordinate with a `device' backed by a
+ * hypervisor that is potentially on another physical CPU even
+ * if we observe only one virtual CPU as the guest.
+ *
+ * See common/lib/libc/arch/x86_64/atomic/atomic.S for
+ * rationale and keep this in sync with the implementation
+ * of membar_sync there.
+ */
+ lock
+ addq $0,-8(%rsp)
+ ret
+END(xen_mb)
+#endif /* XEN */
+
#ifndef XENPV
ENTRY(invlpg)
#ifdef SVS
diff -r b61cb6f1e4ce -r 7ecbeb4758c5 sys/arch/i386/i386/cpufunc.S
--- a/sys/arch/i386/i386/cpufunc.S Mon Jul 15 01:57:23 2024 +0000
+++ b/sys/arch/i386/i386/cpufunc.S Mon Jul 15 18:14:22 2024 +0000
@@ -67,6 +67,28 @@ ENTRY(x86_mfence)
ret
END(x86_mfence)
+#ifdef XEN
+ENTRY(xen_mb)
+ /*
+ * Store-before-load ordering with respect to matching logic
+ * on the hypervisor side.
+ *
+ * This is the same as membar_sync, but without hotpatching
+ * away the LOCK prefix on uniprocessor boots -- because under
+ * Xen, we still have to coordinate with a `device' backed by a
+ * hypervisor that is potentially on another physical CPU even
+ * if we observe only one virtual CPU as the guest.
+ *
+ * See common/lib/libc/arch/i386/atomic/atomic.S for
+ * rationale and keep this in sync with the implementation
+ * of membar_sync there.
+ */
+ lock
+ addl $0,-4(%esp)
+ ret
+END(xen_mb)
+#endif /* XEN */
+
#ifndef XENPV
ENTRY(lidt)
movl 4(%esp), %eax
diff -r b61cb6f1e4ce -r 7ecbeb4758c5 sys/arch/xen/include/hypervisor.h
--- a/sys/arch/xen/include/hypervisor.h Mon Jul 15 01:57:23 2024 +0000
+++ b/sys/arch/xen/include/hypervisor.h Mon Jul 15 18:14:22 2024 +0000
@@ -112,7 +112,7 @@ struct xen_npx_attach_args {
#undef xen_rmb
#undef xen_wmb
-#define xen_mb() membar_sync()
+void xen_mb(void);
#define xen_rmb() membar_acquire()
#define xen_wmb() membar_release()
#endif /* __XEN_INTERFACE_VERSION */
diff -r b61cb6f1e4ce -r 7ecbeb4758c5 sys/arch/xen/include/xenring.h
--- a/sys/arch/xen/include/xenring.h Mon Jul 15 01:57:23 2024 +0000
+++ b/sys/arch/xen/include/xenring.h Mon Jul 15 18:14:22 2024 +0000
@@ -24,7 +24,7 @@
#undef xen_rmb
#undef xen_wmb
-#define xen_mb() membar_sync()
+void xen_mb(void);
#define xen_rmb() membar_acquire()
#define xen_wmb() membar_release()