Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/mips/include mips/cavium: Take advantage of Octeon's guaranteed r/rw ordering.
details: https://anonhg.NetBSD.org/src/rev/28edddf0fd2c
branches: trunk
changeset: 365728:28edddf0fd2c
user: riastradh <riastradh%NetBSD.org@localhost>
date: Thu Apr 21 12:06:31 2022 +0000
description:
mips/cavium: Take advantage of Octeon's guaranteed r/rw ordering.
diffstat:
common/lib/libc/arch/mips/atomic/membar_ops.S | 96 ++++++++++++++++++--------
sys/arch/mips/include/asm.h | 21 ++++-
2 files changed, 81 insertions(+), 36 deletions(-)
diffs (155 lines):
diff -r 1c456eed4846 -r 28edddf0fd2c common/lib/libc/arch/mips/atomic/membar_ops.S
--- a/common/lib/libc/arch/mips/atomic/membar_ops.S Thu Apr 21 12:05:13 2022 +0000
+++ b/common/lib/libc/arch/mips/atomic/membar_ops.S Thu Apr 21 12:06:31 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: membar_ops.S,v 1.12 2022/04/09 23:32:51 riastradh Exp $ */
+/* $NetBSD: membar_ops.S,v 1.13 2022/04/21 12:06:31 riastradh Exp $ */
/*-
* Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
@@ -38,44 +38,80 @@
j ra
BDSYNC
END(_membar_sync)
+ATOMIC_OP_ALIAS(membar_sync,_membar_sync)
+
+STRONG_ALIAS(_membar_enter,_membar_sync)
+ATOMIC_OP_ALIAS(membar_enter,_membar_sync)
#ifdef __OCTEON__
+
+/*
+ * cnMIPS guarantees load-before-load/store ordering without any
+ * barriers. So the only barriers we need are store-before-load (sync)
+ * and store-before-store (syncw, i.e., sync 4). See Table 2-32
+ * `Execution Ordering Rules' on p. 104 of Cavium OCTEON III CN78XX
+ * Hardware Reference Manual, CN78XX-HM-0.99E, September 2014:
+ *
+ * First Operation DLD [load instruction to a physical
+ * address that is L2/DRAM]
+ * Second Operation Any
+ * Execution Ordering Comments
+ *
+ * The second operation cannot appear to execute before
+ * the first (DLD) operation, regardless of the presence
+ * or absence of SYNC* instructions.
+ *
+ * Note: I'm not sure if this applies to earlier cnMIPS -- can't find
+ * it in the Cavium Networks OCTEON Plus CN50XX Hardware Reference
+ * Manual CN50XX-HM-0.99E, July 2008. Experimentally, on an erlite3
+ * (Cavium Octeon CN5020-500), I can easily detect reordering of
+ * store-before-store and store-before-load, but I haven't been able to
+ * detect any reordering of load-before-load or load-before-store.
+ *
+ * Note: On early cnMIPS (CN3xxx), there is an erratum which sometimes
+ * requires issuing two syncw's in a row. I don't know the details --
+ * don't have documentation -- and in Linux it is only used for I/O
+ * purposes.
+ *
+ * Currently we don't build kernels that work on both Octeon and
+ * non-Octeon MIPS CPUs, so none of this is done with binary patching.
+ * For userlands we could use a separate shared library on Octeon with
+ * ld.so.conf to override the symbols with cheaper definitions, but we
+ * don't do that now.
+ */
+
+LEAF(_membar_acquire)
+ j ra
+ nop
+END(_membar_acquire)
+ATOMIC_OP_ALIAS(membar_acquire,_membar_acquire)
+
+STRONG_ALIAS(_membar_consumer,_membar_acquire)
+ATOMIC_OP_ALIAS(membar_consumer,_membar_acquire)
+
LEAF(_membar_release)
- /*
- * syncw is documented as ordering store-before-store in
- *
- * Cavium OCTEON III CN78XX Hardware Reference Manual,
- * CN78XX-HM-0.99E, September 2014.
- *
- * It's unclear from the documentation the architecture
- * guarantees load-before-store ordering without barriers, but
- * this code assumes it does. If that assumption is wrong, we
- * can only use syncw for membar_producer -- membar_release has
- * to use the full sync.
- */
j ra
syncw
END(_membar_release)
-#endif
+ATOMIC_OP_ALIAS(membar_release,_membar_release)
-ATOMIC_OP_ALIAS(membar_sync,_membar_sync)
-ATOMIC_OP_ALIAS(membar_acquire,_membar_sync)
-STRONG_ALIAS(_membar_acquire,_membar_sync)
-ATOMIC_OP_ALIAS(membar_enter,_membar_sync)
-STRONG_ALIAS(_membar_enter,_membar_sync)
-#ifdef __OCTEON__
+STRONG_ALIAS(_membar_exit,_membar_release)
ATOMIC_OP_ALIAS(membar_exit,_membar_release)
-STRONG_ALIAS(_membar_exit,_membar_release)
-ATOMIC_OP_ALIAS(membar_release,_membar_release)
+
+STRONG_ALIAS(_membar_producer,_membar_release)
ATOMIC_OP_ALIAS(membar_producer,_membar_release)
-STRONG_ALIAS(_membar_producer,_membar_release)
-#else
-ATOMIC_OP_ALIAS(membar_exit,_membar_sync)
-STRONG_ALIAS(_membar_exit,_membar_sync)
-ATOMIC_OP_ALIAS(membar_release,_membar_sync)
+
+#else /* !__OCTEON__ */
+
+STRONG_ALIAS(_membar_acquire,_membar_sync)
+ATOMIC_OP_ALIAS(membar_acquire,_membar_sync)
STRONG_ALIAS(_membar_release,_membar_sync)
+ATOMIC_OP_ALIAS(membar_release,_membar_sync)
+STRONG_ALIAS(_membar_exit,_membar_sync)
+ATOMIC_OP_ALIAS(membar_exit,_membar_sync)
+STRONG_ALIAS(_membar_consumer,_membar_sync)
+ATOMIC_OP_ALIAS(membar_consumer,_membar_sync)
+STRONG_ALIAS(_membar_producer,_membar_sync)
ATOMIC_OP_ALIAS(membar_producer,_membar_sync)
-STRONG_ALIAS(_membar_producer,_membar_sync)
+
#endif
-ATOMIC_OP_ALIAS(membar_consumer,_membar_sync)
-STRONG_ALIAS(_membar_consumer,_membar_sync)
diff -r 1c456eed4846 -r 28edddf0fd2c sys/arch/mips/include/asm.h
--- a/sys/arch/mips/include/asm.h Thu Apr 21 12:05:13 2022 +0000
+++ b/sys/arch/mips/include/asm.h Thu Apr 21 12:06:31 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: asm.h,v 1.70 2022/04/09 14:09:32 riastradh Exp $ */
+/* $NetBSD: asm.h,v 1.71 2022/04/21 12:06:31 riastradh Exp $ */
/*
* Copyright (c) 1992, 1993
@@ -574,12 +574,21 @@
/* compiler define */
#if defined(__OCTEON__)
- /* early cnMIPS have erratum which means 2 */
-#define LLSCSYNC sync 4; sync 4
+/*
+ * See common/lib/libc/arch/mips/atomic/membar_ops.S for notes on
+ * Octeon memory ordering guarantees and barriers.
+ *
+ * cnMIPS also has a quirk where the store buffer can get clogged and
+ * we need to apply a plunger to it _after_ releasing a lock or else
+ * other CPUs may spin for hundreds of thousands of cycles before they
+ * see the lock is released. So we also have the quirky SYNC_PLUNGER
+ * barrier as syncw.
+ */
+#define LLSCSYNC /* nothing */
#define BDSYNC sync
-#define BDSYNC_ACQ sync
-#define SYNC_ACQ sync
-#define SYNC_REL sync
+#define BDSYNC_ACQ nop
+#define SYNC_ACQ /* nothing */
+#define SYNC_REL sync 4
#define BDSYNC_PLUNGER sync 4
#define SYNC_PLUNGER sync 4
#elif __mips >= 3 || !defined(__mips_o32)
Home |
Main Index |
Thread Index |
Old Index