tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[PATCH] __builtin_ffs/clz in sys/bitops.h



The attached patch adapts sys/bitops.h to take advantage of
__builtin_ffs/clz if available.

For recent gcc and clang on x86 and Arm, this makes the difference
between a string of branches and arithmetic versus a single x86 BSR or
Arm CLZ instruction or similar.

(No change to popcount, which lives in strings.h, not sys/bitops.h,
and still uses the usual logarithmic-SWAR algorithm that beats
microcoded loops often found in CPUs.)

The patch arranges to keep both versions exercised in the automatic
tests.

Would anyone like to see if there's a performance benefit on a range
of CPUs?  It might be worthwhile, but performance measurements for
this sort of thing constitute a rabbit hole that I probably won't find
the time to fall into for in the foreseeable future.
From 494ce7f4bddb181a7de6e97aa461899926c8e761 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Thu, 13 Jul 2023 20:33:06 +0000
Subject: [PATCH] sys/bitops.h: Use __builtin_ffs/ffsll/clz/clzll if available.

Make sure to test both versions of the code.
---
 sys/sys/bitops.h                       | 25 +++++++++++++++++++
 tests/include/sys/Makefile             |  1 +
 tests/include/sys/t_bitops_nobuiltin.c | 34 ++++++++++++++++++++++++++
 3 files changed, 60 insertions(+)
 create mode 100644 tests/include/sys/t_bitops_nobuiltin.c

diff --git a/sys/sys/bitops.h b/sys/sys/bitops.h
index b80fdc247214..29017fc3e631 100644
--- a/sys/sys/bitops.h
+++ b/sys/sys/bitops.h
@@ -31,6 +31,7 @@
 #ifndef _SYS_BITOPS_H_
 #define _SYS_BITOPS_H_
 
+#include <sys/cdefs.h>
 #include <sys/stdint.h>
 
 /*
@@ -40,6 +41,11 @@
 static __inline int __unused
 ffs32(uint32_t _n)
 {
+#if defined(__has_builtin) && __has_builtin(__builtin_ffs) && \
+    !defined(_SYS_BITOPS_NOBUILTIN)
+	__CTASSERT(sizeof(_n) <= sizeof(int));
+	return __builtin_ffs(_n);
+#else
 	int _v;
 
 	if (!_n)
@@ -67,6 +73,7 @@ ffs32(uint32_t _n)
 		_v += 1;
 	}
 	return _v;
+#endif
 }
 #endif
 
@@ -74,6 +81,11 @@ ffs32(uint32_t _n)
 static __inline int __unused
 ffs64(uint64_t _n)
 {
+#if defined(__has_builtin) && __has_builtin(__builtin_ffsl) && \
+    !defined(_SYS_BITOPS_NOBUILTIN)
+	__CTASSERT(sizeof(_n) <= sizeof(long long));
+	return __builtin_ffsll(_n);
+#else
 	int _v;
 
 	if (!_n)
@@ -105,6 +117,7 @@ ffs64(uint64_t _n)
 		_v += 1;
 	}
 	return _v;
+#endif
 }
 #endif
 
@@ -115,6 +128,11 @@ ffs64(uint64_t _n)
 static __inline int __unused
 fls32(uint32_t _n)
 {
+#if defined(__has_builtin) && __has_builtin(__builtin_clz) && \
+    !defined(_SYS_BITOPS_NOBUILTIN)
+	__CTASSERT(sizeof(_n) == sizeof(int));
+	return _n == 0 ? 0 : 32 - __builtin_clz(_n);
+#else
 	int _v;
 
 	if (!_n)
@@ -142,6 +160,7 @@ fls32(uint32_t _n)
 		_v -= 1;
 	}
 	return _v;
+#endif
 }
 #endif
 
@@ -149,6 +168,11 @@ fls32(uint32_t _n)
 static __inline int __unused
 fls64(uint64_t _n)
 {
+#if defined(__has_builtin) && __has_builtin(__builtin_clzll) && \
+    !defined(_SYS_BITOPS_NOBUILTIN)
+	__CTASSERT(sizeof(_n) == sizeof(long long));
+	return _n == 0 ? 0 : 64 - __builtin_clzll(_n);
+#else
 	int _v;
 
 	if (!_n)
@@ -180,6 +204,7 @@ fls64(uint64_t _n)
 		_v -= 1;
 	}
 	return _v;
+#endif
 }
 #endif
 
diff --git a/tests/include/sys/Makefile b/tests/include/sys/Makefile
index 25755bebfe3d..59c57b174e28 100644
--- a/tests/include/sys/Makefile
+++ b/tests/include/sys/Makefile
@@ -7,6 +7,7 @@ NOMAN=		# defined
 TESTSDIR=		${TESTSBASE}/include/sys
 
 TESTS_C+=		t_bitops
+TESTS_C+=		t_bitops_nobuiltin
 TESTS_C+=		t_bootblock
 TESTS_C+=		t_cdefs
 TESTS_C+=		t_list
diff --git a/tests/include/sys/t_bitops_nobuiltin.c b/tests/include/sys/t_bitops_nobuiltin.c
new file mode 100644
index 000000000000..dd5d67a8fc1d
--- /dev/null
+++ b/tests/include/sys/t_bitops_nobuiltin.c
@@ -0,0 +1,34 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2023 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define	_SYS_BITOPS_NOBUILTIN
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD$");
+
+#include "t_bitops.c"


Home | Main Index | Thread Index | Old Index