Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/crypto Fix some clang neon intrinsics.
details: https://anonhg.NetBSD.org/src/rev/7332d9bee403
branches: trunk
changeset: 1012736:7332d9bee403
user: riastradh <riastradh%NetBSD.org@localhost>
date: Sun Aug 09 02:49:38 2020 +0000
description:
Fix some clang neon intrinsics.
Compile-tested only, with -Wno-nonportable-vector-initializers. This
still needs to be addressed -- and tested -- properly, but it is progress.
diffstat:
sys/crypto/aes/arch/arm/arm_neon.h | 46 +++++++++++++++++++++++++---------
sys/crypto/chacha/arch/arm/arm_neon.h | 46 +++++++++++++++++++++++++---------
2 files changed, 66 insertions(+), 26 deletions(-)
diffs (168 lines):
diff -r 44353a56d9c6 -r 7332d9bee403 sys/crypto/aes/arch/arm/arm_neon.h
--- a/sys/crypto/aes/arch/arm/arm_neon.h Sun Aug 09 02:48:38 2020 +0000
+++ b/sys/crypto/aes/arch/arm/arm_neon.h Sun Aug 09 02:49:38 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: arm_neon.h,v 1.9 2020/08/09 02:48:38 riastradh Exp $ */
+/* $NetBSD: arm_neon.h,v 1.10 2020/08/09 02:49:38 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -85,6 +85,8 @@
typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
+typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
+
typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
typedef struct { uint8x8_t val[2]; } uint8x8x2_t;
@@ -218,7 +220,7 @@
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); \
uint8x16_t __r = __builtin_neon_vextq_v((int8x16_t)__lo_r, \
(int8x16_t)__hi_r, (__i), 48); \
- return __builtin_shufflevector(__r, __r, \
+ __builtin_shufflevector(__r, __r, \
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); \
})
#endif /* __LITTLE_ENDIAN */
@@ -326,19 +328,37 @@
return (uint8x16_t)__out64;
#endif
#elif defined(__clang__)
-#ifdef __LITTLE_ENDIAN__
- return (uint8x16_t)__builtin_neon_vqtbl1q_v((int8x16_t)__tab,
- (int8x16_t)__idx, 48);
-#else
- uint32x4_t __lo_r = __builtin_shufflevector(__lo, __lo,
+#ifndef __LITTLE_ENDIAN__
+ __tab = __builtin_shufflevector(__tab, __tab,
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
- uint32x4_t __hi_r = __builtin_shufflevector(__hi, __hi,
- 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
- uint32x4_t __r = __builtin_neon_vqtbl1q_v((int8x16_t)__tab,
- (int8x16_t)__idx, __i, 48);
- return __builtin_shufflevector(__r, __r,
+ __idx = __builtin_shufflevector(__idx, __idx,
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
#endif
+ uint8x16_t __r;
+#ifdef __aarch64__
+ __r = __builtin_neon_vqtbl1q_v((int8x16_t)__tab, (int8x16_t)__idx, 48);
+#else
+ uint64x2_t __tab64 = (uint64x2_t)__tab;
+ uint8x8_t __tablo = (uint8x8_t)__tab64[0];
+ uint8x8_t __tabhi = (uint8x8_t)__tab64[1];
+ uint64x2_t __idx64, __out64;
+ int8x8_t __idxlo, __idxhi, __outlo, __outhi;
+
+ __idx64 = (uint64x2_t)__idx;
+ __idxlo = (int8x8_t)__idx64[0];
+ __idxhi = (int8x8_t)__idx64[1];
+ __outlo = (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__tablo,
+ (int8x8_t)__tabhi, (int8x8_t)__idxlo, 16);
+ __outhi = (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__tablo,
+ (int8x8_t)__tabhi, (int8x8_t)__idxhi, 16);
+ __out64 = (uint64x2_t) { (uint64_t)__outlo, (uint64_t)__outhi };
+ __r = (uint8x16_t)__out64;
+#endif
+#ifndef __LITTLE_ENDIAN__
+ __r = __builtin_shufflevector(__r, __r,
+ 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
+#endif
+ return __r;
#endif
}
@@ -579,7 +599,7 @@
(int32x4_t)__builtin_neon_vsriq_n_v((int32x4_t)(__vins), \
(int32x4_t)(__vsh), (__bits), 34)
#else
-#define vsliq_n_s32(__vins, __vsh, __bits) ( \
+#define vsriq_n_s32(__vins, __vsh, __bits) ( \
{ \
int32x4_t __tvins = (__vins); \
int32x4_t __tvsh = (__vsh); \
diff -r 44353a56d9c6 -r 7332d9bee403 sys/crypto/chacha/arch/arm/arm_neon.h
--- a/sys/crypto/chacha/arch/arm/arm_neon.h Sun Aug 09 02:48:38 2020 +0000
+++ b/sys/crypto/chacha/arch/arm/arm_neon.h Sun Aug 09 02:49:38 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: arm_neon.h,v 1.5 2020/08/09 02:48:38 riastradh Exp $ */
+/* $NetBSD: arm_neon.h,v 1.6 2020/08/09 02:49:38 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -85,6 +85,8 @@
typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
+typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
+
typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
typedef struct { uint8x8_t val[2]; } uint8x8x2_t;
@@ -218,7 +220,7 @@
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); \
uint8x16_t __r = __builtin_neon_vextq_v((int8x16_t)__lo_r, \
(int8x16_t)__hi_r, (__i), 48); \
- return __builtin_shufflevector(__r, __r, \
+ __builtin_shufflevector(__r, __r, \
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); \
})
#endif /* __LITTLE_ENDIAN */
@@ -326,19 +328,37 @@
return (uint8x16_t)__out64;
#endif
#elif defined(__clang__)
-#ifdef __LITTLE_ENDIAN__
- return (uint8x16_t)__builtin_neon_vqtbl1q_v((int8x16_t)__tab,
- (int8x16_t)__idx, 48);
-#else
- uint32x4_t __lo_r = __builtin_shufflevector(__lo, __lo,
+#ifndef __LITTLE_ENDIAN__
+ __tab = __builtin_shufflevector(__tab, __tab,
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
- uint32x4_t __hi_r = __builtin_shufflevector(__hi, __hi,
- 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
- uint32x4_t __r = __builtin_neon_vqtbl1q_v((int8x16_t)__tab,
- (int8x16_t)__idx, __i, 48);
- return __builtin_shufflevector(__r, __r,
+ __idx = __builtin_shufflevector(__idx, __idx,
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
#endif
+ uint8x16_t __r;
+#ifdef __aarch64__
+ __r = __builtin_neon_vqtbl1q_v((int8x16_t)__tab, (int8x16_t)__idx, 48);
+#else
+ uint64x2_t __tab64 = (uint64x2_t)__tab;
+ uint8x8_t __tablo = (uint8x8_t)__tab64[0];
+ uint8x8_t __tabhi = (uint8x8_t)__tab64[1];
+ uint64x2_t __idx64, __out64;
+ int8x8_t __idxlo, __idxhi, __outlo, __outhi;
+
+ __idx64 = (uint64x2_t)__idx;
+ __idxlo = (int8x8_t)__idx64[0];
+ __idxhi = (int8x8_t)__idx64[1];
+ __outlo = (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__tablo,
+ (int8x8_t)__tabhi, (int8x8_t)__idxlo, 16);
+ __outhi = (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__tablo,
+ (int8x8_t)__tabhi, (int8x8_t)__idxhi, 16);
+ __out64 = (uint64x2_t) { (uint64_t)__outlo, (uint64_t)__outhi };
+ __r = (uint8x16_t)__out64;
+#endif
+#ifndef __LITTLE_ENDIAN__
+ __r = __builtin_shufflevector(__r, __r,
+ 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
+#endif
+ return __r;
#endif
}
@@ -579,7 +599,7 @@
(int32x4_t)__builtin_neon_vsriq_n_v((int32x4_t)(__vins), \
(int32x4_t)(__vsh), (__bits), 34)
#else
-#define vsliq_n_s32(__vins, __vsh, __bits) ( \
+#define vsriq_n_s32(__vins, __vsh, __bits) ( \
{ \
int32x4_t __tvins = (__vins); \
int32x4_t __tvsh = (__vsh); \
Home |
Main Index |
Thread Index |
Old Index