Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/crypto/aes/arch/arm Add 32-bit load, store, and shift intrinsics.
details: https://anonhg.NetBSD.org/src/rev/4ca3132e434a
branches: trunk
changeset: 974161:4ca3132e434a
user: riastradh <riastradh%NetBSD.org@localhost>
date: Sat Jul 25 22:43:01 2020 +0000
description:
Add 32-bit load, store, and shift intrinsics.
vld1q_u32
vst1q_u32
vshlq_n_u32
vshrq_n_u32
diffstat:
sys/crypto/aes/arch/arm/arm_neon.h | 80 +++++++++++++++++++++++++++++++++++++-
1 files changed, 79 insertions(+), 1 deletions(-)
diffs (108 lines):
diff -r 9f317c30b965 -r 4ca3132e434a sys/crypto/aes/arch/arm/arm_neon.h
--- a/sys/crypto/aes/arch/arm/arm_neon.h Sat Jul 25 22:42:31 2020 +0000
+++ b/sys/crypto/aes/arch/arm/arm_neon.h Sat Jul 25 22:43:01 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: arm_neon.h,v 1.5 2020/07/25 22:42:31 riastradh Exp $ */
+/* $NetBSD: arm_neon.h,v 1.6 2020/07/25 22:43:01 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -222,6 +222,30 @@
#endif
_INTRINSATTR
+static __inline uint32x4_t /* returns the uint32x4_t loaded from __p32 */
+vld1q_u32(const uint32_t *__p32)
+{
+#if defined(__GNUC__) && !defined(__clang__) /* GCC: use GCC's own builtins */
+#ifdef __aarch64__
+ const __builtin_aarch64_simd_si *__p =
+ (const __builtin_aarch64_simd_si *)__p32; /* cast to the pointer type the builtin is called with */
+
+ return (uint32x4_t)__builtin_aarch64_ld1v4si(__p);
+#else /* !__aarch64__ */
+ const __builtin_neon_si *__p = (const __builtin_neon_si *)__p32;
+
+ return (uint32x4_t)__builtin_neon_vld1v4si(__p);
+#endif /* __aarch64__ */
+#elif defined(__clang__) /* clang: generic vld1q builtin */
+ uint32x4_t __v = (uint32x4_t)__builtin_neon_vld1q_v(__p32, 50); /* NOTE(review): 50 is presumably clang's type code for uint32x4_t -- confirm */
+#ifndef __LITTLE_ENDIAN__
+ __v = __builtin_shufflevector(__v, __v, 3,2,1,0); /* not little-endian: reverse the order of the four lanes */
+#endif /* !__LITTLE_ENDIAN__ */
+ return __v;
+#endif /* GCC / clang */
+}
+
+_INTRINSATTR
static __inline uint8x16_t
vld1q_u8(const uint8_t *__p8)
{
@@ -383,6 +407,38 @@
#if defined(__GNUC__) && !defined(__clang__)
_INTRINSATTR
+static __inline uint32x4_t /* left-shift intrinsic on a uint32x4_t: function under GCC, macro under clang */
+vshlq_n_u32(uint32x4_t __v, uint8_t __bits)
+{
+#ifdef __aarch64__
+ return (uint32x4_t)__builtin_aarch64_ashlv4si((int32x4_t)__v, __bits); /* builtin is called on the signed vector type; result cast back */
+#else /* !__aarch64__ */
+ return (uint32x4_t)__builtin_neon_vshl_nv4si((int32x4_t)__v, __bits);
+#endif /* __aarch64__ */
+}
+#elif defined(__clang__) /* NOTE(review): macro form presumably because clang needs a constant shift count -- confirm */
+#define vshlq_n_u32(__v, __bits) \
+ (uint32x4_t)__builtin_neon_vshlq_n_v((int32x4_t)(__v), (__bits), 50)
+#endif /* GCC / clang */
+
+#if defined(__GNUC__) && !defined(__clang__)
+_INTRINSATTR
+static __inline uint32x4_t /* right-shift intrinsic on a uint32x4_t: function under GCC, macro under clang */
+vshrq_n_u32(uint32x4_t __v, uint8_t __bits)
+{
+#ifdef __aarch64__
+ return (uint32x4_t)__builtin_aarch64_lshrv4si((int32x4_t)__v, __bits); /* builtin is called on the signed vector type; result cast back */
+#else /* !__aarch64__ */
+ return (uint32x4_t)__builtin_neon_vshru_nv4si((int32x4_t)__v, __bits);
+#endif /* __aarch64__ */
+}
+#elif defined(__clang__) /* macro must carry the same name as the GCC function above (was misnamed vshrq_n_u8) */
+#define vshrq_n_u32(__v, __bits) \
+ (uint32x4_t)__builtin_neon_vshrq_n_v((int32x4_t)(__v), (__bits), 50)
+#endif /* GCC / clang */
+
+#if defined(__GNUC__) && !defined(__clang__)
+_INTRINSATTR
static __inline uint8x16_t
vshrq_n_u8(uint8x16_t __v, uint8_t __bits)
{
@@ -432,6 +488,28 @@
_INTRINSATTR
static __inline void
+vst1q_u32(uint32_t *__p32, uint32x4_t __v) /* stores the uint32x4_t __v to __p32 */
+{
+#if defined(__GNUC__) && !defined(__clang__) /* GCC: use GCC's own builtins */
+#ifdef __aarch64__
+ __builtin_aarch64_simd_si *__p = (__builtin_aarch64_simd_si *)__p32; /* cast to the pointer type the builtin is called with */
+
+ __builtin_aarch64_st1v4si(__p, (int32x4_t)__v);
+#else /* !__aarch64__ */
+ __builtin_neon_si *__p = (__builtin_neon_si *)__p32;
+
+ __builtin_neon_vst1v4si(__p, (int32x4_t)__v);
+#endif /* __aarch64__ */
+#elif defined(__clang__) /* clang: generic vst1q builtin */
+#ifndef __LITTLE_ENDIAN__
+ __v = __builtin_shufflevector(__v, __v, 3,2,1,0); /* not little-endian: reverse the order of the four lanes before storing */
+#endif /* !__LITTLE_ENDIAN__ */
+ __builtin_neon_vst1q_v(__p32, __v, 50); /* NOTE(review): 50 is presumably clang's type code for uint32x4_t -- confirm */
+#endif /* GCC / clang */
+}
+
+_INTRINSATTR
+static __inline void
vst1q_u8(uint8_t *__p8, uint8x16_t __v)
{
#if defined(__GNUC__) && !defined(__clang__)
Home |
Main Index |
Thread Index |
Old Index