Source-Changes-HG archive


[src/trunk]: src/sys/crypto/aes/arch/arm Implement AES-CCM with NEON.



details:   https://anonhg.NetBSD.org/src/rev/e76f807c3205
branches:  trunk
changeset: 1012187:e76f807c3205
user:      riastradh <riastradh@NetBSD.org>
date:      Sat Jul 25 22:36:06 2020 +0000

description:
Implement AES-CCM with NEON.

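For context: CCM combines two uses of one AES key, a CBC-MAC over the
data for authentication and CTR mode for encryption, and the routines
added below interleave the two per 16-byte block.  A portable sketch of
one combined encryption step, with aes_enc1() as a hypothetical
stand-in for the one-block vpaes primitive aes_neon_enc1() used in the
diff:

#include <stdint.h>

/* Hypothetical one-block AES primitive: out = AES_K(in). */
void aes_enc1(const void *key, const uint8_t in[16], uint8_t out[16]);

static void
xor16(uint8_t *r, const uint8_t *a, const uint8_t *b)
{
	unsigned i;

	for (i = 0; i < 16; i++)
		r[i] = a[i] ^ b[i];
}

/*
 * One CCM encryption step: fold the plaintext block into the CBC-MAC
 * accumulator, bump the 32-bit big-endian counter, and XOR the
 * plaintext with the CTR keystream.  authctr[0..15] is the CBC-MAC
 * state and authctr[16..31] the counter block, matching the 32-byte
 * authctr[] layout in the new prototypes.
 */
static void
ccm_enc_block(const void *key, const uint8_t in[16], uint8_t out[16],
    uint8_t authctr[32])
{
	uint8_t tmp[16];
	unsigned i;

	xor16(tmp, authctr, in);
	aes_enc1(key, tmp, authctr);		/* auth = AES_K(auth ^ ptxt) */
	for (i = 32; i-- > 28;)			/* 32-bit big-endian increment */
		if (++authctr[i] != 0)
			break;
	aes_enc1(key, authctr + 16, tmp);	/* keystream = AES_K(ctr) */
	xor16(out, tmp, in);			/* ctxt = ptxt ^ keystream */
}
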
diffstat:

 sys/crypto/aes/arch/arm/aes_neon.h      |   8 ++-
 sys/crypto/aes/arch/arm/aes_neon_impl.c |  40 ++++++++++++++-
 sys/crypto/aes/arch/arm/aes_neon_subr.c |  89 ++++++++++++++++++++++++++++++++-
 sys/crypto/aes/arch/arm/arm_neon.h      |  22 +++++++-
 4 files changed, 153 insertions(+), 6 deletions(-)

diffs (241 lines):

diff -r 6465b7fbb940 -r e76f807c3205 sys/crypto/aes/arch/arm/aes_neon.h
--- a/sys/crypto/aes/arch/arm/aes_neon.h        Sat Jul 25 22:33:04 2020 +0000
+++ b/sys/crypto/aes/arch/arm/aes_neon.h        Sat Jul 25 22:36:06 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: aes_neon.h,v 1.2 2020/07/25 22:12:57 riastradh Exp $   */
+/*     $NetBSD: aes_neon.h,v 1.3 2020/07/25 22:36:06 riastradh Exp $   */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -59,6 +59,12 @@
     uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
 void aes_neon_xts_dec(const struct aesdec *, const uint8_t[static 16],
     uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
+void aes_neon_cbcmac_update1(const struct aesenc *, const uint8_t[static 16],
+    size_t, uint8_t[static 16], uint32_t);
+void aes_neon_ccm_enc1(const struct aesenc *, const uint8_t[static 16],
+    uint8_t[static 16], size_t, uint8_t[static 32], uint32_t);
+void aes_neon_ccm_dec1(const struct aesenc *, const uint8_t[static 16],
+    uint8_t[static 16], size_t, uint8_t[static 32], uint32_t);
 
 int aes_neon_selftest(void);
 
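The three new entry points share a 32-byte authctr[] argument in which
the CBC-MAC accumulator and the CTR block sit side by side; the code
addresses them simply as authctr and authctr + 16.  Viewed as a struct,
hypothetically and purely for illustration:

#include <stdint.h>

struct ccm_state {		/* not in the source; layout only */
	uint8_t auth[16];	/* CBC-MAC accumulator */
	uint8_t ctr[16];	/* CTR block, big-endian counter */
};
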
diff -r 6465b7fbb940 -r e76f807c3205 sys/crypto/aes/arch/arm/aes_neon_impl.c
--- a/sys/crypto/aes/arch/arm/aes_neon_impl.c   Sat Jul 25 22:33:04 2020 +0000
+++ b/sys/crypto/aes/arch/arm/aes_neon_impl.c   Sat Jul 25 22:36:06 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: aes_neon_impl.c,v 1.3 2020/07/25 22:12:57 riastradh Exp $      */
+/*     $NetBSD: aes_neon_impl.c,v 1.4 2020/07/25 22:36:06 riastradh Exp $      */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_neon_impl.c,v 1.3 2020/07/25 22:12:57 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon_impl.c,v 1.4 2020/07/25 22:36:06 riastradh Exp $");
 
 #include <sys/types.h>
 #include <sys/proc.h>
@@ -144,6 +144,39 @@
        fpu_kern_leave();
 }
 
+static void
+aes_neon_cbcmac_update1_impl(const struct aesenc *enc,
+    const uint8_t in[static 16], size_t nbytes, uint8_t auth[static 16],
+    uint32_t nrounds)
+{
+
+       fpu_kern_enter();
+       aes_neon_cbcmac_update1(enc, in, nbytes, auth, nrounds);
+       fpu_kern_leave();
+}
+
+static void
+aes_neon_ccm_enc1_impl(const struct aesenc *enc, const uint8_t in[static 16],
+    uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+    uint32_t nrounds)
+{
+
+       fpu_kern_enter();
+       aes_neon_ccm_enc1(enc, in, out, nbytes, authctr, nrounds);
+       fpu_kern_leave();
+}
+
+static void
+aes_neon_ccm_dec1_impl(const struct aesenc *enc, const uint8_t in[static 16],
+    uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+    uint32_t nrounds)
+{
+
+       fpu_kern_enter();
+       aes_neon_ccm_dec1(enc, in, out, nbytes, authctr, nrounds);
+       fpu_kern_leave();
+}
+
 static int
 aes_neon_probe(void)
 {
@@ -204,4 +237,7 @@
        .ai_cbc_dec = aes_neon_cbc_dec_impl,
        .ai_xts_enc = aes_neon_xts_enc_impl,
        .ai_xts_dec = aes_neon_xts_dec_impl,
+       .ai_cbcmac_update1 = aes_neon_cbcmac_update1_impl,
+       .ai_ccm_enc1 = aes_neon_ccm_enc1_impl,
+       .ai_ccm_dec1 = aes_neon_ccm_dec1_impl,
 };
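
Like the existing cbc/xts wrappers in this file, each new _impl
function brackets the NEON routine with fpu_kern_enter() and
fpu_kern_leave(), since kernel code may not touch the FPU/SIMD
registers until the current user register state has been saved.  The
generic shape, with do_neon_work() as a hypothetical stand-in and the
prototypes normally coming from the kernel's machine-dependent FPU
header:

#include <stddef.h>
#include <stdint.h>

void fpu_kern_enter(void);
void fpu_kern_leave(void);
void do_neon_work(const uint8_t *, uint8_t *, size_t);	/* hypothetical */

static void
example_impl(const uint8_t *in, uint8_t *out, size_t nbytes)
{

	fpu_kern_enter();	/* save user FPU state before using NEON */
	do_neon_work(in, out, nbytes);
	fpu_kern_leave();	/* restore user FPU state */
}
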
diff -r 6465b7fbb940 -r e76f807c3205 sys/crypto/aes/arch/arm/aes_neon_subr.c
--- a/sys/crypto/aes/arch/arm/aes_neon_subr.c   Sat Jul 25 22:33:04 2020 +0000
+++ b/sys/crypto/aes/arch/arm/aes_neon_subr.c   Sat Jul 25 22:36:06 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: aes_neon_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $      */
+/*     $NetBSD: aes_neon_subr.c,v 1.3 2020/07/25 22:36:06 riastradh Exp $      */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,9 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.3 2020/07/25 22:36:06 riastradh Exp $");
+
+#include <sys/endian.h>
 
 #ifdef _KERNEL
 #include <sys/systm.h>
@@ -213,6 +215,89 @@
        storeblock(tweak, t);
 }
 
+void
+aes_neon_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
+    size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
+{
+       uint8x16_t auth;
+
+       KASSERT(nbytes);
+       KASSERT(nbytes % 16 == 0);
+
+       auth = loadblock(auth0);
+       for (; nbytes; nbytes -= 16, in += 16)
+               auth = aes_neon_enc1(enc, auth ^ loadblock(in), nrounds);
+       storeblock(auth0, auth);
+}
+
+/*
+ * XXX On aarch64, we have enough registers that we should be able to
+ * pipeline two simultaneous vpaes computations in an `aes_neon_enc2'
+ * function, which should substantially improve CCM throughput.
+ */
+
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+#define        vbetoh32q_u8    vrev32q_u8
+#define        vhtobe32q_u8    vrev32q_u8
+#elif _BYTE_ORDER == _BIG_ENDIAN
+#define        vbetoh32q_u8(x) (x)
+#define        vhtobe32q_u8(x) (x)
+#else
+#error what kind of endian are you anyway
+#endif
+
+void
+aes_neon_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
+    uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+    uint32_t nrounds)
+{
+       const uint32x4_t ctr32_inc = {0, 0, 0, 1};
+       uint8x16_t auth, ptxt, ctr_be;
+       uint32x4_t ctr;
+
+       KASSERT(nbytes);
+       KASSERT(nbytes % 16 == 0);
+
+       auth = loadblock(authctr);
+       ctr_be = loadblock(authctr + 16);
+       ctr = vreinterpretq_u32_u8(vbetoh32q_u8(ctr_be));
+       for (; nbytes; nbytes -= 16, in += 16, out += 16) {
+               ptxt = loadblock(in);
+               auth = aes_neon_enc1(enc, auth ^ ptxt, nrounds);
+               ctr = vaddq_u32(ctr, ctr32_inc);
+               ctr_be = vhtobe32q_u8(vreinterpretq_u8_u32(ctr));
+               storeblock(out, ptxt ^ aes_neon_enc1(enc, ctr_be, nrounds));
+       }
+       storeblock(authctr, auth);
+       storeblock(authctr + 16, ctr_be);
+}
+
+void
+aes_neon_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
+    uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+    uint32_t nrounds)
+{
+       const uint32x4_t ctr32_inc = {0, 0, 0, 1};
+       uint8x16_t auth, ctr_be, ptxt;
+       uint32x4_t ctr;
+
+       KASSERT(nbytes);
+       KASSERT(nbytes % 16 == 0);
+
+       auth = loadblock(authctr);
+       ctr_be = loadblock(authctr + 16);
+       ctr = vreinterpretq_u32_u8(vbetoh32q_u8(ctr_be));
+       for (; nbytes; nbytes -= 16, in += 16, out += 16) {
+               ctr = vaddq_u32(ctr, ctr32_inc);
+               ctr_be = vhtobe32q_u8(vreinterpretq_u8_u32(ctr));
+               ptxt = loadblock(in) ^ aes_neon_enc1(enc, ctr_be, nrounds);
+               storeblock(out, ptxt);
+               auth = aes_neon_enc1(enc, auth ^ ptxt, nrounds);
+       }
+       storeblock(authctr, auth);
+       storeblock(authctr + 16, ctr_be);
+}
+
 int
 aes_neon_selftest(void)
 {
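
The counter handling in the two loops above keeps the CTR block
big-endian in memory and converts it to host-order 32-bit lanes only to
add {0, 0, 0, 1}; vaddq_u32 does not carry between lanes, so this is a
32-bit counter confined to the last word of the block.  A scalar
equivalent of one counter update:

#include <sys/endian.h>		/* be32toh, htobe32, as in the commit */
#include <stdint.h>
#include <string.h>

/*
 * Scalar rendering of vbetoh32q_u8 / vaddq_u32 / vhtobe32q_u8 for the
 * one lane that changes: increment the last 32-bit word of the
 * big-endian counter block in host order and store it back.
 */
static void
ccm_ctr32_inc(uint8_t ctr[16])
{
	uint32_t lo;

	memcpy(&lo, ctr + 12, 4);
	lo = htobe32(be32toh(lo) + 1);
	memcpy(ctr + 12, &lo, 4);
}

This is also why the XXX note above suggests an aes_neon_enc2: within
each loop iteration the CBC-MAC encryption and the CTR keystream
encryption are independent of each other, so a two-block primitive
could run them interleaved.
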
diff -r 6465b7fbb940 -r e76f807c3205 sys/crypto/aes/arch/arm/arm_neon.h
--- a/sys/crypto/aes/arch/arm/arm_neon.h        Sat Jul 25 22:33:04 2020 +0000
+++ b/sys/crypto/aes/arch/arm/arm_neon.h        Sat Jul 25 22:36:06 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: arm_neon.h,v 1.3 2020/07/23 11:33:01 ryo Exp $ */
+/*     $NetBSD: arm_neon.h,v 1.4 2020/07/25 22:36:06 riastradh Exp $   */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -91,6 +91,13 @@
 
 _INTRINSATTR
 static __inline uint32x4_t
+vaddq_u32(uint32x4_t __v0, uint32x4_t __v1)
+{
+       return __v0 + __v1;
+}
+
+_INTRINSATTR
+static __inline uint32x4_t
 vcltq_s32(int32x4_t __v0, int32x4_t __v1)
 {
        return (uint32x4_t)(__v0 < __v1);
@@ -328,6 +335,19 @@
        return (uint8x16_t)__v;
 }
 
+_INTRINSATTR
+static __inline uint8x16_t
+vrev32q_u8(uint8x16_t __v)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+       return __builtin_shuffle(__v,
+           (uint8x16_t) { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 });
+#elif defined(__clang__)
+       return __builtin_shufflevector(__v, __v,
+           3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12);
+#endif
+}
+
 #if defined(__GNUC__) && !defined(__clang__)
 _INTRINSATTR
 static __inline uint32x4_t

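The vrev32q_u8 wrapper added at the end of arm_neon.h reverses the
bytes within each 32-bit lane, which on a little-endian host is exactly
the per-lane be32/host swap the CCM code needs.  GCC spells the
permutation as __builtin_shuffle with an index vector; Clang's
__builtin_shufflevector takes the two input vectors followed by literal
indices.  A scalar model of the permutation:

#include <stdint.h>

/*
 * Scalar model of vrev32q_u8: reverse the byte order within each
 * 32-bit lane of a 16-byte vector.
 */
static void
rev32_bytes(uint8_t v[16])
{
	unsigned lane;
	uint8_t t;

	for (lane = 0; lane < 16; lane += 4) {
		t = v[lane];     v[lane]     = v[lane + 3]; v[lane + 3] = t;
		t = v[lane + 1]; v[lane + 1] = v[lane + 2]; v[lane + 2] = t;
	}
}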

