Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/crypto/aes/arch/x86 Split SSE2 logic into separate units.



details:   https://anonhg.NetBSD.org/src/rev/2cac8193eea0
branches:  trunk
changeset: 1011438:2cac8193eea0
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Mon Jun 29 23:50:05 2020 +0000

description:
Split SSE2 logic into separate units.

Ensure that there are no paths into files compiled with -msse -msse2
at all except via fpu_kern_enter.

I didn't run into a practical problem with this, but let's not leave
a ticking time bomb for subsequent toolchain changes in case the mere
declaration of local __m128i variables causes trouble.

diffstat:

 sys/crypto/aes/arch/x86/aes_sse2.h      |   27 +-
 sys/crypto/aes/arch/x86/aes_sse2_impl.c |  545 ++-----------------------------
 sys/crypto/aes/arch/x86/aes_sse2_impl.h |    4 +-
 sys/crypto/aes/arch/x86/aes_sse2_subr.c |  526 ++++++++++++++++++++++++++++++
 sys/crypto/aes/arch/x86/files.aessse2   |   11 +-
 5 files changed, 612 insertions(+), 501 deletions(-)

diffs (truncated from 1245 to 300 lines):

diff -r 38d573a03b83 -r 2cac8193eea0 sys/crypto/aes/arch/x86/aes_sse2.h
--- a/sys/crypto/aes/arch/x86/aes_sse2.h        Mon Jun 29 23:47:54 2020 +0000
+++ b/sys/crypto/aes/arch/x86/aes_sse2.h        Mon Jun 29 23:50:05 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: aes_sse2.h,v 1.1 2020/06/29 23:47:54 riastradh Exp $   */
+/*     $NetBSD: aes_sse2.h,v 1.2 2020/06/29 23:50:05 riastradh Exp $   */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -31,6 +31,31 @@
 
 #include <crypto/aes/aes.h>
 
+/*
+ * These functions MUST NOT use any vector registers for parameters or
+ * results -- the caller is compiled with -mno-sse &c. in the kernel,
+ * and dynamically turns on the vector unit just before calling them.
+ * Internal subroutines that use the vector unit for parameters are
+ * declared in aes_sse2_impl.h instead.
+ */
+
+void aes_sse2_setkey(uint64_t[static 30], const void *, uint32_t);
+
+void aes_sse2_enc(const struct aesenc *, const uint8_t in[static 16],
+    uint8_t[static 16], uint32_t);
+void aes_sse2_dec(const struct aesdec *, const uint8_t in[static 16],
+    uint8_t[static 16], uint32_t);
+void aes_sse2_cbc_enc(const struct aesenc *, const uint8_t[static 16],
+    uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+void aes_sse2_cbc_dec(const struct aesdec *, const uint8_t[static 16],
+    uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+void aes_sse2_xts_enc(const struct aesenc *, const uint8_t[static 16],
+    uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+void aes_sse2_xts_dec(const struct aesdec *, const uint8_t[static 16],
+    uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+
+int aes_sse2_selftest(void);
+
 extern struct aes_impl aes_sse2_impl;
 
 #endif /* _CRYPTO_AES_ARCH_X86_AES_SSE2_H */
diff -r 38d573a03b83 -r 2cac8193eea0 sys/crypto/aes/arch/x86/aes_sse2_impl.c
--- a/sys/crypto/aes/arch/x86/aes_sse2_impl.c   Mon Jun 29 23:47:54 2020 +0000
+++ b/sys/crypto/aes/arch/x86/aes_sse2_impl.c   Mon Jun 29 23:50:05 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: aes_sse2_impl.c,v 1.1 2020/06/29 23:47:54 riastradh Exp $      */
+/*     $NetBSD: aes_sse2_impl.c,v 1.2 2020/06/29 23:50:05 riastradh Exp $      */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,11 +27,10 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.1 2020/06/29 23:47:54 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.2 2020/06/29 23:50:05 riastradh Exp $");
 
 #include <sys/types.h>
 #include <sys/endian.h>
-#include <sys/systm.h>
 
 #include <crypto/aes/aes.h>
 #include <crypto/aes/arch/x86/aes_sse2.h>
@@ -41,532 +40,99 @@
 #include <x86/fpu.h>
 #include <x86/specialreg.h>
 
-#include "aes_sse2_impl.h"
-
 static void
-aes_sse2_setkey(uint64_t rk[static 30], const void *key, uint32_t nrounds)
+aes_sse2_setenckey_impl(struct aesenc *enc, const uint8_t *key,
+    uint32_t nrounds)
 {
-       size_t key_len;
-
-       switch (nrounds) {
-       case 10:
-               key_len = 16;
-               break;
-       case 12:
-               key_len = 24;
-               break;
-       case 14:
-               key_len = 32;
-               break;
-       default:
-               panic("invalid AES nrounds: %u", nrounds);
-       }
 
        fpu_kern_enter();
-       aes_sse2_keysched(rk, key, key_len);
+       aes_sse2_setkey(enc->aese_aes.aes_rk64, key, nrounds);
        fpu_kern_leave();
 }
 
 static void
-aes_sse2_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
+aes_sse2_setdeckey_impl(struct aesdec *dec, const uint8_t *key,
+    uint32_t nrounds)
 {
 
-       aes_sse2_setkey(enc->aese_aes.aes_rk64, key, nrounds);
-}
-
-static void
-aes_sse2_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
-{
-
+       fpu_kern_enter();
        /*
         * BearSSL computes InvMixColumns on the fly -- no need for
         * distinct decryption round keys.
         */
        aes_sse2_setkey(dec->aesd_aes.aes_rk64, key, nrounds);
-}
-
-static void
-aes_sse2_enc(const struct aesenc *enc, const uint8_t in[static 16],
-    uint8_t out[static 16], uint32_t nrounds)
-{
-       uint64_t sk_exp[120];
-       __m128i q[4];
-
-       fpu_kern_enter();
-
-       /* Expand round keys for bitslicing.  */
-       aes_sse2_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);
-
-       /* Load input block interleaved with garbage blocks.  */
-       q[0] = aes_sse2_interleave_in(_mm_loadu_epi8(in));
-       q[1] = q[2] = q[3] = _mm_setzero_si128();
-
-       /* Transform to bitslice, decrypt, transform from bitslice.  */
-       aes_sse2_ortho(q);
-       aes_sse2_bitslice_encrypt(nrounds, sk_exp, q);
-       aes_sse2_ortho(q);
-
-       /* Store output block.  */
-       _mm_storeu_epi8(out, aes_sse2_interleave_out(q[0]));
-
-       /* Paranoia: Zero temporary buffers.  */
-       explicit_memset(sk_exp, 0, sizeof sk_exp);
-       explicit_memset(q, 0, sizeof q);
-
        fpu_kern_leave();
 }
 
 static void
-aes_sse2_dec(const struct aesdec *dec, const uint8_t in[static 16],
+aes_sse2_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
     uint8_t out[static 16], uint32_t nrounds)
 {
-       uint64_t sk_exp[120];
-       __m128i q[4];
 
        fpu_kern_enter();
-
-       /* Expand round keys for bitslicing.  */
-       aes_sse2_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64);
-
-       /* Load input block interleaved with garbage blocks.  */
-       q[0] = aes_sse2_interleave_in(_mm_loadu_epi8(in));
-       q[1] = q[2] = q[3] = _mm_setzero_si128();
-
-       /* Transform to bitslice, decrypt, transform from bitslice.  */
-       aes_sse2_ortho(q);
-       aes_sse2_bitslice_decrypt(nrounds, sk_exp, q);
-       aes_sse2_ortho(q);
-
-       /* Store output block.  */
-       _mm_storeu_epi8(out, aes_sse2_interleave_out(q[0]));
-
-       /* Paranoia: Zero temporary buffers.  */
-       explicit_memset(sk_exp, 0, sizeof sk_exp);
-       explicit_memset(q, 0, sizeof q);
-
+       aes_sse2_enc(enc, in, out, nrounds);
        fpu_kern_leave();
 }
 
 static void
-aes_sse2_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
-    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
-    uint32_t nrounds)
+aes_sse2_dec_impl(const struct aesdec *dec, const uint8_t in[static 16],
+    uint8_t out[static 16], uint32_t nrounds)
 {
-       uint64_t sk_exp[120];
-       __m128i q[4];
-       __m128i cv;
-
-       KASSERT(nbytes % 16 == 0);
-
-       /* Skip if there's nothing to do.  */
-       if (nbytes == 0)
-               return;
 
        fpu_kern_enter();
-
-       /* Expand round keys for bitslicing.  */
-       aes_sse2_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);
-
-       /* Load the IV.  */
-       cv = _mm_loadu_epi8(iv);
-
-       for (; nbytes; nbytes -= 16, in += 16, out += 16) {
-               /* Load input block and apply CV.  */
-               q[0] = aes_sse2_interleave_in(cv ^ _mm_loadu_epi8(in));
-
-               /* Transform to bitslice, encrypt, transform from bitslice.  */
-               aes_sse2_ortho(q);
-               aes_sse2_bitslice_encrypt(nrounds, sk_exp, q);
-               aes_sse2_ortho(q);
-
-               /* Remember ciphertext as CV and store output block.  */
-               cv = aes_sse2_interleave_out(q[0]);
-               _mm_storeu_epi8(out, cv);
-       }
-
-       /* Store updated IV.  */
-       _mm_storeu_epi8(iv, cv);
-
-       /* Paranoia: Zero temporary buffers.  */
-       explicit_memset(sk_exp, 0, sizeof sk_exp);
-       explicit_memset(q, 0, sizeof q);
-
+       aes_sse2_dec(dec, in, out, nrounds);
        fpu_kern_leave();
 }
 
 static void
-aes_sse2_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
-    uint8_t out[static 16], size_t nbytes, uint8_t ivp[static 16],
+aes_sse2_cbc_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
+    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
     uint32_t nrounds)
 {
-       uint64_t sk_exp[120];
-       __m128i q[4];
-       __m128i cv, iv, w;
 
-       KASSERT(nbytes % 16 == 0);
-
-       /* Skip if there's nothing to do.  */
        if (nbytes == 0)
                return;
-
        fpu_kern_enter();
-
-       /* Expand round keys for bitslicing.  */
-       aes_sse2_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64);
-
-       /* Load the IV.  */
-       iv = _mm_loadu_epi8(ivp);
-
-       /* Load the last cipher block.  */
-       cv = _mm_loadu_epi8(in + nbytes - 16);
-
-       /* Store the updated IV.  */
-       _mm_storeu_epi8(ivp, cv);
-
-       /* Process the last blocks if not an even multiple of four.  */
-       if (nbytes % (4*16)) {
-               unsigned n = (nbytes/16) % 4;
-
-               KASSERT(n > 0);
-               KASSERT(n < 4);
-
-               q[1] = q[2] = q[3] = _mm_setzero_si128();
-               q[n - 1] = aes_sse2_interleave_in(cv);
-               switch (nbytes % 64) {
-               case 48:
-                       w = _mm_loadu_epi8(in + nbytes - 32);
-                       q[1] = aes_sse2_interleave_in(w);
-                       /*FALLTHROUGH*/
-               case 32:
-                       w = _mm_loadu_epi8(in + nbytes - 48);
-                       q[0] = aes_sse2_interleave_in(w);
-                       /*FALLTHROUGH*/
-               case 16:
-                       break;
-               }
-
-               /* Decrypt.  */
-               aes_sse2_ortho(q);
-               aes_sse2_bitslice_decrypt(nrounds, sk_exp, q);
-               aes_sse2_ortho(q);
-
-               do {
-                       n--;
-                       w = aes_sse2_interleave_out(q[n]);
-                       if ((nbytes -= 16) == 0)
-                               goto out;
-                       cv = _mm_loadu_epi8(in + nbytes - 16);
-                       _mm_storeu_epi8(out + nbytes, w ^ cv);
-               } while (n);
-       }
-
-       for (;;) {



Home | Main Index | Thread Index | Old Index