Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/crypto/aes/arch/x86 Split SSE2 logic into separate units.
details: https://anonhg.NetBSD.org/src/rev/2cac8193eea0
branches: trunk
changeset: 1011438:2cac8193eea0
user: riastradh <riastradh%NetBSD.org@localhost>
date: Mon Jun 29 23:50:05 2020 +0000
description:
Split SSE2 logic into separate units.
Ensure that there are no paths into files compiled with -msse -msse2
at all except via fpu_kern_enter.
I didn't run into a practical problem with this, but let's not leave
a ticking time bomb for subsequent toolchain changes in case the mere
declaration of local __m128i variables causes trouble.
diffstat:
sys/crypto/aes/arch/x86/aes_sse2.h | 27 +-
sys/crypto/aes/arch/x86/aes_sse2_impl.c | 545 ++-----------------------------
sys/crypto/aes/arch/x86/aes_sse2_impl.h | 4 +-
sys/crypto/aes/arch/x86/aes_sse2_subr.c | 526 ++++++++++++++++++++++++++++++
sys/crypto/aes/arch/x86/files.aessse2 | 11 +-
5 files changed, 612 insertions(+), 501 deletions(-)
diffs (truncated from 1245 to 300 lines):
diff -r 38d573a03b83 -r 2cac8193eea0 sys/crypto/aes/arch/x86/aes_sse2.h
--- a/sys/crypto/aes/arch/x86/aes_sse2.h Mon Jun 29 23:47:54 2020 +0000
+++ b/sys/crypto/aes/arch/x86/aes_sse2.h Mon Jun 29 23:50:05 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: aes_sse2.h,v 1.1 2020/06/29 23:47:54 riastradh Exp $ */
+/* $NetBSD: aes_sse2.h,v 1.2 2020/06/29 23:50:05 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -31,6 +31,31 @@
#include <crypto/aes/aes.h>
+/*
+ * These functions MUST NOT use any vector registers for parameters or
+ * results -- the caller is compiled with -mno-sse &c. in the kernel,
+ * and dynamically turns on the vector unit just before calling them.
+ * Internal subroutines that use the vector unit for parameters are
+ * declared in aes_sse2_impl.h instead.
+ */
+
+void aes_sse2_setkey(uint64_t[static 30], const void *, uint32_t);
+
+void aes_sse2_enc(const struct aesenc *, const uint8_t in[static 16],
+ uint8_t[static 16], uint32_t);
+void aes_sse2_dec(const struct aesdec *, const uint8_t in[static 16],
+ uint8_t[static 16], uint32_t);
+void aes_sse2_cbc_enc(const struct aesenc *, const uint8_t[static 16],
+ uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+void aes_sse2_cbc_dec(const struct aesdec *, const uint8_t[static 16],
+ uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+void aes_sse2_xts_enc(const struct aesenc *, const uint8_t[static 16],
+ uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+void aes_sse2_xts_dec(const struct aesdec *, const uint8_t[static 16],
+ uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+
+int aes_sse2_selftest(void);
+
extern struct aes_impl aes_sse2_impl;
#endif /* _CRYPTO_AES_ARCH_X86_AES_SSE2_H */
diff -r 38d573a03b83 -r 2cac8193eea0 sys/crypto/aes/arch/x86/aes_sse2_impl.c
--- a/sys/crypto/aes/arch/x86/aes_sse2_impl.c Mon Jun 29 23:47:54 2020 +0000
+++ b/sys/crypto/aes/arch/x86/aes_sse2_impl.c Mon Jun 29 23:50:05 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: aes_sse2_impl.c,v 1.1 2020/06/29 23:47:54 riastradh Exp $ */
+/* $NetBSD: aes_sse2_impl.c,v 1.2 2020/06/29 23:50:05 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,11 +27,10 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.1 2020/06/29 23:47:54 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.2 2020/06/29 23:50:05 riastradh Exp $");
#include <sys/types.h>
#include <sys/endian.h>
-#include <sys/systm.h>
#include <crypto/aes/aes.h>
#include <crypto/aes/arch/x86/aes_sse2.h>
@@ -41,532 +40,99 @@
#include <x86/fpu.h>
#include <x86/specialreg.h>
-#include "aes_sse2_impl.h"
-
static void
-aes_sse2_setkey(uint64_t rk[static 30], const void *key, uint32_t nrounds)
+aes_sse2_setenckey_impl(struct aesenc *enc, const uint8_t *key,
+ uint32_t nrounds)
{
- size_t key_len;
-
- switch (nrounds) {
- case 10:
- key_len = 16;
- break;
- case 12:
- key_len = 24;
- break;
- case 14:
- key_len = 32;
- break;
- default:
- panic("invalid AES nrounds: %u", nrounds);
- }
fpu_kern_enter();
- aes_sse2_keysched(rk, key, key_len);
+ aes_sse2_setkey(enc->aese_aes.aes_rk64, key, nrounds);
fpu_kern_leave();
}
static void
-aes_sse2_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
+aes_sse2_setdeckey_impl(struct aesdec *dec, const uint8_t *key,
+ uint32_t nrounds)
{
- aes_sse2_setkey(enc->aese_aes.aes_rk64, key, nrounds);
-}
-
-static void
-aes_sse2_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
-{
-
+ fpu_kern_enter();
/*
* BearSSL computes InvMixColumns on the fly -- no need for
* distinct decryption round keys.
*/
aes_sse2_setkey(dec->aesd_aes.aes_rk64, key, nrounds);
-}
-
-static void
-aes_sse2_enc(const struct aesenc *enc, const uint8_t in[static 16],
- uint8_t out[static 16], uint32_t nrounds)
-{
- uint64_t sk_exp[120];
- __m128i q[4];
-
- fpu_kern_enter();
-
- /* Expand round keys for bitslicing. */
- aes_sse2_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);
-
- /* Load input block interleaved with garbage blocks. */
- q[0] = aes_sse2_interleave_in(_mm_loadu_epi8(in));
- q[1] = q[2] = q[3] = _mm_setzero_si128();
-
- /* Transform to bitslice, decrypt, transform from bitslice. */
- aes_sse2_ortho(q);
- aes_sse2_bitslice_encrypt(nrounds, sk_exp, q);
- aes_sse2_ortho(q);
-
- /* Store output block. */
- _mm_storeu_epi8(out, aes_sse2_interleave_out(q[0]));
-
- /* Paranoia: Zero temporary buffers. */
- explicit_memset(sk_exp, 0, sizeof sk_exp);
- explicit_memset(q, 0, sizeof q);
-
fpu_kern_leave();
}
static void
-aes_sse2_dec(const struct aesdec *dec, const uint8_t in[static 16],
+aes_sse2_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
uint8_t out[static 16], uint32_t nrounds)
{
- uint64_t sk_exp[120];
- __m128i q[4];
fpu_kern_enter();
-
- /* Expand round keys for bitslicing. */
- aes_sse2_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64);
-
- /* Load input block interleaved with garbage blocks. */
- q[0] = aes_sse2_interleave_in(_mm_loadu_epi8(in));
- q[1] = q[2] = q[3] = _mm_setzero_si128();
-
- /* Transform to bitslice, decrypt, transform from bitslice. */
- aes_sse2_ortho(q);
- aes_sse2_bitslice_decrypt(nrounds, sk_exp, q);
- aes_sse2_ortho(q);
-
- /* Store output block. */
- _mm_storeu_epi8(out, aes_sse2_interleave_out(q[0]));
-
- /* Paranoia: Zero temporary buffers. */
- explicit_memset(sk_exp, 0, sizeof sk_exp);
- explicit_memset(q, 0, sizeof q);
-
+ aes_sse2_enc(enc, in, out, nrounds);
fpu_kern_leave();
}
static void
-aes_sse2_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
- uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
- uint32_t nrounds)
+aes_sse2_dec_impl(const struct aesdec *dec, const uint8_t in[static 16],
+ uint8_t out[static 16], uint32_t nrounds)
{
- uint64_t sk_exp[120];
- __m128i q[4];
- __m128i cv;
-
- KASSERT(nbytes % 16 == 0);
-
- /* Skip if there's nothing to do. */
- if (nbytes == 0)
- return;
fpu_kern_enter();
-
- /* Expand round keys for bitslicing. */
- aes_sse2_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);
-
- /* Load the IV. */
- cv = _mm_loadu_epi8(iv);
-
- for (; nbytes; nbytes -= 16, in += 16, out += 16) {
- /* Load input block and apply CV. */
- q[0] = aes_sse2_interleave_in(cv ^ _mm_loadu_epi8(in));
-
- /* Transform to bitslice, encrypt, transform from bitslice. */
- aes_sse2_ortho(q);
- aes_sse2_bitslice_encrypt(nrounds, sk_exp, q);
- aes_sse2_ortho(q);
-
- /* Remember ciphertext as CV and store output block. */
- cv = aes_sse2_interleave_out(q[0]);
- _mm_storeu_epi8(out, cv);
- }
-
- /* Store updated IV. */
- _mm_storeu_epi8(iv, cv);
-
- /* Paranoia: Zero temporary buffers. */
- explicit_memset(sk_exp, 0, sizeof sk_exp);
- explicit_memset(q, 0, sizeof q);
-
+ aes_sse2_dec(dec, in, out, nrounds);
fpu_kern_leave();
}
static void
-aes_sse2_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
- uint8_t out[static 16], size_t nbytes, uint8_t ivp[static 16],
+aes_sse2_cbc_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
+ uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
uint32_t nrounds)
{
- uint64_t sk_exp[120];
- __m128i q[4];
- __m128i cv, iv, w;
- KASSERT(nbytes % 16 == 0);
-
- /* Skip if there's nothing to do. */
if (nbytes == 0)
return;
-
fpu_kern_enter();
-
- /* Expand round keys for bitslicing. */
- aes_sse2_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64);
-
- /* Load the IV. */
- iv = _mm_loadu_epi8(ivp);
-
- /* Load the last cipher block. */
- cv = _mm_loadu_epi8(in + nbytes - 16);
-
- /* Store the updated IV. */
- _mm_storeu_epi8(ivp, cv);
-
- /* Process the last blocks if not an even multiple of four. */
- if (nbytes % (4*16)) {
- unsigned n = (nbytes/16) % 4;
-
- KASSERT(n > 0);
- KASSERT(n < 4);
-
- q[1] = q[2] = q[3] = _mm_setzero_si128();
- q[n - 1] = aes_sse2_interleave_in(cv);
- switch (nbytes % 64) {
- case 48:
- w = _mm_loadu_epi8(in + nbytes - 32);
- q[1] = aes_sse2_interleave_in(w);
- /*FALLTHROUGH*/
- case 32:
- w = _mm_loadu_epi8(in + nbytes - 48);
- q[0] = aes_sse2_interleave_in(w);
- /*FALLTHROUGH*/
- case 16:
- break;
- }
-
- /* Decrypt. */
- aes_sse2_ortho(q);
- aes_sse2_bitslice_decrypt(nrounds, sk_exp, q);
- aes_sse2_ortho(q);
-
- do {
- n--;
- w = aes_sse2_interleave_out(q[n]);
- if ((nbytes -= 16) == 0)
- goto out;
- cv = _mm_loadu_epi8(in + nbytes - 16);
- _mm_storeu_epi8(out + nbytes, w ^ cv);
- } while (n);
- }
-
- for (;;) {
Home |
Main Index |
Thread Index |
Old Index