Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src-draft/trunk]: src/sys/crypto/aes/arch/x86 Split SSE2 logic into separate...
details: https://anonhg.NetBSD.org/src-all/rev/1f9f3f817515
branches: trunk
changeset: 935164:1f9f3f817515
user: Taylor R Campbell <riastradh%NetBSD.org@localhost>
date: Fri Jun 26 21:15:43 2020 +0000
description:
Split SSE2 logic into separate units.
Ensure that there are no paths into files compiled with -msse -msse2
at all except via fpu_kern_enter.
I didn't run into a practical problem with this, but let's not leave
a ticking time bomb for subsequent toolchain changes.
diffstat:
sys/crypto/aes/arch/x86/aes_sse2.c | 2 +-
sys/crypto/aes/arch/x86/aes_sse2_dec.c | 2 +-
sys/crypto/aes/arch/x86/aes_sse2_enc.c | 2 +-
sys/crypto/aes/arch/x86/aes_sse2_impl.c | 541 ++-------------------------
sys/crypto/aes/arch/x86/aes_sse2_impl.h | 47 --
sys/crypto/aes/arch/x86/aes_sse2_internal.h | 50 ++
sys/crypto/aes/arch/x86/aes_sse2_subr.c | 525 +++++++++++++++++++++++++++
sys/crypto/aes/arch/x86/aes_sse2_subr.h | 59 +++
sys/crypto/aes/arch/x86/files.aessse2 | 9 +-
9 files changed, 692 insertions(+), 545 deletions(-)
diffs (truncated from 1378 to 300 lines):
diff -r d81ebf8d7a16 -r 1f9f3f817515 sys/crypto/aes/arch/x86/aes_sse2.c
--- a/sys/crypto/aes/arch/x86/aes_sse2.c Sat Jun 20 02:02:41 2020 +0000
+++ b/sys/crypto/aes/arch/x86/aes_sse2.c Fri Jun 26 21:15:43 2020 +0000
@@ -29,7 +29,7 @@
#include <lib/libkern/libkern.h>
-#include "aes_sse2_impl.h"
+#include "aes_sse2_internal.h"
static void
br_range_dec32le(uint32_t *p32, size_t nwords, const void *v)
diff -r d81ebf8d7a16 -r 1f9f3f817515 sys/crypto/aes/arch/x86/aes_sse2_dec.c
--- a/sys/crypto/aes/arch/x86/aes_sse2_dec.c Sat Jun 20 02:02:41 2020 +0000
+++ b/sys/crypto/aes/arch/x86/aes_sse2_dec.c Fri Jun 26 21:15:43 2020 +0000
@@ -27,7 +27,7 @@
#include <sys/types.h>
-#include "aes_sse2_impl.h"
+#include "aes_sse2_internal.h"
/* see inner.h */
void
diff -r d81ebf8d7a16 -r 1f9f3f817515 sys/crypto/aes/arch/x86/aes_sse2_enc.c
--- a/sys/crypto/aes/arch/x86/aes_sse2_enc.c Sat Jun 20 02:02:41 2020 +0000
+++ b/sys/crypto/aes/arch/x86/aes_sse2_enc.c Fri Jun 26 21:15:43 2020 +0000
@@ -27,7 +27,7 @@
#include <sys/types.h>
-#include "aes_sse2_impl.h"
+#include "aes_sse2_internal.h"
static inline void
add_round_key(__m128i q[static 4], const uint64_t sk[static 8])
diff -r d81ebf8d7a16 -r 1f9f3f817515 sys/crypto/aes/arch/x86/aes_sse2_impl.c
--- a/sys/crypto/aes/arch/x86/aes_sse2_impl.c Sat Jun 20 02:02:41 2020 +0000
+++ b/sys/crypto/aes/arch/x86/aes_sse2_impl.c Fri Jun 26 21:15:43 2020 +0000
@@ -31,7 +31,6 @@
#include <sys/types.h>
#include <sys/endian.h>
-#include <sys/systm.h>
#include <crypto/aes/aes.h>
#include <crypto/aes/arch/x86/aes_sse2.h>
@@ -41,532 +40,101 @@
#include <x86/fpu.h>
#include <x86/specialreg.h>
-#include "aes_sse2_impl.h"
+#include "aes_sse2_subr.h"
static void
-aes_sse2_setkey(uint64_t rk[static 30], const void *key, uint32_t nrounds)
+aes_sse2_setenckey_impl(struct aesenc *enc, const uint8_t *key,
+ uint32_t nrounds)
{
- size_t key_len;
-
- switch (nrounds) {
- case 10:
- key_len = 16;
- break;
- case 12:
- key_len = 24;
- break;
- case 14:
- key_len = 32;
- break;
- default:
- panic("invalid AES nrounds: %u", nrounds);
- }
fpu_kern_enter();
- aes_sse2_keysched(rk, key, key_len);
+ aes_sse2_setkey(enc->aese_aes.aes_rk64, key, nrounds);
fpu_kern_leave();
}
static void
-aes_sse2_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
+aes_sse2_setdeckey_impl(struct aesdec *dec, const uint8_t *key,
+ uint32_t nrounds)
{
- aes_sse2_setkey(enc->aese_aes.aes_rk64, key, nrounds);
-}
-
-static void
-aes_sse2_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
-{
-
+ fpu_kern_enter();
/*
* BearSSL computes InvMixColumns on the fly -- no need for
* distinct decryption round keys.
*/
aes_sse2_setkey(dec->aesd_aes.aes_rk64, key, nrounds);
-}
-
-static void
-aes_sse2_enc(const struct aesenc *enc, const uint8_t in[static 16],
- uint8_t out[static 16], uint32_t nrounds)
-{
- uint64_t sk_exp[120];
- __m128i q[4];
-
- fpu_kern_enter();
-
- /* Expand round keys for bitslicing. */
- aes_sse2_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);
-
- /* Load input block interleaved with garbage blocks. */
- q[0] = aes_sse2_interleave_in(_mm_loadu_epi8(in));
- q[1] = q[2] = q[3] = _mm_setzero_si128();
-
- /* Transform to bitslice, decrypt, transform from bitslice. */
- aes_sse2_ortho(q);
- aes_sse2_bitslice_encrypt(nrounds, sk_exp, q);
- aes_sse2_ortho(q);
-
- /* Store output block. */
- _mm_storeu_epi8(out, aes_sse2_interleave_out(q[0]));
-
- /* Paranoia: Zero temporary buffers. */
- explicit_memset(sk_exp, 0, sizeof sk_exp);
- explicit_memset(q, 0, sizeof q);
-
fpu_kern_leave();
}
static void
-aes_sse2_dec(const struct aesdec *dec, const uint8_t in[static 16],
+aes_sse2_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
uint8_t out[static 16], uint32_t nrounds)
{
- uint64_t sk_exp[120];
- __m128i q[4];
fpu_kern_enter();
-
- /* Expand round keys for bitslicing. */
- aes_sse2_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64);
-
- /* Load input block interleaved with garbage blocks. */
- q[0] = aes_sse2_interleave_in(_mm_loadu_epi8(in));
- q[1] = q[2] = q[3] = _mm_setzero_si128();
-
- /* Transform to bitslice, decrypt, transform from bitslice. */
- aes_sse2_ortho(q);
- aes_sse2_bitslice_decrypt(nrounds, sk_exp, q);
- aes_sse2_ortho(q);
-
- /* Store output block. */
- _mm_storeu_epi8(out, aes_sse2_interleave_out(q[0]));
-
- /* Paranoia: Zero temporary buffers. */
- explicit_memset(sk_exp, 0, sizeof sk_exp);
- explicit_memset(q, 0, sizeof q);
-
+ aes_sse2_enc(enc, in, out, nrounds);
fpu_kern_leave();
}
static void
-aes_sse2_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
- uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
- uint32_t nrounds)
+aes_sse2_dec_impl(const struct aesdec *dec, const uint8_t in[static 16],
+ uint8_t out[static 16], uint32_t nrounds)
{
- uint64_t sk_exp[120];
- __m128i q[4];
- __m128i cv;
-
- KASSERT(nbytes % 16 == 0);
-
- /* Skip if there's nothing to do. */
- if (nbytes == 0)
- return;
fpu_kern_enter();
-
- /* Expand round keys for bitslicing. */
- aes_sse2_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);
-
- /* Load the IV. */
- cv = _mm_loadu_epi8(iv);
-
- for (; nbytes; nbytes -= 16, in += 16, out += 16) {
- /* Load input block and apply CV. */
- q[0] = aes_sse2_interleave_in(cv ^ _mm_loadu_epi8(in));
-
- /* Transform to bitslice, encrypt, transform from bitslice. */
- aes_sse2_ortho(q);
- aes_sse2_bitslice_encrypt(nrounds, sk_exp, q);
- aes_sse2_ortho(q);
-
- /* Remember ciphertext as CV and store output block. */
- cv = aes_sse2_interleave_out(q[0]);
- _mm_storeu_epi8(out, cv);
- }
-
- /* Store updated IV. */
- _mm_storeu_epi8(iv, cv);
-
- /* Paranoia: Zero temporary buffers. */
- explicit_memset(sk_exp, 0, sizeof sk_exp);
- explicit_memset(q, 0, sizeof q);
-
+ aes_sse2_dec(dec, in, out, nrounds);
fpu_kern_leave();
}
static void
-aes_sse2_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
- uint8_t out[static 16], size_t nbytes, uint8_t ivp[static 16],
+aes_sse2_cbc_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
+ uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
uint32_t nrounds)
{
- uint64_t sk_exp[120];
- __m128i q[4];
- __m128i cv, iv, w;
- KASSERT(nbytes % 16 == 0);
-
- /* Skip if there's nothing to do. */
if (nbytes == 0)
return;
-
fpu_kern_enter();
-
- /* Expand round keys for bitslicing. */
- aes_sse2_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64);
-
- /* Load the IV. */
- iv = _mm_loadu_epi8(ivp);
-
- /* Load the last cipher block. */
- cv = _mm_loadu_epi8(in + nbytes - 16);
-
- /* Store the updated IV. */
- _mm_storeu_epi8(ivp, cv);
-
- /* Process the last blocks if not an even multiple of four. */
- if (nbytes % (4*16)) {
- unsigned n = (nbytes/16) % 4;
-
- KASSERT(n > 0);
- KASSERT(n < 4);
-
- q[1] = q[2] = q[3] = _mm_setzero_si128();
- q[n - 1] = aes_sse2_interleave_in(cv);
- switch (nbytes % 64) {
- case 48:
- w = _mm_loadu_epi8(in + nbytes - 32);
- q[1] = aes_sse2_interleave_in(w);
- /*FALLTHROUGH*/
- case 32:
- w = _mm_loadu_epi8(in + nbytes - 48);
- q[0] = aes_sse2_interleave_in(w);
- /*FALLTHROUGH*/
- case 16:
- break;
- }
-
- /* Decrypt. */
- aes_sse2_ortho(q);
- aes_sse2_bitslice_decrypt(nrounds, sk_exp, q);
- aes_sse2_ortho(q);
-
- do {
- n--;
- w = aes_sse2_interleave_out(q[n]);
- if ((nbytes -= 16) == 0)
- goto out;
- cv = _mm_loadu_epi8(in + nbytes - 16);
- _mm_storeu_epi8(out + nbytes, w ^ cv);
- } while (n);
- }
-
- for (;;) {
- KASSERT(nbytes >= 64);
- nbytes -= 64;
-
- /*
- * 1. Set up upper cipher block from cv.
- * 2. Load lower cipher block into cv and set it up.
- * 3. Decrypt.
- */
- q[3] = aes_sse2_interleave_in(cv);
-
- w = _mm_loadu_epi8(in + nbytes + 4*8);
- q[2] = aes_sse2_interleave_in(w);
-
- w = _mm_loadu_epi8(in + nbytes + 4*4);
- q[1] = aes_sse2_interleave_in(w);
Home |
Main Index |
Thread Index |
Old Index