Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/crypto Align critical-path loops in AES and ChaCha.
details: https://anonhg.NetBSD.org/src/rev/a9d471e52719
branches: trunk
changeset: 974309:a9d471e52719
user: riastradh <riastradh%NetBSD.org@localhost>
date: Mon Jul 27 20:53:22 2020 +0000
description:
Align critical-path loops in AES and ChaCha.
diffstat:
sys/crypto/aes/arch/arm/aes_armv8_64.S | 18 +++++++++++++++++-
sys/crypto/aes/arch/arm/aes_neon_32.S | 4 +++-
sys/crypto/aes/arch/x86/aes_ni_64.S | 18 +++++++++++++++++-
sys/crypto/chacha/arch/arm/chacha_neon_64.S | 4 +++-
4 files changed, 40 insertions(+), 4 deletions(-)
diffs (truncated from 324 to 300 lines):
diff -r 972f4e85f879 -r a9d471e52719 sys/crypto/aes/arch/arm/aes_armv8_64.S
--- a/sys/crypto/aes/arch/arm/aes_armv8_64.S Mon Jul 27 20:52:10 2020 +0000
+++ b/sys/crypto/aes/arch/arm/aes_armv8_64.S Mon Jul 27 20:53:22 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: aes_armv8_64.S,v 1.8 2020/07/25 22:33:04 riastradh Exp $ */
+/* $NetBSD: aes_armv8_64.S,v 1.9 2020/07/27 20:53:22 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -440,6 +440,7 @@
ENTRY(aesarmv8_enctodec)
ldr q0, [x0, x2, lsl #4] /* load last round key */
b 2f
+ _ALIGN_TEXT
1: aesimc v0.16b, v0.16b /* convert encryption to decryption */
2: str q0, [x1], #0x10 /* store round key */
subs x2, x2, #1 /* count down round */
@@ -503,6 +504,7 @@
mov x9, x0 /* x9 := enckey */
mov x10, x3 /* x10 := nbytes */
ldr q0, [x4] /* q0 := chaining value */
+ _ALIGN_TEXT
1: ldr q1, [x1], #0x10 /* q1 := plaintext block */
eor v0.16b, v0.16b, v1.16b /* q0 := cv ^ ptxt */
mov x0, x9 /* x0 := enckey */
@@ -539,6 +541,7 @@
ldr q0, [x1, #-0x10]! /* q0 := last ciphertext block */
str q0, [x4] /* update iv */
b 2f
+ _ALIGN_TEXT
1: ldr q31, [x1, #-0x10]! /* q31 := chaining value */
eor v0.16b, v0.16b, v31.16b /* q0 := plaintext block */
str q0, [x2, #-0x10]! /* store plaintext block */
@@ -576,6 +579,7 @@
ldp q6, q7, [x1, #-0x20]! /* q6, q7 := last ciphertext blocks */
str q7, [x4] /* update iv */
b 2f
+ _ALIGN_TEXT
1: ldp q6, q7, [x1, #-0x20]!
eor v0.16b, v0.16b, v7.16b /* q0 := pt0 */
stp q0, q1, [x2, #-0x20]!
@@ -629,6 +633,7 @@
mov x9, x0 /* x9 := enckey */
mov x10, x3 /* x10 := nbytes */
ldr q31, [x4] /* q31 := tweak */
+ _ALIGN_TEXT
1: ldr q0, [x1], #0x10 /* q0 := ptxt */
mov x0, x9 /* x0 := enckey */
mov x3, x5 /* x3 := nrounds */
@@ -661,6 +666,7 @@
mov x9, x0 /* x9 := enckey */
mov x10, x3 /* x10 := nbytes */
ldr q31, [x4] /* q31 := tweak */
+ _ALIGN_TEXT
1: mov v24.16b, v31.16b /* q24 := tweak[0] */
bl aesarmv8_xts_mulx /* q31 *= x; trash x0/q0/q1 */
mov v25.16b, v31.16b /* q25 := tweak[1] */
@@ -729,6 +735,7 @@
mov x9, x0 /* x9 := deckey */
mov x10, x3 /* x10 := nbytes */
ldr q31, [x4] /* q31 := tweak */
+ _ALIGN_TEXT
1: ldr q0, [x1], #0x10 /* q0 := ctxt */
mov x0, x9 /* x0 := deckey */
mov x3, x5 /* x3 := nrounds */
@@ -761,6 +768,7 @@
mov x9, x0 /* x9 := deckey */
mov x10, x3 /* x10 := nbytes */
ldr q31, [x4] /* q31 := tweak */
+ _ALIGN_TEXT
1: mov v24.16b, v31.16b /* q24 := tweak[0] */
bl aesarmv8_xts_mulx /* q31 *= x; trash x0/q0/q1 */
mov v25.16b, v31.16b /* q25 := tweak[1] */
@@ -879,6 +887,7 @@
ldr q0, [x3] /* q0 := initial authenticator */
mov x9, x0 /* x9 := enckey */
mov x5, x3 /* x5 := &auth (enc1 trashes x3) */
+ _ALIGN_TEXT
1: ldr q1, [x1], #0x10 /* q1 := plaintext block */
mov x0, x9 /* x0 := enckey */
mov x3, x4 /* x3 := nrounds */
@@ -913,6 +922,7 @@
#if _BYTE_ORDER == _LITTLE_ENDIAN
rev32 v2.16b, v2.16b /* q2 := ctr (host-endian) */
#endif
+ _ALIGN_TEXT
1: ldr q3, [x1], #0x10 /* q3 := plaintext block */
add v2.4s, v2.4s, v5.4s /* increment ctr (32-bit) */
mov x0, x9 /* x0 := enckey */
@@ -972,6 +982,7 @@
bl aesarmv8_enc1 /* q0 := pad; trash x0/x3/q16 */
b 2f
+ _ALIGN_TEXT
1: /*
* Authenticate the last block and decrypt the next block
* simultaneously.
@@ -1031,6 +1042,7 @@
aesarmv8_enc1:
ldr q16, [x0], #0x10 /* load round key */
b 2f
+ _ALIGN_TEXT
1: /* q0 := MixColumns(q0) */
aesmc v0.16b, v0.16b
2: subs x3, x3, #1
@@ -1056,6 +1068,7 @@
aesarmv8_enc2:
ldr q16, [x0], #0x10 /* load round key */
b 2f
+ _ALIGN_TEXT
1: /* q[i] := MixColumns(q[i]) */
aesmc v0.16b, v0.16b
aesmc v1.16b, v1.16b
@@ -1085,6 +1098,7 @@
aesarmv8_enc8:
ldr q16, [x0], #0x10 /* load round key */
b 2f
+ _ALIGN_TEXT
1: /* q[i] := MixColumns(q[i]) */
aesmc v0.16b, v0.16b
aesmc v1.16b, v1.16b
@@ -1131,6 +1145,7 @@
aesarmv8_dec1:
ldr q16, [x0], #0x10 /* load round key */
b 2f
+ _ALIGN_TEXT
1: /* q0 := InMixColumns(q0) */
aesimc v0.16b, v0.16b
2: subs x3, x3, #1
@@ -1157,6 +1172,7 @@
aesarmv8_dec8:
ldr q16, [x0], #0x10 /* load round key */
b 2f
+ _ALIGN_TEXT
1: /* q[i] := InMixColumns(q[i]) */
aesimc v0.16b, v0.16b
aesimc v1.16b, v1.16b
diff -r 972f4e85f879 -r a9d471e52719 sys/crypto/aes/arch/arm/aes_neon_32.S
--- a/sys/crypto/aes/arch/arm/aes_neon_32.S Mon Jul 27 20:52:10 2020 +0000
+++ b/sys/crypto/aes/arch/arm/aes_neon_32.S Mon Jul 27 20:53:22 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: aes_neon_32.S,v 1.2 2020/07/27 20:52:10 riastradh Exp $ */
+/* $NetBSD: aes_neon_32.S,v 1.3 2020/07/27 20:53:22 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -316,6 +316,7 @@
b 2f
+ _ALIGN_TEXT
1: vld1.64 {d28-d29}, [r0 :128]! /* q14 = *rk++ */
/* q0 := A = rk[i] + sb1_0(io) + sb1_1(jo) */
@@ -535,6 +536,7 @@
b 2f
+ _ALIGN_TEXT
1: /* load dsbd */
add r4, r12, #(dsbd_0 - .Lconstants)
vld1.64 {d16-d17}, [r4 :128]! /* q8 := dsbd[0] */
diff -r 972f4e85f879 -r a9d471e52719 sys/crypto/aes/arch/x86/aes_ni_64.S
--- a/sys/crypto/aes/arch/x86/aes_ni_64.S Mon Jul 27 20:52:10 2020 +0000
+++ b/sys/crypto/aes/arch/x86/aes_ni_64.S Mon Jul 27 20:53:22 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: aes_ni_64.S,v 1.4 2020/07/25 22:29:06 riastradh Exp $ */
+/* $NetBSD: aes_ni_64.S,v 1.5 2020/07/27 20:53:22 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -523,6 +523,7 @@
movdqa (%rdi,%rdx),%xmm0 /* load last round key */
movdqa %xmm0,(%rsi) /* store last round key verbatim */
jmp 2f
+ _ALIGN_TEXT
1: movdqa (%rdi,%rdx),%xmm0 /* load round key */
aesimc %xmm0,%xmm0 /* convert encryption to decryption */
movdqa %xmm0,(%rsi) /* store round key */
@@ -580,6 +581,7 @@
jz 2f
mov %rcx,%r10 /* r10 := nbytes */
movdqu (%r8),%xmm0 /* xmm0 := chaining value */
+ _ALIGN_TEXT
1: movdqu (%rsi),%xmm1 /* xmm1 := plaintext block */
lea 0x10(%rsi),%rsi
pxor %xmm1,%xmm0 /* xmm0 := cv ^ ptxt */
@@ -615,6 +617,7 @@
movdqu -0x10(%rsi,%r10),%xmm0 /* xmm0 := last ciphertext block */
movdqu %xmm0,(%r8) /* update iv */
jmp 2f
+ _ALIGN_TEXT
1: movdqu -0x10(%rsi,%r10),%xmm8 /* xmm8 := chaining value */
pxor %xmm8,%xmm0 /* xmm0 := ptxt */
movdqu %xmm0,(%rdx,%r10) /* store plaintext block */
@@ -650,6 +653,7 @@
movdqu -0x10(%rsi,%r10),%xmm7 /* xmm7 := ciphertext block[n-1] */
movdqu %xmm7,(%r8) /* update iv */
jmp 2f
+ _ALIGN_TEXT
1: movdqu -0x10(%rsi,%r10),%xmm7 /* xmm7 := cv[0] */
pxor %xmm7,%xmm0 /* xmm0 := ptxt[0] */
movdqu %xmm0,(%rdx,%r10) /* store plaintext block */
@@ -706,6 +710,7 @@
ENTRY(aesni_xts_enc1)
mov %rcx,%r10 /* r10 := nbytes */
movdqu (%r8),%xmm15 /* xmm15 := tweak */
+ _ALIGN_TEXT
1: movdqu (%rsi),%xmm0 /* xmm0 := ptxt */
lea 0x10(%rsi),%rsi /* advance rsi to next block */
pxor %xmm15,%xmm0 /* xmm0 := ptxt ^ tweak */
@@ -738,6 +743,7 @@
sub $0x10,%rsp
mov %rcx,%r10 /* r10 := nbytes */
movdqu (%r8),%xmm15 /* xmm15 := tweak[0] */
+ _ALIGN_TEXT
1: movdqa %xmm15,%xmm8 /* xmm8 := tweak[0] */
call aesni_xts_mulx /* xmm15 := tweak[1] */
movdqa %xmm15,%xmm9 /* xmm9 := tweak[1] */
@@ -812,6 +818,7 @@
ENTRY(aesni_xts_dec1)
mov %rcx,%r10 /* r10 := nbytes */
movdqu (%r8),%xmm15 /* xmm15 := tweak */
+ _ALIGN_TEXT
1: movdqu (%rsi),%xmm0 /* xmm0 := ctxt */
lea 0x10(%rsi),%rsi /* advance rsi to next block */
pxor %xmm15,%xmm0 /* xmm0 := ctxt ^ tweak */
@@ -844,6 +851,7 @@
sub $0x10,%rsp
mov %rcx,%r10 /* r10 := nbytes */
movdqu (%r8),%xmm15 /* xmm15 := tweak[0] */
+ _ALIGN_TEXT
1: movdqa %xmm15,%xmm8 /* xmm8 := tweak[0] */
call aesni_xts_mulx /* xmm15 := tweak[1] */
movdqa %xmm15,%xmm9 /* xmm9 := tweak[1] */
@@ -964,6 +972,7 @@
movdqu (%rcx),%xmm0 /* xmm0 := auth */
mov %rdx,%r10 /* r10 := nbytes */
mov %rcx,%rdx /* rdx := &auth */
+ _ALIGN_TEXT
1: pxor (%rsi),%xmm0 /* xmm0 ^= plaintext block */
lea 0x10(%rsi),%rsi
mov %r8d,%ecx /* ecx := nrounds */
@@ -992,6 +1001,7 @@
movdqa ctr32_inc(%rip),%xmm5 /* xmm5 := (0,0,0,1) (le) */
movdqu (%r8),%xmm0 /* xmm0 := auth */
pshufb %xmm4,%xmm2 /* xmm2 := ctr (le) */
+ _ALIGN_TEXT
1: movdqu (%rsi),%xmm3 /* xmm3 := plaintext block */
paddd %xmm5,%xmm2 /* increment ctr (32-bit) */
lea 0x10(%rsi),%rsi
@@ -1040,6 +1050,7 @@
call aesni_enc1 /* xmm0 := pad; trash rax/rcx/xmm8 */
jmp 2f
+ _ALIGN_TEXT
1: /*
* Authenticate the last block and decrypt the next block
* simultaneously.
@@ -1103,6 +1114,7 @@
lea 0x10(%rdi,%rcx),%rax /* rax := end of round key array */
neg %rcx /* rcx := byte offset of round key from end */
jmp 2f
+ _ALIGN_TEXT
1: aesenc %xmm8,%xmm0
2: movdqa (%rax,%rcx),%xmm8 /* load round key */
add $0x10,%rcx
@@ -1130,6 +1142,7 @@
pxor %xmm8,%xmm0 /* xor in first round key */
pxor %xmm8,%xmm1
jmp 2f
+ _ALIGN_TEXT
1: aesenc %xmm8,%xmm0
aesenc %xmm8,%xmm1
2: movdqa (%rax,%rcx),%xmm8 /* load round key */
@@ -1165,6 +1178,7 @@
lea 0x10(%rdi,%rcx),%rax /* rax := end of round key array */
neg %rcx /* rcx := byte offset of round key from end */
jmp 2f
+ _ALIGN_TEXT
1: aesenc %xmm8,%xmm0
aesenc %xmm8,%xmm1
aesenc %xmm8,%xmm2
@@ -1204,6 +1218,7 @@
lea 0x10(%rdi,%rcx),%rax /* rax := pointer to round key */
neg %rcx /* rcx := byte offset of round key from end */
jmp 2f
+ _ALIGN_TEXT
1: aesdec %xmm8,%xmm0
2: movdqa (%rax,%rcx),%xmm8 /* load round key */
add $0x10,%rcx
@@ -1237,6 +1252,7 @@
lea 0x10(%rdi,%rcx),%rax /* rax := pointer to round key */
neg %rcx /* rcx := byte offset of round key from end */
jmp 2f
+ _ALIGN_TEXT
1: aesdec %xmm8,%xmm0
aesdec %xmm8,%xmm1
aesdec %xmm8,%xmm2
diff -r 972f4e85f879 -r a9d471e52719 sys/crypto/chacha/arch/arm/chacha_neon_64.S
Home |
Main Index |
Thread Index |
Old Index