Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src-draft/trunk]: src/sys/crypto/aes/arch/arm [jak] aesarmv8: Adapt aes_armv...
details: https://anonhg.NetBSD.org/src-all/rev/9f70ead4319f
branches: trunk
changeset: 938477:9f70ead4319f
user: Taylor R Campbell <riastradh%NetBSD.org@localhost>
date: Tue Sep 08 21:37:12 2020 +0000
description:
[jak] aesarmv8: Adapt aes_armv8_64.S to big-endian.
diffstat:
sys/crypto/aes/arch/arm/aes_armv8_64.S | 161 +++++++++++++++++---------------
1 file changed, 85 insertions(+), 76 deletions(-)
diffs (truncated from 432 to 300 lines):
diff -r 52fcc6acd34b -r 9f70ead4319f sys/crypto/aes/arch/arm/aes_armv8_64.S
--- a/sys/crypto/aes/arch/arm/aes_armv8_64.S Tue Sep 08 22:43:21 2020 +0000
+++ b/sys/crypto/aes/arch/arm/aes_armv8_64.S Tue Sep 08 21:37:12 2020 +0000
@@ -114,11 +114,11 @@
* Standard ABI calling convention.
*/
ENTRY(aesarmv8_setenckey128)
- ldr q1, [x1] /* q1 := master key */
+ ld1 {v1.16b}, [x1] /* q1 := master key */
adrl x4, unshiftrows_rotword_3
eor v0.16b, v0.16b, v0.16b /* q0 := 0 */
- ldr q16, [x4] /* q16 := unshiftrows_rotword_3 table */
+ ld1 {v16.16b}, [x4] /* q16 := unshiftrows_rotword_3 table */
str q1, [x0], #0x10 /* store master key as first round key */
mov x2, #10 /* round count */
@@ -171,14 +171,14 @@
* Standard ABI calling convention.
*/
ENTRY(aesarmv8_setenckey192)
- ldr q1, [x1], #0x10 /* q1 := master key[0:128) */
- ldr d2, [x1] /* d2 := master key[128:192) */
+ ld1 {v1.16b}, [x1], #0x10 /* q1 := master key[0:128) */
+ ld1 {v2.8b}, [x1] /* d2 := master key[128:192) */
adrl x4, unshiftrows_rotword_1
adrl x5, unshiftrows_rotword_3
eor v0.16b, v0.16b, v0.16b /* q0 := 0 */
- ldr q16, [x4] /* q16 := unshiftrows_rotword_1 */
- ldr q17, [x5] /* q17 := unshiftrows_rotword_3 */
+ ld1 {v16.16b}, [x4] /* q16 := unshiftrows_rotword_1 */
+ ld1 {v17.16b}, [x5] /* q17 := unshiftrows_rotword_3 */
str q1, [x0], #0x10 /* store master key[0:128) as round key */
mov x2, #12 /* round count */
@@ -351,13 +351,13 @@
*/
ENTRY(aesarmv8_setenckey256)
/* q1 := key[0:128), q2 := key[128:256) */
- ldp q1, q2, [x1], #0x20
+ ld1 {v1.16b-v2.16b}, [x1], #0x20
adrl x4, unshiftrows_rotword_3
adrl x5, unshiftrows_3
eor v0.16b, v0.16b, v0.16b /* q0 := 0 */
- ldr q16, [x4] /* q16 := unshiftrows_rotword_3 */
- ldr q17, [x5] /* q17 := unshiftrows_3 */
+ ld1 {v16.16b}, [x4] /* q16 := unshiftrows_rotword_3 */
+ ld1 {v17.16b}, [x5] /* q17 := unshiftrows_3 */
/* store master key as first two round keys */
stp q1, q2, [x0], #0x20
@@ -461,9 +461,9 @@
ENTRY(aesarmv8_enc)
stp fp, lr, [sp, #-16]! /* push stack frame */
mov fp, sp
- ldr q0, [x1] /* q0 := ptxt */
+ ld1 {v0.16b}, [x1] /* q0 := ptxt */
bl aesarmv8_enc1 /* q0 := ctxt; trash x0/x3/q16 */
- str q0, [x2] /* store ctxt */
+ st1 {v0.16b}, [x2] /* store ctxt */
ldp fp, lr, [sp], #16 /* pop stack frame */
ret
END(aesarmv8_enc)
@@ -479,9 +479,9 @@
ENTRY(aesarmv8_dec)
stp fp, lr, [sp, #-16]! /* push stack frame */
mov fp, sp
- ldr q0, [x1] /* q0 := ctxt */
+ ld1 {v0.16b}, [x1] /* q0 := ctxt */
bl aesarmv8_dec1 /* q0 := ptxt; trash x0/x3/q16 */
- str q0, [x2] /* store ptxt */
+ st1 {v0.16b}, [x2] /* store ptxt */
ldp fp, lr, [sp], #16 /* pop stack frame */
ret
END(aesarmv8_dec)
@@ -503,17 +503,17 @@
mov fp, sp
mov x9, x0 /* x9 := enckey */
mov x10, x3 /* x10 := nbytes */
- ldr q0, [x4] /* q0 := chaining value */
+ ld1 {v0.16b}, [x4] /* q0 := chaining value */
_ALIGN_TEXT
-1: ldr q1, [x1], #0x10 /* q1 := plaintext block */
+1: ld1 {v1.16b}, [x1], #0x10 /* q1 := plaintext block */
eor v0.16b, v0.16b, v1.16b /* q0 := cv ^ ptxt */
mov x0, x9 /* x0 := enckey */
mov x3, x5 /* x3 := nrounds */
bl aesarmv8_enc1 /* q0 := ctxt; trash x0/x3/q16 */
subs x10, x10, #0x10 /* count down nbytes */
- str q0, [x2], #0x10 /* store ciphertext block */
+ st1 {v0.16b}, [x2], #0x10 /* store ciphertext block */
b.ne 1b /* repeat if x10 is nonzero */
- str q0, [x4] /* store chaining value */
+ st1 {v0.16b}, [x4] /* store chaining value */
ldp fp, lr, [sp], #16 /* pop stack frame */
2: ret
END(aesarmv8_cbc_enc)
@@ -533,13 +533,14 @@
ENTRY(aesarmv8_cbc_dec1)
stp fp, lr, [sp, #-16]! /* push stack frame */
mov fp, sp
- ldr q24, [x4] /* q24 := iv */
+ ld1 {v24.16b}, [x4] /* q24 := iv */
mov x9, x0 /* x9 := enckey */
mov x10, x3 /* x10 := nbytes */
add x1, x1, x3 /* x1 := pointer past end of in */
add x2, x2, x3 /* x2 := pointer past end of out */
- ldr q0, [x1, #-0x10]! /* q0 := last ciphertext block */
- str q0, [x4] /* update iv */
+ sub x1, x1, #0x10
+ ld1 {v0.16b}, [x1] /* q0 := last ciphertext block */
+ st1 {v0.16b}, [x4] /* update iv */
b 2f
_ALIGN_TEXT
1: ldr q31, [x1, #-0x10]! /* q31 := chaining value */
@@ -552,7 +553,8 @@
subs x10, x10, #0x10 /* count down nbytes */
b.ne 1b /* repeat if more blocks */
eor v0.16b, v0.16b, v24.16b /* q0 := first plaintext block */
- str q0, [x2, #-0x10]! /* store first plaintext block */
+ sub x2, x2, #0x10 /* store first plaintext block */
+ st1 {v0.16b}, [x2]
ldp fp, lr, [sp], #16 /* pop stack frame */
ret
END(aesarmv8_cbc_dec1)
@@ -571,21 +573,26 @@
ENTRY(aesarmv8_cbc_dec8)
stp fp, lr, [sp, #-16]! /* push stack frame */
mov fp, sp
- ldr q24, [x4] /* q24 := iv */
+ ld1 {v24.16b}, [x4] /* q24 := iv */
mov x9, x0 /* x9 := enckey */
mov x10, x3 /* x10 := nbytes */
add x1, x1, x3 /* x1 := pointer past end of in */
add x2, x2, x3 /* x2 := pointer past end of out */
- ldp q6, q7, [x1, #-0x20]! /* q6, q7 := last ciphertext blocks */
- str q7, [x4] /* update iv */
+ sub x1, x1, #0x20
+ ld1 {v6.16b, v7.16b}, [x1] /* q6, q7 := last ciphertext blocks */
+ st1 {v7.16b}, [x4] /* update iv */
b 2f
_ALIGN_TEXT
-1: ldp q6, q7, [x1, #-0x20]!
+1: sub x1, x1, #0x20
+ ld1 {v6.16b, v7.16b}, [x1]
eor v0.16b, v0.16b, v7.16b /* q0 := pt0 */
- stp q0, q1, [x2, #-0x20]!
-2: ldp q4, q5, [x1, #-0x20]!
- ldp q2, q3, [x1, #-0x20]!
- ldp q0, q1, [x1, #-0x20]!
+ sub x2, x2, #0x20
+ st1 {v0.16b, v1.16b}, [x2]
+2: sub x1, x1, #0x20
+ ld1 {v4.16b-v5.16b}, [x1]
+ sub x1, x1, #0x40
+ ld1 {v0.16b-v3.16b}, [x1]
+
mov v31.16b, v6.16b /* q[24+i] := cv[i], 0<i<8 */
mov v30.16b, v5.16b
mov v29.16b, v4.16b
@@ -605,12 +612,15 @@
eor v2.16b, v2.16b, v26.16b
eor v1.16b, v1.16b, v25.16b
subs x10, x10, #0x80 /* count down nbytes */
- stp q6, q7, [x2, #-0x20]! /* store plaintext blocks */
- stp q4, q5, [x2, #-0x20]!
- stp q2, q3, [x2, #-0x20]!
+ /* store plaintext blocks */
+ sub x2, x2, #0x20
+ st1 {v6.16b-v7.16b}, [x2]
+ sub x2, x2, #0x40
+ st1 {v2.16b-v5.16b}, [x2]
b.ne 1b /* repeat if there's more */
eor v0.16b, v0.16b, v24.16b /* q0 := pt0 */
- stp q0, q1, [x2, #-0x20]! /* store first two plaintext blocks */
+ sub x2, x2, #0x20
+ st1 {v0.16b, v1.16b}, [x2] /* store first two plaintext blocks */
ldp fp, lr, [sp], #16 /* pop stack frame */
ret
END(aesarmv8_cbc_dec8)
@@ -632,19 +642,19 @@
mov fp, sp
mov x9, x0 /* x9 := enckey */
mov x10, x3 /* x10 := nbytes */
- ldr q31, [x4] /* q31 := tweak */
+ ld1 {v31.16b}, [x4] /* q31 := tweak */
_ALIGN_TEXT
-1: ldr q0, [x1], #0x10 /* q0 := ptxt */
+1: ld1 {v0.16b}, [x1], #0x10 /* q0 := ptxt */
mov x0, x9 /* x0 := enckey */
mov x3, x5 /* x3 := nrounds */
eor v0.16b, v0.16b, v31.16b /* q0 := ptxt ^ tweak */
bl aesarmv8_enc1 /* q0 := AES(...); trash x0/x3/q16 */
eor v0.16b, v0.16b, v31.16b /* q0 := AES(ptxt ^ tweak) ^ tweak */
- str q0, [x2], #0x10 /* store ciphertext block */
+ st1 {v0.16b}, [x2], #0x10 /* store ciphertext block */
bl aesarmv8_xts_mulx /* q31 *= x; trash x0/q0/q1 */
subs x10, x10, #0x10 /* count down nbytes */
b.ne 1b /* repeat if more blocks */
- str q31, [x4] /* update tweak */
+ st1 {v31.16b}, [x4] /* update tweak */
ldp fp, lr, [sp], #16 /* pop stack frame */
ret
END(aesarmv8_xts_enc1)
@@ -665,7 +675,7 @@
mov fp, sp
mov x9, x0 /* x9 := enckey */
mov x10, x3 /* x10 := nbytes */
- ldr q31, [x4] /* q31 := tweak */
+ ld1 {v31.16b}, [x4] /* q31 := tweak */
_ALIGN_TEXT
1: mov v24.16b, v31.16b /* q24 := tweak[0] */
bl aesarmv8_xts_mulx /* q31 *= x; trash x0/q0/q1 */
@@ -682,10 +692,10 @@
mov v30.16b, v31.16b /* q30 := tweak[6] */
bl aesarmv8_xts_mulx /* q31 *= x; trash x0/q0/q1 */
/* q31 := tweak[7] */
- ldp q0, q1, [x1], #0x20 /* q[i] := ptxt[i] */
- ldp q2, q3, [x1], #0x20
- ldp q4, q5, [x1], #0x20
- ldp q6, q7, [x1], #0x20
+ ld1 {v0.16b,v1.16b}, [x1], #0x20 /* q[i] := ptxt[i] */
+ ld1 {v2.16b,v3.16b}, [x1], #0x20
+ ld1 {v4.16b,v5.16b}, [x1], #0x20
+ ld1 {v6.16b,v7.16b}, [x1], #0x20
eor v0.16b, v0.16b, v24.16b /* q[i] := ptxt[i] ^ tweak[i] */
eor v1.16b, v1.16b, v25.16b
eor v2.16b, v2.16b, v26.16b
@@ -705,14 +715,14 @@
eor v5.16b, v5.16b, v29.16b
eor v6.16b, v6.16b, v30.16b
eor v7.16b, v7.16b, v31.16b
- stp q0, q1, [x2], #0x20 /* store ciphertext blocks */
- stp q2, q3, [x2], #0x20
- stp q4, q5, [x2], #0x20
- stp q6, q7, [x2], #0x20
+ st1 {v0.16b,v1.16b}, [x2], #0x20 /* store ciphertext blocks */
+ st1 {v2.16b,v3.16b}, [x2], #0x20
+ st1 {v4.16b,v5.16b}, [x2], #0x20
+ st1 {v6.16b,v7.16b}, [x2], #0x20
bl aesarmv8_xts_mulx /* q31 *= x; trash x0/q0/q1 */
subs x10, x10, #0x80 /* count down nbytes */
b.ne 1b /* repeat if more block groups */
- str q31, [x4] /* update tweak */
+ st1 {v31.16b}, [x4] /* update tweak */
ldp fp, lr, [sp], #16 /* pop stack frame */
ret
END(aesarmv8_xts_enc8)
@@ -734,19 +744,19 @@
mov fp, sp
mov x9, x0 /* x9 := deckey */
mov x10, x3 /* x10 := nbytes */
- ldr q31, [x4] /* q31 := tweak */
+ ld1 {v31.16b}, [x4] /* q31 := tweak */
_ALIGN_TEXT
-1: ldr q0, [x1], #0x10 /* q0 := ctxt */
+1: ld1 {v0.16b}, [x1], #0x10 /* q0 := ctxt */
mov x0, x9 /* x0 := deckey */
mov x3, x5 /* x3 := nrounds */
eor v0.16b, v0.16b, v31.16b /* q0 := ctxt ^ tweak */
bl aesarmv8_dec1 /* q0 := AES(...); trash x0/x3/q16 */
eor v0.16b, v0.16b, v31.16b /* q0 := AES(ctxt ^ tweak) ^ tweak */
- str q0, [x2], #0x10 /* store plaintext block */
+ st1 {v0.16b}, [x2], #0x10 /* store plaintext block */
bl aesarmv8_xts_mulx /* q31 *= x; trash x0/q0/q1 */
subs x10, x10, #0x10 /* count down nbytes */
b.ne 1b /* repeat if more blocks */
- str q31, [x4] /* update tweak */
+ st1 {v31.16b}, [x4] /* update tweak */
ldp fp, lr, [sp], #16 /* pop stack frame */
ret
END(aesarmv8_xts_dec1)
@@ -767,7 +777,7 @@
mov fp, sp
mov x9, x0 /* x9 := deckey */
mov x10, x3 /* x10 := nbytes */
- ldr q31, [x4] /* q31 := tweak */
+ ld1 {v31.16b}, [x4] /* q31 := tweak */
_ALIGN_TEXT
1: mov v24.16b, v31.16b /* q24 := tweak[0] */
bl aesarmv8_xts_mulx /* q31 *= x; trash x0/q0/q1 */
@@ -784,10 +794,8 @@
mov v30.16b, v31.16b /* q30 := tweak[6] */
bl aesarmv8_xts_mulx /* q31 *= x; trash x0/q0/q1 */
/* q31 := tweak[7] */
- ldp q0, q1, [x1], #0x20 /* q[i] := ctxt[i] */
- ldp q2, q3, [x1], #0x20
- ldp q4, q5, [x1], #0x20
- ldp q6, q7, [x1], #0x20
+ ld1 {v0.16b-v3.16b}, [x1], #0x40 /* q[i] := ctxt[i] */
+ ld1 {v4.16b-v7.16b}, [x1], #0x40
eor v0.16b, v0.16b, v24.16b /* q[i] := ctxt[i] ^ tweak[i] */
eor v1.16b, v1.16b, v25.16b
eor v2.16b, v2.16b, v26.16b
@@ -807,14 +815,12 @@
eor v5.16b, v5.16b, v29.16b
eor v6.16b, v6.16b, v30.16b
eor v7.16b, v7.16b, v31.16b
Home |
Main Index |
Thread Index |
Old Index