Source-Changes-HG archive
[src/trunk]: src/sys/crypto/aes/arch/arm Invert some loops to save a branch instruction on every iteration
details: https://anonhg.NetBSD.org/src/rev/10fe2a12fff8
branches: trunk
changeset: 936316:10fe2a12fff8
user: riastradh <riastradh@NetBSD.org>
date: Sat Jul 25 22:32:09 2020 +0000
description:
Invert some loops to save a branch instruction on every iteration.
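
The change is classic loop rotation (also called loop inversion): the old
shape pairs a conditional exit branch in the middle of the loop with an
unconditional branch back to the top, costing two branch instructions per
iteration. The rotated shape jumps once into the middle of the body on
entry and lets a single conditional branch at the bottom close the loop.
The C sketch below mirrors that control flow; it is an illustration only,
not the NetBSD code, and the names rotated_loop, loop, and entry are made
up:

    #include <stdio.h>

    /*
     * Old shape, two branches per iteration:
     *
     *   1:  tail work              // str / subs / ldr
     *       if (--n == 0)          // b.eq 2f
     *               goto out;
     *       head work              // aesmc / aesimc
     *       goto 1;                // b 1b
     *   out: epilogue              // final str / eor
     *
     * New shape, one branch per iteration:
     */
    static void
    rotated_loop(int n)
    {
            goto entry;             /* b 2f: skip head work on first pass */
    loop:
            puts("head work");      /* runs only between iterations */
    entry:
            puts("tail work");      /* runs on every iteration */
            if (--n != 0)
                    goto loop;      /* b.ne 1b: the only branch in the loop */
            puts("epilogue");
    }

    int
    main(void)
    {
            rotated_loop(3);        /* tail work 3x, head work 2x */
            return 0;
    }

For n iterations the old shape executes 2n - 1 branch instructions
(n conditional exits plus n - 1 unconditional back-branches); the rotated
shape executes n + 1 (one entry jump plus n conditional back-branches).
That is one branch saved per iteration, at the cost of a single extra
jump on entry.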
diffstat:
sys/crypto/aes/arch/arm/aes_armv8_64.S | 118 ++++++++++++++++----------------
1 files changed, 59 insertions(+), 59 deletions(-)
diffs (206 lines):
diff -r b3d0a4ffb4a7 -r 10fe2a12fff8 sys/crypto/aes/arch/arm/aes_armv8_64.S
--- a/sys/crypto/aes/arch/arm/aes_armv8_64.S Sat Jul 25 22:31:32 2020 +0000
+++ b/sys/crypto/aes/arch/arm/aes_armv8_64.S Sat Jul 25 22:32:09 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: aes_armv8_64.S,v 1.6 2020/07/22 06:15:21 riastradh Exp $ */
+/* $NetBSD: aes_armv8_64.S,v 1.7 2020/07/25 22:32:09 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -437,13 +437,13 @@
*/
ENTRY(aesarmv8_enctodec)
ldr q0, [x0, x2, lsl #4] /* load last round key */
-1: str q0, [x1], #0x10 /* store round key */
+ b 2f
+1: aesimc v0.16b, v0.16b /* convert encryption to decryption */
+2: str q0, [x1], #0x10 /* store round key */
subs x2, x2, #1 /* count down round */
ldr q0, [x0, x2, lsl #4] /* load previous round key */
- b.eq 2f /* stop if this is the last one */
- aesimc v0.16b, v0.16b /* convert encryption to decryption */
- b 1b
-2: str q0, [x1] /* store first round key verbatim */
+ b.ne 1b /* repeat if there's more */
+ str q0, [x1] /* store first round key verbatim */
ret
END(aesarmv8_enctodec)
@@ -536,17 +536,17 @@
add x2, x2, x3 /* x2 := pointer past end of out */
ldr q0, [x1, #-0x10]! /* q0 := last ciphertext block */
str q0, [x4] /* update iv */
-1: mov x0, x9 /* x0 := enckey */
+ b 2f
+1: ldr q31, [x1, #-0x10]! /* q31 := chaining value */
+ eor v0.16b, v0.16b, v31.16b /* q0 := plaintext block */
+ str q0, [x2, #-0x10]! /* store plaintext block */
+ mov v0.16b, v31.16b /* move cv = ciphertext block */
+2: mov x0, x9 /* x0 := enckey */
mov x3, x5 /* x3 := nrounds */
bl aesarmv8_dec1 /* q0 := cv ^ ptxt; trash x0/x3/q16 */
subs x10, x10, #0x10 /* count down nbytes */
- b.eq 2f /* stop if this is the first block */
- ldr q31, [x1, #-0x10]! /* q31 := chaining value */
- eor v0.16b, v0.16b, v31.16b /* q0 := plaintext block */
- str q0, [x2, #-0x10]! /* store plaintext block */
- mov v0.16b, v31.16b /* move cv = ciphertext block */
- b 1b
-2: eor v0.16b, v0.16b, v24.16b /* q0 := first plaintext block */
+ b.ne 1b /* repeat if more blocks */
+ eor v0.16b, v0.16b, v24.16b /* q0 := first plaintext block */
str q0, [x2, #-0x10]! /* store first plaintext block */
ldp fp, lr, [sp], #16 /* pop stack frame */
ret
@@ -573,7 +573,11 @@
add x2, x2, x3 /* x2 := pointer past end of out */
ldp q6, q7, [x1, #-0x20]! /* q6, q7 := last ciphertext blocks */
str q7, [x4] /* update iv */
-1: ldp q4, q5, [x1, #-0x20]!
+ b 2f
+1: ldp q6, q7, [x1, #-0x20]!
+ eor v0.16b, v0.16b, v7.16b /* q0 := pt0 */
+ stp q0, q1, [x2, #-0x20]!
+2: ldp q4, q5, [x1, #-0x20]!
ldp q2, q3, [x1, #-0x20]!
ldp q0, q1, [x1, #-0x20]!
mov v31.16b, v6.16b /* q[24+i] := cv[i], 0<i<8 */
@@ -598,12 +602,8 @@
stp q6, q7, [x2, #-0x20]! /* store plaintext blocks */
stp q4, q5, [x2, #-0x20]!
stp q2, q3, [x2, #-0x20]!
- b.eq 2f /* stop if this is the first block */
- ldp q6, q7, [x1, #-0x20]!
- eor v0.16b, v0.16b, v7.16b /* q0 := pt0 */
- stp q0, q1, [x2, #-0x20]!
- b 1b
-2: eor v0.16b, v0.16b, v24.16b /* q0 := pt0 */
+ b.ne 1b /* repeat if there's more */
+ eor v0.16b, v0.16b, v24.16b /* q0 := pt0 */
stp q0, q1, [x2, #-0x20]! /* store first two plaintext blocks */
ldp fp, lr, [sp], #16 /* pop stack frame */
ret
@@ -873,15 +873,15 @@
.type aesarmv8_enc1,@function
aesarmv8_enc1:
ldr q16, [x0], #0x10 /* load round key */
-1: subs x3, x3, #1
+ b 2f
+1: /* q0 := MixColumns(q0) */
+ aesmc v0.16b, v0.16b
+2: subs x3, x3, #1
/* q0 := ShiftRows(SubBytes(AddRoundKey_q16(q0))) */
aese v0.16b, v16.16b
ldr q16, [x0], #0x10 /* load next round key */
- b.eq 2f
- /* q0 := MixColumns(q0) */
- aesmc v0.16b, v0.16b
- b 1b
-2: eor v0.16b, v0.16b, v16.16b
+ b.ne 1b
+ eor v0.16b, v0.16b, v16.16b
ret
END(aesarmv8_enc1)
@@ -899,7 +899,17 @@
.type aesarmv8_enc8,@function
aesarmv8_enc8:
ldr q16, [x0], #0x10 /* load round key */
-1: subs x3, x3, #1
+ b 2f
+1: /* q[i] := MixColumns(q[i]) */
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+ aesmc v2.16b, v2.16b
+ aesmc v3.16b, v3.16b
+ aesmc v4.16b, v4.16b
+ aesmc v5.16b, v5.16b
+ aesmc v6.16b, v6.16b
+ aesmc v7.16b, v7.16b
+2: subs x3, x3, #1
/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
aese v0.16b, v16.16b
aese v1.16b, v16.16b
@@ -910,18 +920,8 @@
aese v6.16b, v16.16b
aese v7.16b, v16.16b
ldr q16, [x0], #0x10 /* load next round key */
- b.eq 2f
- /* q[i] := MixColumns(q[i]) */
- aesmc v0.16b, v0.16b
- aesmc v1.16b, v1.16b
- aesmc v2.16b, v2.16b
- aesmc v3.16b, v3.16b
- aesmc v4.16b, v4.16b
- aesmc v5.16b, v5.16b
- aesmc v6.16b, v6.16b
- aesmc v7.16b, v7.16b
- b 1b
-2: eor v0.16b, v0.16b, v16.16b /* AddRoundKey */
+ b.ne 1b
+ eor v0.16b, v0.16b, v16.16b /* AddRoundKey */
eor v1.16b, v1.16b, v16.16b
eor v2.16b, v2.16b, v16.16b
eor v3.16b, v3.16b, v16.16b
@@ -945,15 +945,15 @@
.type aesarmv8_dec1,@function
aesarmv8_dec1:
ldr q16, [x0], #0x10 /* load round key */
-1: subs x3, x3, #1
+ b 2f
+1: /* q0 := InMixColumns(q0) */
+ aesimc v0.16b, v0.16b
+2: subs x3, x3, #1
/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
aesd v0.16b, v16.16b
ldr q16, [x0], #0x10 /* load next round key */
- b.eq 2f
- /* q0 := InMixColumns(q0) */
- aesimc v0.16b, v0.16b
- b 1b
-2: eor v0.16b, v0.16b, v16.16b
+ b.ne 1b
+ eor v0.16b, v0.16b, v16.16b
ret
END(aesarmv8_dec1)
@@ -971,7 +971,17 @@
.type aesarmv8_dec8,@function
aesarmv8_dec8:
ldr q16, [x0], #0x10 /* load round key */
-1: subs x3, x3, #1
+ b 2f
+1: /* q[i] := InMixColumns(q[i]) */
+ aesimc v0.16b, v0.16b
+ aesimc v1.16b, v1.16b
+ aesimc v2.16b, v2.16b
+ aesimc v3.16b, v3.16b
+ aesimc v4.16b, v4.16b
+ aesimc v5.16b, v5.16b
+ aesimc v6.16b, v6.16b
+ aesimc v7.16b, v7.16b
+2: subs x3, x3, #1
/* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */
aesd v0.16b, v16.16b
aesd v1.16b, v16.16b
@@ -982,18 +992,8 @@
aesd v6.16b, v16.16b
aesd v7.16b, v16.16b
ldr q16, [x0], #0x10 /* load next round key */
- b.eq 2f
- /* q[i] := InMixColumns(q[i]) */
- aesimc v0.16b, v0.16b
- aesimc v1.16b, v1.16b
- aesimc v2.16b, v2.16b
- aesimc v3.16b, v3.16b
- aesimc v4.16b, v4.16b
- aesimc v5.16b, v5.16b
- aesimc v6.16b, v6.16b
- aesimc v7.16b, v7.16b
- b 1b
-2: eor v0.16b, v0.16b, v16.16b /* AddRoundKey */
+ b.ne 1b
+ eor v0.16b, v0.16b, v16.16b /* AddRoundKey */
eor v1.16b, v1.16b, v16.16b
eor v2.16b, v2.16b, v16.16b
eor v3.16b, v3.16b, v16.16b