Source-Changes-HG archive
[src/trunk]: src/sys/crypto/aes/arch/arm aes neon: Gather mc_forward/backward...
details: https://anonhg.NetBSD.org/src/rev/b9dac1212730
branches: trunk
changeset: 943770:b9dac1212730
user: riastradh <riastradh%NetBSD.org@localhost>
date: Thu Sep 10 11:31:03 2020 +0000
description:
aes neon: Gather mc_forward/backward so we can load 256 bits at once.
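For readers following the diff below, here is a minimal C sketch of the data-layout idea behind this change; the struct and helper names are illustrative assumptions, not identifiers from the NetBSD source. Each round's mc_forward/mc_backward pair becomes one 32-byte record, so the per-round index scales by 32 (the assembly's lsl #5 instead of lsl #4) and a single 256-bit aligned load replaces two separate 128-bit loads.

#include <stdint.h>

/*
 * Hypothetical sketch (names not from the NetBSD source): interleave the
 * per-round MixColumns shuffle tables so each round is one 32-byte record.
 */
struct mc_entry {
	uint8_t forward[16];	/* old mc_forward[i] */
	uint8_t backward[16];	/* old mc_backward[i] */
};

/* One table instead of two; 32-byte alignment permits the :256 load hint. */
static const struct mc_entry mc[4] __attribute__((aligned(32)));

static inline const struct mc_entry *
mc_for_round(unsigned rmod4)
{
	/*
	 * Old layout: two base pointers, offset rmod4 << 4 into each,
	 * two 128-bit loads per round.
	 * New layout: one base pointer, offset rmod4 << 5, and both halves
	 * fall in one aligned 32-byte span, i.e. a single
	 * vld1.8 {q12-q13}, [r6 :256] in the assembly below.
	 */
	return &mc[rmod4];
}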
diffstat:
sys/crypto/aes/arch/arm/aes_neon_32.S | 64 +++++++++++++---------------------
1 files changed, 25 insertions(+), 39 deletions(-)
diffs (128 lines):
diff -r dfb0be56654f -r b9dac1212730 sys/crypto/aes/arch/arm/aes_neon_32.S
--- a/sys/crypto/aes/arch/arm/aes_neon_32.S Thu Sep 10 11:30:28 2020 +0000
+++ b/sys/crypto/aes/arch/arm/aes_neon_32.S Thu Sep 10 11:31:03 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $ */
+/* $NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
#include <arm/asm.h>
-RCSID("$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $")
.fpu neon
@@ -54,36 +54,26 @@
.byte 0x09,0x08,0x05,0x02,0x0C,0x0E,0x0D,0x03
END(inva)
- .type mc_forward,_ASM_TYPE_OBJECT
-mc_forward:
- .byte 0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04 /* 0 */
+ .type mc,_ASM_TYPE_OBJECT
+mc:
+ .byte 0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04 /* 0 forward */
.byte 0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C
-
- .byte 0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08 /* 1 */
+ .byte 0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06 /* 0 backward */
+ .byte 0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E
+ .byte 0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08 /* 1 forward */
.byte 0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00
-
- .byte 0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C /* 2 */
+ .byte 0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02 /* 1 backward */
+ .byte 0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A
+ .byte 0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C /* 2 forward */
.byte 0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04
-
+ .byte 0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E /* 2 backward */
+ .byte 0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06
.Lmc_forward_3:
- .byte 0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00 /* 3 */
+ .byte 0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00 /* 3 forward */
.byte 0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08
-END(mc_forward)
-
- .type mc_backward,_ASM_TYPE_OBJECT
-mc_backward:
- .byte 0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06 /* 0 */
- .byte 0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E
-
- .byte 0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02 /* 1 */
- .byte 0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A
-
- .byte 0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E /* 2 */
- .byte 0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06
-
- .byte 0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A /* 3 */
+ .byte 0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A /* 3 backward */
.byte 0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02
-END(mc_backward)
+END(mc)
.type sr,_ASM_TYPE_OBJECT
sr:
@@ -210,8 +200,7 @@
/*
* r3: rmod4
- * r4: mc_forward
- * r5: mc_backward
+ * r4: mc
* r6,r8,r10,ip: temporaries
* q0={d0-d1}: x/ak/A
* q1={d2-d3}: 0x0f0f...
@@ -225,8 +214,8 @@
* q9={d18-d19}: sb2[1]
* q10={d20-d21}: inv
* q11={d22-d23}: inva
- * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc_backward[rmod4]
- * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc_forward[rmod4]
+ * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc[rmod4].backward
+ * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc[rmod4].forward
* q14={d28-d29}: rk/A2/A2_B_D
* q15={d30-d31}: A2_B/sr[rmod4]
*/
@@ -254,9 +243,8 @@
vld1.8 {q8-q9}, [r6 :256] /* q8 = sb2[0], q9 = sb2[1] */
vld1.8 {q10-q11}, [r8 :256] /* q10 = inv, q11 = inva */
- /* (r4, r5) := (&mc_forward[0], &mc_backward[0]) */
- add r4, ip, #(mc_forward - .Lconstants)
- add r5, ip, #(mc_backward - .Lconstants)
+ /* r4 := mc */
+ add r4, ip, #(mc - .Lconstants)
/* (q2, q3) := (lo, hi) */
vshr.u8 q3, q0, #4
@@ -291,13 +279,11 @@
vtbl.8 d25, {q8}, d5
vtbl.8 d26, {q9}, d6
vtbl.8 d27, {q9}, d7
+ add r6, r4, r3, lsl #5 /* r6 := &mc[rmod4] */
veor q14, q12, q13
- /* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */
- add r6, r4, r3, lsl #4
- add r8, r5, r3, lsl #4
- vld1.8 {q12}, [r6 :128]
- vld1.8 {q13}, [r8 :128]
+ /* (q12, q13) := (mc[rmod4].forward, mc[rmod4].backward) */
+ vld1.8 {q12-q13}, [r6 :256]
/* q15 := A2_B = A2 + A(mcf) */
vtbl.8 d30, {q0}, d24
@@ -474,7 +460,7 @@
add r8, ip, #(.Lmc_forward_3 - .Lconstants)
vld1.8 {q6-q7}, [r4 :256] /* q6 := dsbb[0], q7 := dsbb[1] */
vld1.8 {q10-q11}, [r6 :256] /* q10 := inv, q11 := inva */
- vld1.8 {q15}, [r8 :128] /* q15 := mc_forward[3] */
+ vld1.8 {q15}, [r8 :128] /* q15 := mc[3].forward */
/* (q2, q3) := (lo, hi) */
vshr.u8 q3, q0, #4