Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/crypto/aes/arch/arm aes neon: Gather mc_forward/backward so we can load 256 bits at once.



details:   https://anonhg.NetBSD.org/src/rev/6bbbc69eef21
branches:  trunk
changeset: 1014017:6bbbc69eef21
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Thu Sep 10 11:31:03 2020 +0000

description:
aes neon: Gather mc_forward/backward so we can load 256 bits at once.

diffstat:

 sys/crypto/aes/arch/arm/aes_neon_32.S |  64 +++++++++++++---------------------
 1 files changed, 25 insertions(+), 39 deletions(-)

diffs (128 lines):

diff -r e09eadcd3490 -r 6bbbc69eef21 sys/crypto/aes/arch/arm/aes_neon_32.S
--- a/sys/crypto/aes/arch/arm/aes_neon_32.S     Thu Sep 10 11:30:28 2020 +0000
+++ b/sys/crypto/aes/arch/arm/aes_neon_32.S     Thu Sep 10 11:31:03 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $       */
+/*     $NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $       */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <arm/asm.h>
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $")
 
        .fpu    neon
 
@@ -54,36 +54,26 @@
        .byte   0x09,0x08,0x05,0x02,0x0C,0x0E,0x0D,0x03
 END(inva)
 
-       .type   mc_forward,_ASM_TYPE_OBJECT
-mc_forward:
-       .byte   0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04 /* 0 */
+       .type   mc,_ASM_TYPE_OBJECT
+mc:
+       .byte   0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04 /* 0 forward */
        .byte   0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C
-
-       .byte   0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08 /* 1 */
+       .byte   0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06 /* 0 backward */
+       .byte   0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E
+       .byte   0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08 /* 1 forward */
        .byte   0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00
-
-       .byte   0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C /* 2 */
+       .byte   0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02 /* 1 backward */
+       .byte   0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A
+       .byte   0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C /* 2 forward */
        .byte   0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04
-
+       .byte   0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E /* 2 backward */
+       .byte   0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06
 .Lmc_forward_3:
-       .byte   0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00 /* 3 */
+       .byte   0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00 /* 3 forward */
        .byte   0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08
-END(mc_forward)
-
-       .type   mc_backward,_ASM_TYPE_OBJECT
-mc_backward:
-       .byte   0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06 /* 0 */
-       .byte   0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E
-
-       .byte   0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02 /* 1 */
-       .byte   0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A
-
-       .byte   0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E /* 2 */
-       .byte   0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06
-
-       .byte   0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A /* 3 */
+       .byte   0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A /* 3 backward */
        .byte   0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02
-END(mc_backward)
+END(mc)
 
        .type   sr,_ASM_TYPE_OBJECT
 sr:
@@ -210,8 +200,7 @@
 
        /*
         * r3: rmod4
-        * r4: mc_forward
-        * r5: mc_backward
+        * r4: mc
         * r6,r8,r10,ip: temporaries
         * q0={d0-d1}: x/ak/A
         * q1={d2-d3}: 0x0f0f...
@@ -225,8 +214,8 @@
         * q9={d18-d19}: sb2[1]
         * q10={d20-d21}: inv
         * q11={d22-d23}: inva
-        * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc_backward[rmod4]
-        * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc_forward[rmod4]
+        * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc[rmod4].forward
+        * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc[rmod4].backward
         * q14={d28-d29}: rk/A2/A2_B_D
         * q15={d30-d31}: A2_B/sr[rmod4]
         */
@@ -254,9 +243,8 @@
        vld1.8  {q8-q9}, [r6 :256]      /* q8 = sb2[0], q9 = sb2[1] */
        vld1.8  {q10-q11}, [r8 :256]    /* q10 = inv, q11 = inva */
 
-       /* (r4, r5) := (&mc_forward[0], &mc_backward[0]) */
-       add     r4, ip, #(mc_forward - .Lconstants)
-       add     r5, ip, #(mc_backward - .Lconstants)
+       /* r4 := mc */
+       add     r4, ip, #(mc - .Lconstants)
 
        /* (q2, q3) := (lo, hi) */
        vshr.u8 q3, q0, #4
@@ -291,13 +279,11 @@
        vtbl.8  d25, {q8}, d5
        vtbl.8  d26, {q9}, d6
        vtbl.8  d27, {q9}, d7
+       add     r6, r4, r3, lsl #5      /* r6 := &mc[rmod4] */
        veor    q14, q12, q13
 
-       /* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */
-       add     r6, r4, r3, lsl #4
-       add     r8, r5, r3, lsl #4
-       vld1.8  {q12}, [r6 :128]
-       vld1.8  {q13}, [r8 :128]
+       /* (q12, q13) := (mc[rmod4].forward, mc[rmod4].backward) */
+       vld1.8  {q12-q13}, [r6 :256]
 
        /* q15 := A2_B = A2 + A(mcf) */
        vtbl.8  d30, {q0}, d24
@@ -474,7 +460,7 @@
        add     r8, ip, #(.Lmc_forward_3 - .Lconstants)
        vld1.8  {q6-q7}, [r4 :256]      /* q6 := dsbb[0], q7 := dsbb[1] */
        vld1.8  {q10-q11}, [r6 :256]    /* q10 := inv, q11 := inva */
-       vld1.8  {q15}, [r8 :128]        /* q15 := mc_forward[3] */
+       vld1.8  {q15}, [r8 :128]        /* q15 := mc[3].forward */
 
        /* (q2, q3) := (lo, hi) */
        vshr.u8 q3, q0, #4



Home | Main Index | Thread Index | Old Index