Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/crypto/aes/arch/arm aesarmv8: Adapt aes_armv8_64.S to big-endian.



details:   https://anonhg.NetBSD.org/src/rev/191c25d28752
branches:  trunk
changeset: 954716:191c25d28752
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Tue Sep 08 23:57:13 2020 +0000

description:
aesarmv8: Adapt aes_armv8_64.S to big-endian.

Patch mainly from (and tested by) jakllsch@ with minor tweaks by me.

diffstat:

 sys/crypto/aes/arch/arm/aes_armv8_64.S |  170 +++++++++++++++++---------------
 1 files changed, 90 insertions(+), 80 deletions(-)

diffs (truncated from 455 to 300 lines):

diff -r cbb0725e365f -r 191c25d28752 sys/crypto/aes/arch/arm/aes_armv8_64.S
--- a/sys/crypto/aes/arch/arm/aes_armv8_64.S    Tue Sep 08 22:48:24 2020 +0000
+++ b/sys/crypto/aes/arch/arm/aes_armv8_64.S    Tue Sep 08 23:57:13 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: aes_armv8_64.S,v 1.12 2020/08/08 14:47:01 riastradh Exp $      */
+/*     $NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $      */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <aarch64/asm.h>
 
-RCSID("$NetBSD: aes_armv8_64.S,v 1.12 2020/08/08 14:47:01 riastradh Exp $")
+RCSID("$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $")
 
        .arch_extension aes
 
@@ -114,11 +114,11 @@
  *     Standard ABI calling convention.
  */
 ENTRY(aesarmv8_setenckey128)
-       ldr     q1, [x1]        /* q1 := master key */
+       ld1     {v1.16b}, [x1]  /* q1 := master key */
 
        adrl    x4, unshiftrows_rotword_3
        eor     v0.16b, v0.16b, v0.16b  /* q0 := 0 */
-       ldr     q16, [x4]       /* q16 := unshiftrows_rotword_3 table */
+       ld1     {v16.16b}, [x4] /* q16 := unshiftrows_rotword_3 table */
 
        str     q1, [x0], #0x10 /* store master key as first round key */
        mov     x2, #10         /* round count */
@@ -171,14 +171,14 @@
  *     Standard ABI calling convention.
  */
 ENTRY(aesarmv8_setenckey192)
-       ldr     q1, [x1], #0x10 /* q1 := master key[0:128) */
-       ldr     d2, [x1]        /* d2 := master key[128:192) */
+       ld1     {v1.16b}, [x1], #0x10   /* q1 := master key[0:128) */
+       ld1     {v2.8b}, [x1]   /* d2 := master key[128:192) */
 
        adrl    x4, unshiftrows_rotword_1
        adrl    x5, unshiftrows_rotword_3
        eor     v0.16b, v0.16b, v0.16b  /* q0 := 0 */
-       ldr     q16, [x4]       /* q16 := unshiftrows_rotword_1 */
-       ldr     q17, [x5]       /* q17 := unshiftrows_rotword_3 */
+       ld1     {v16.16b}, [x4] /* q16 := unshiftrows_rotword_1 */
+       ld1     {v17.16b}, [x5] /* q17 := unshiftrows_rotword_3 */
 
        str     q1, [x0], #0x10 /* store master key[0:128) as round key */
        mov     x2, #12         /* round count */
@@ -351,13 +351,13 @@
  */
 ENTRY(aesarmv8_setenckey256)
        /* q1 := key[0:128), q2 := key[128:256) */
-       ldp     q1, q2, [x1], #0x20
+       ld1     {v1.16b-v2.16b}, [x1], #0x20
 
        adrl    x4, unshiftrows_rotword_3
        adrl    x5, unshiftrows_3
        eor     v0.16b, v0.16b, v0.16b  /* q0 := 0 */
-       ldr     q16, [x4]       /* q16 := unshiftrows_rotword_3 */
-       ldr     q17, [x5]       /* q17 := unshiftrows_3 */
+       ld1     {v16.16b}, [x4] /* q16 := unshiftrows_rotword_3 */
+       ld1     {v17.16b}, [x5] /* q17 := unshiftrows_3 */
 
        /* store master key as first two round keys */
        stp     q1, q2, [x0], #0x20
@@ -461,9 +461,9 @@
 ENTRY(aesarmv8_enc)
        stp     fp, lr, [sp, #-16]!     /* push stack frame */
        mov     fp, sp
-       ldr     q0, [x1]        /* q0 := ptxt */
+       ld1     {v0.16b}, [x1]  /* q0 := ptxt */
        bl      aesarmv8_enc1   /* q0 := ctxt; trash x0/x3/q16 */
-       str     q0, [x2]        /* store ctxt */
+       st1     {v0.16b}, [x2]  /* store ctxt */
        ldp     fp, lr, [sp], #16       /* pop stack frame */
        ret
 END(aesarmv8_enc)
@@ -479,9 +479,9 @@
 ENTRY(aesarmv8_dec)
        stp     fp, lr, [sp, #-16]!     /* push stack frame */
        mov     fp, sp
-       ldr     q0, [x1]        /* q0 := ctxt */
+       ld1     {v0.16b}, [x1]  /* q0 := ctxt */
        bl      aesarmv8_dec1   /* q0 := ptxt; trash x0/x3/q16 */
-       str     q0, [x2]        /* store ptxt */
+       st1     {v0.16b}, [x2]  /* store ptxt */
        ldp     fp, lr, [sp], #16       /* pop stack frame */
        ret
 END(aesarmv8_dec)
@@ -503,17 +503,17 @@
        mov     fp, sp
        mov     x9, x0                  /* x9 := enckey */
        mov     x10, x3                 /* x10 := nbytes */
-       ldr     q0, [x4]                /* q0 := chaining value */
+       ld1     {v0.16b}, [x4]          /* q0 := chaining value */
        _ALIGN_TEXT
-1:     ldr     q1, [x1], #0x10         /* q1 := plaintext block */
+1:     ld1     {v1.16b}, [x1], #0x10   /* q1 := plaintext block */
        eor     v0.16b, v0.16b, v1.16b  /* q0 := cv ^ ptxt */
        mov     x0, x9                  /* x0 := enckey */
        mov     x3, x5                  /* x3 := nrounds */
        bl      aesarmv8_enc1           /* q0 := ctxt; trash x0/x3/q16 */
        subs    x10, x10, #0x10         /* count down nbytes */
-       str     q0, [x2], #0x10         /* store ciphertext block */
+       st1     {v0.16b}, [x2], #0x10   /* store ciphertext block */
        b.ne    1b                      /* repeat if x10 is nonzero */
-       str     q0, [x4]                /* store chaining value */
+       st1     {v0.16b}, [x4]          /* store chaining value */
        ldp     fp, lr, [sp], #16       /* pop stack frame */
 2:     ret
 END(aesarmv8_cbc_enc)
@@ -533,18 +533,21 @@
 ENTRY(aesarmv8_cbc_dec1)
        stp     fp, lr, [sp, #-16]!     /* push stack frame */
        mov     fp, sp
-       ldr     q24, [x4]               /* q24 := iv */
+       ld1     {v24.16b}, [x4]         /* q24 := iv */
        mov     x9, x0                  /* x9 := enckey */
        mov     x10, x3                 /* x10 := nbytes */
        add     x1, x1, x3              /* x1 := pointer past end of in */
        add     x2, x2, x3              /* x2 := pointer past end of out */
-       ldr     q0, [x1, #-0x10]!       /* q0 := last ciphertext block */
-       str     q0, [x4]                /* update iv */
+       sub     x1, x1, #0x10
+       ld1     {v0.16b}, [x1]          /* q0 := last ciphertext block */
+       st1     {v0.16b}, [x4]          /* update iv */
        b       2f
        _ALIGN_TEXT
-1:     ldr     q31, [x1, #-0x10]!      /* q31 := chaining value */
+1:     sub     x1, x1, #0x10
+       ld1     {v31.16b}, [x1]         /* q31 := chaining value */
+       sub     x2, x2, #0x10
        eor     v0.16b, v0.16b, v31.16b /* q0 := plaintext block */
-       str     q0, [x2, #-0x10]!       /* store plaintext block */
+       st1     {v0.16b}, [x2]          /* store plaintext block */
        mov     v0.16b, v31.16b         /* move cv = ciphertext block */
 2:     mov     x0, x9                  /* x0 := enckey */
        mov     x3, x5                  /* x3 := nrounds */
@@ -552,7 +555,8 @@
        subs    x10, x10, #0x10         /* count down nbytes */
        b.ne    1b                      /* repeat if more blocks */
        eor     v0.16b, v0.16b, v24.16b /* q0 := first plaintext block */
-       str     q0, [x2, #-0x10]!       /* store first plaintext block */
+       sub     x2, x2, #0x10           /* store first plaintext block */
+       st1     {v0.16b}, [x2]
        ldp     fp, lr, [sp], #16       /* pop stack frame */
        ret
 END(aesarmv8_cbc_dec1)
@@ -571,21 +575,26 @@
 ENTRY(aesarmv8_cbc_dec8)
        stp     fp, lr, [sp, #-16]!     /* push stack frame */
        mov     fp, sp
-       ldr     q24, [x4]               /* q24 := iv */
+       ld1     {v24.16b}, [x4]         /* q24 := iv */
        mov     x9, x0                  /* x9 := enckey */
        mov     x10, x3                 /* x10 := nbytes */
        add     x1, x1, x3              /* x1 := pointer past end of in */
        add     x2, x2, x3              /* x2 := pointer past end of out */
-       ldp     q6, q7, [x1, #-0x20]!   /* q6, q7 := last ciphertext blocks */
-       str     q7, [x4]                /* update iv */
+       sub     x1, x1, #0x20
+       ld1     {v6.16b, v7.16b}, [x1]  /* q6, q7 := last ciphertext blocks */
+       st1     {v7.16b}, [x4]          /* update iv */
        b       2f
        _ALIGN_TEXT
-1:     ldp     q6, q7, [x1, #-0x20]!
+1:     sub     x1, x1, #0x20
+       ld1     {v6.16b, v7.16b}, [x1]
        eor     v0.16b, v0.16b, v7.16b  /* q0 := pt0 */
-       stp     q0, q1, [x2, #-0x20]!
-2:     ldp     q4, q5, [x1, #-0x20]!
-       ldp     q2, q3, [x1, #-0x20]!
-       ldp     q0, q1, [x1, #-0x20]!
+       sub     x2, x2, #0x20
+       st1     {v0.16b, v1.16b}, [x2]
+2:     sub     x1, x1, #0x20
+       ld1     {v4.16b-v5.16b}, [x1]
+       sub     x1, x1, #0x40
+       ld1     {v0.16b-v3.16b}, [x1]
+
        mov     v31.16b, v6.16b         /* q[24+i] := cv[i], 0<i<8 */
        mov     v30.16b, v5.16b
        mov     v29.16b, v4.16b
@@ -605,12 +614,14 @@
        eor     v2.16b, v2.16b, v26.16b
        eor     v1.16b, v1.16b, v25.16b
        subs    x10, x10, #0x80         /* count down nbytes */
-       stp     q6, q7, [x2, #-0x20]!   /* store plaintext blocks */
-       stp     q4, q5, [x2, #-0x20]!
-       stp     q2, q3, [x2, #-0x20]!
+       sub     x2, x2, #0x20           /* store plaintext blocks */
+       st1     {v6.16b-v7.16b}, [x2]
+       sub     x2, x2, #0x40
+       st1     {v2.16b-v5.16b}, [x2]
        b.ne    1b                      /* repeat if there's more */
        eor     v0.16b, v0.16b, v24.16b /* q0 := pt0 */
-       stp     q0, q1, [x2, #-0x20]!   /* store first two plaintext blocks */
+       sub     x2, x2, #0x20
+       st1     {v0.16b, v1.16b}, [x2]  /* store first two plaintext blocks */
        ldp     fp, lr, [sp], #16       /* pop stack frame */
        ret
 END(aesarmv8_cbc_dec8)
@@ -632,19 +643,19 @@
        mov     fp, sp
        mov     x9, x0                  /* x9 := enckey */
        mov     x10, x3                 /* x10 := nbytes */
-       ldr     q31, [x4]               /* q31 := tweak */
+       ld1     {v31.16b}, [x4]         /* q31 := tweak */
        _ALIGN_TEXT
-1:     ldr     q0, [x1], #0x10         /* q0 := ptxt */
+1:     ld1     {v0.16b}, [x1], #0x10   /* q0 := ptxt */
        mov     x0, x9                  /* x0 := enckey */
        mov     x3, x5                  /* x3 := nrounds */
        eor     v0.16b, v0.16b, v31.16b /* q0 := ptxt ^ tweak */
        bl      aesarmv8_enc1           /* q0 := AES(...); trash x0/x3/q16 */
        eor     v0.16b, v0.16b, v31.16b /* q0 := AES(ptxt ^ tweak) ^ tweak */
-       str     q0, [x2], #0x10         /* store ciphertext block */
+       st1     {v0.16b}, [x2], #0x10   /* store ciphertext block */
        bl      aesarmv8_xts_mulx       /* q31 *= x; trash x0/q0/q1 */
        subs    x10, x10, #0x10         /* count down nbytes */
        b.ne    1b                      /* repeat if more blocks */
-       str     q31, [x4]               /* update tweak */
+       st1     {v31.16b}, [x4]         /* update tweak */
        ldp     fp, lr, [sp], #16       /* pop stack frame */
        ret
 END(aesarmv8_xts_enc1)
@@ -665,7 +676,7 @@
        mov     fp, sp
        mov     x9, x0                  /* x9 := enckey */
        mov     x10, x3                 /* x10 := nbytes */
-       ldr     q31, [x4]               /* q31 := tweak */
+       ld1     {v31.16b}, [x4]         /* q31 := tweak */
        _ALIGN_TEXT
 1:     mov     v24.16b, v31.16b        /* q24 := tweak[0] */
        bl      aesarmv8_xts_mulx       /* q31 *= x; trash x0/q0/q1 */
@@ -682,10 +693,10 @@
        mov     v30.16b, v31.16b        /* q30 := tweak[6] */
        bl      aesarmv8_xts_mulx       /* q31 *= x; trash x0/q0/q1 */
                                        /* q31 := tweak[7] */
-       ldp     q0, q1, [x1], #0x20     /* q[i] := ptxt[i] */
-       ldp     q2, q3, [x1], #0x20
-       ldp     q4, q5, [x1], #0x20
-       ldp     q6, q7, [x1], #0x20
+       ld1     {v0.16b,v1.16b}, [x1], #0x20    /* q[i] := ptxt[i] */
+       ld1     {v2.16b,v3.16b}, [x1], #0x20
+       ld1     {v4.16b,v5.16b}, [x1], #0x20
+       ld1     {v6.16b,v7.16b}, [x1], #0x20
        eor     v0.16b, v0.16b, v24.16b /* q[i] := ptxt[i] ^ tweak[i] */
        eor     v1.16b, v1.16b, v25.16b
        eor     v2.16b, v2.16b, v26.16b
@@ -705,14 +716,14 @@
        eor     v5.16b, v5.16b, v29.16b
        eor     v6.16b, v6.16b, v30.16b
        eor     v7.16b, v7.16b, v31.16b
-       stp     q0, q1, [x2], #0x20     /* store ciphertext blocks */
-       stp     q2, q3, [x2], #0x20
-       stp     q4, q5, [x2], #0x20
-       stp     q6, q7, [x2], #0x20
+       st1     {v0.16b,v1.16b}, [x2], #0x20    /* store ciphertext blocks */
+       st1     {v2.16b,v3.16b}, [x2], #0x20
+       st1     {v4.16b,v5.16b}, [x2], #0x20
+       st1     {v6.16b,v7.16b}, [x2], #0x20
        bl      aesarmv8_xts_mulx       /* q31 *= x; trash x0/q0/q1 */
        subs    x10, x10, #0x80         /* count down nbytes */
        b.ne    1b                      /* repeat if more block groups */
-       str     q31, [x4]               /* update tweak */
+       st1     {v31.16b}, [x4]         /* update tweak */
        ldp     fp, lr, [sp], #16       /* pop stack frame */
        ret
 END(aesarmv8_xts_enc8)
@@ -734,19 +745,19 @@
        mov     fp, sp
        mov     x9, x0                  /* x9 := deckey */
        mov     x10, x3                 /* x10 := nbytes */
-       ldr     q31, [x4]               /* q31 := tweak */
+       ld1     {v31.16b}, [x4]         /* q31 := tweak */
        _ALIGN_TEXT
-1:     ldr     q0, [x1], #0x10         /* q0 := ctxt */
+1:     ld1     {v0.16b}, [x1], #0x10   /* q0 := ctxt */
        mov     x0, x9                  /* x0 := deckey */
        mov     x3, x5                  /* x3 := nrounds */
        eor     v0.16b, v0.16b, v31.16b /* q0 := ctxt ^ tweak */
        bl      aesarmv8_dec1           /* q0 := AES(...); trash x0/x3/q16 */
        eor     v0.16b, v0.16b, v31.16b /* q0 := AES(ctxt ^ tweak) ^ tweak */
-       str     q0, [x2], #0x10         /* store plaintext block */
+       st1     {v0.16b}, [x2], #0x10   /* store plaintext block */
        bl      aesarmv8_xts_mulx       /* q31 *= x; trash x0/q0/q1 */
        subs    x10, x10, #0x10         /* count down nbytes */
        b.ne    1b                      /* repeat if more blocks */
-       str     q31, [x4]               /* update tweak */
+       st1     {v31.16b}, [x4]         /* update tweak */
        ldp     fp, lr, [sp], #16       /* pop stack frame */
        ret
 END(aesarmv8_xts_dec1)
@@ -767,7 +778,7 @@
        mov     fp, sp
        mov     x9, x0                  /* x9 := deckey */



Home | Main Index | Thread Index | Old Index