Source-Changes-HG archive


[src/trunk]: src/crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64 regen...



details:   https://anonhg.NetBSD.org/src/rev/c8637a099704
branches:  trunk
changeset: 374730:c8637a099704
user:      christos <christos%NetBSD.org@localhost>
date:      Thu May 11 01:31:54 2023 +0000

description:
regen for 64 bit arm and make it link

diffstat:

 crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/Makefile           |      6 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S |  11581 ++++-----
 crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aes.inc            |      4 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aesv8-armx.S       |   3891 ++-
 crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/ghashv8-armx.S     |    665 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/modes.inc          |      4 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/whrlpool.inc       |      5 +
 7 files changed, 9275 insertions(+), 6881 deletions(-)

diffs (truncated from 16680 to 300 lines):

diff -r 2c6e26a401dc -r c8637a099704 crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/Makefile
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/Makefile   Thu May 11 00:32:48 2023 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/Makefile   Thu May 11 01:31:54 2023 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: Makefile,v 1.2 2018/03/07 16:05:44 christos Exp $
+#      $NetBSD: Makefile,v 1.3 2023/05/11 01:31:54 christos Exp $
 
 .include "bsd.own.mk"
 
@@ -8,9 +8,9 @@ CRYPTODIST=${NETBSDSRCDIR}/crypto
 regen:
        for i in $$(find ${OPENSSLSRC} -name \*arm\*.pl); do \
                case $$i in \
-               (*/charmap.pl|*/arm-xlate.pl|*/*v4*|*/*v7*);; \
+               (*/charmap.pl|*/arm-xlate.pl|*/*v4*);; \
                (*) perl -I${OPENSSLSRC}/crypto/perlasm \
-               -I${OPENSSLSRC}/crypto/bn/asm $$i linux /dev/stdout \
+               -I${OPENSSLSRC}/crypto/bn/asm $$i linux64 /dev/stdout \
                > $$(basename $$i .pl).S;; \
                esac; \
        done
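
For reference, the regen rule above amounts to running each perlasm script
through the translator by hand, now with the 64-bit "linux64" flavour instead
of the 32-bit "linux" one (and no longer skipping the *v7* scripts). A minimal
sketch for a single file; the OPENSSLSRC value and the script's location are
assumptions, not part of this commit:

    # Regenerate one AArch64 assembly file the same way the regen rule does.
    # OPENSSLSRC is assumed to point at the OpenSSL distribution in the tree.
    OPENSSLSRC=/usr/src/crypto/external/bsd/openssl/dist
    perl -I${OPENSSLSRC}/crypto/perlasm \
         -I${OPENSSLSRC}/crypto/bn/asm \
         ${OPENSSLSRC}/crypto/modes/asm/aes-gcm-armv8_64.pl \
         linux64 /dev/stdout > aes-gcm-armv8_64.S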
diff -r 2c6e26a401dc -r c8637a099704 crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S Thu May 11 00:32:48 2023 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S Thu May 11 01:31:54 2023 +0000
@@ -1,6024 +1,6015 @@
 #include "arm_arch.h"
 
 #if __ARM_MAX_ARCH__>=8
-.fpu   neon
-#ifdef __thumb2__
-.syntax        unified
-.thumb
-# define INST(a,b,c,d)   c,0xef,a,b
-#else
-.code  32
-# define INST(a,b,c,d)   a,b,c,0xf2
-#endif
-
+.arch  armv8-a+crypto
 .text
 .globl aes_gcm_enc_128_kernel
 .type  aes_gcm_enc_128_kernel,%function
 .align 4
 aes_gcm_enc_128_kernel:
-       cbz     r1, .L128_enc_ret
-       stp     r19, r20, [sp, #-112]!
-       mov     r16, r4
-       mov     r8, r5
-       stp     r21, r22, [sp, #16]
-       stp     r23, r24, [sp, #32]
+       cbz     x1, .L128_enc_ret
+       stp     x19, x20, [sp, #-112]!
+       mov     x16, x4
+       mov     x8, x5
+       stp     x21, x22, [sp, #16]
+       stp     x23, x24, [sp, #32]
        stp     d8, d9, [sp, #48]
        stp     d10, d11, [sp, #64]
        stp     d12, d13, [sp, #80]
        stp     d14, d15, [sp, #96]
 
-       ldp     r10, r11, [r16]              @ ctr96_b64, ctr96_t32
-       ldp     r13, r14, [r8, #160]                     @ load rk10
-
-       ld1     {v11.16b}, [r3]
+       ldp     x10, x11, [x16]              //ctr96_b64, ctr96_t32
+       ldp     x13, x14, [x8, #160]                     //load rk10
+
+       ld1     {v11.16b}, [x3]
        ext     v11.16b, v11.16b, v11.16b, #8
        rev64   v11.16b, v11.16b
-       lsr     r5, r1, #3              @ byte_len
-       mov     r15, r5
-
-       ldr     q27, [r8, #144]                                @ load rk9
-       add     r4, r0, r1, lsr #3   @ end_input_ptr
-       sub     r5, r5, #1      @ byte_len - 1
-
-       lsr     r12, r11, #32
-       ldr     q15, [r3, #112]                        @ load h4l | h4h
+       lsr     x5, x1, #3              //byte_len
+       mov     x15, x5
+
+       ldr     q27, [x8, #144]                                //load rk9
+       add     x4, x0, x1, lsr #3   //end_input_ptr
+       sub     x5, x5, #1      //byte_len - 1
+
+       lsr     x12, x11, #32
+       ldr     q15, [x3, #112]                        //load h4l | h4h
        ext     v15.16b, v15.16b, v15.16b, #8
 
-       fmov    d1, r10                               @ CTR block 1
-       rev     r12, r12                                @ rev_ctr32
-
-       add     r12, r12, #1                            @ increment rev_ctr32
-       orr     r11, r11, r11
-       ldr     q18, [r8, #0]                                  @ load rk0
-
-       rev     r9, r12                                 @ CTR block 1
-       add     r12, r12, #1                            @ CTR block 1
-       fmov    d3, r10                               @ CTR block 3
-
-       orr     r9, r11, r9, lsl #32            @ CTR block 1
-       ld1     { q0}, [r16]                             @ special case vector load initial counter so we can start first AES block as quickly as possible
-
-       fmov    v1.d[1], r9                               @ CTR block 1
-       rev     r9, r12                                 @ CTR block 2
-
-       fmov    d2, r10                               @ CTR block 2
-       orr     r9, r11, r9, lsl #32            @ CTR block 2
-       add     r12, r12, #1                            @ CTR block 2
-
-       fmov    v2.d[1], r9                               @ CTR block 2
-       rev     r9, r12                                 @ CTR block 3
-
-       orr     r9, r11, r9, lsl #32            @ CTR block 3
-       ldr     q19, [r8, #16]                                 @ load rk1
-
-       add     r12, r12, #1                            @ CTR block 3
-       fmov    v3.d[1], r9                               @ CTR block 3
-
-       ldr     q14, [r3, #80]                         @ load h3l | h3h
+       fmov    d1, x10                               //CTR block 1
+       rev     w12, w12                                //rev_ctr32
+
+       add     w12, w12, #1                            //increment rev_ctr32
+       orr     w11, w11, w11
+       ldr     q18, [x8, #0]                                  //load rk0
+
+       rev     w9, w12                                 //CTR block 1
+       add     w12, w12, #1                            //CTR block 1
+       fmov    d3, x10                               //CTR block 3
+
+       orr     x9, x11, x9, lsl #32            //CTR block 1
+       ld1     { v0.16b}, [x16]                             //special case vector load initial counter so we can start first AES block as quickly as possible
+
+       fmov    v1.d[1], x9                               //CTR block 1
+       rev     w9, w12                                 //CTR block 2
+
+       fmov    d2, x10                               //CTR block 2
+       orr     x9, x11, x9, lsl #32            //CTR block 2
+       add     w12, w12, #1                            //CTR block 2
+
+       fmov    v2.d[1], x9                               //CTR block 2
+       rev     w9, w12                                 //CTR block 3
+
+       orr     x9, x11, x9, lsl #32            //CTR block 3
+       ldr     q19, [x8, #16]                                 //load rk1
+
+       add     w12, w12, #1                            //CTR block 3
+       fmov    v3.d[1], x9                               //CTR block 3
+
+       ldr     q14, [x3, #80]                         //load h3l | h3h
        ext     v14.16b, v14.16b, v14.16b, #8
 
-       aese    q1, v18.16b
-       aesmc   q1, q1          @ AES block 1 - round 0
-       ldr     q20, [r8, #32]                                 @ load rk2
-
-       aese    q2, v18.16b
-       aesmc   q2, q2          @ AES block 2 - round 0
-       ldr     q12, [r3, #32]                         @ load h1l | h1h
+       aese    v1.16b, v18.16b
+       aesmc   v1.16b, v1.16b          //AES block 1 - round 0
+       ldr     q20, [x8, #32]                                 //load rk2
+
+       aese    v2.16b, v18.16b
+       aesmc   v2.16b, v2.16b          //AES block 2 - round 0
+       ldr     q12, [x3, #32]                         //load h1l | h1h
        ext     v12.16b, v12.16b, v12.16b, #8
 
-       aese    q0, v18.16b
-       aesmc   q0, q0          @ AES block 0 - round 0
-       ldr     q26, [r8, #128]                                @ load rk8
-
-       aese    q3, v18.16b
-       aesmc   q3, q3          @ AES block 3 - round 0
-       ldr     q21, [r8, #48]                                 @ load rk3
-
-       aese    q2, v19.16b
-       aesmc   q2, q2          @ AES block 2 - round 1
-       trn2    v17.2d,  v14.2d,    v15.2d                      @ h4l | h3l
-
-       aese    q0, v19.16b
-       aesmc   q0, q0          @ AES block 0 - round 1
-       ldr     q24, [r8, #96]                                 @ load rk6
-
-       aese    q1, v19.16b
-       aesmc   q1, q1          @ AES block 1 - round 1
-       ldr     q25, [r8, #112]                                @ load rk7
-
-       aese    q3, v19.16b
-       aesmc   q3, q3          @ AES block 3 - round 1
-       trn1    q9, v14.2d,    v15.2d                      @ h4h | h3h
-
-       aese    q0, v20.16b
-       aesmc   q0, q0          @ AES block 0 - round 2
-       ldr     q23, [r8, #80]                                 @ load rk5
-
-       aese    q1, v20.16b
-       aesmc   q1, q1          @ AES block 1 - round 2
-       ldr     q13, [r3, #64]                         @ load h2l | h2h
+       aese    v0.16b, v18.16b
+       aesmc   v0.16b, v0.16b          //AES block 0 - round 0
+       ldr     q26, [x8, #128]                                //load rk8
+
+       aese    v3.16b, v18.16b
+       aesmc   v3.16b, v3.16b          //AES block 3 - round 0
+       ldr     q21, [x8, #48]                                 //load rk3
+
+       aese    v2.16b, v19.16b
+       aesmc   v2.16b, v2.16b          //AES block 2 - round 1
+       trn2    v17.2d,  v14.2d,    v15.2d                      //h4l | h3l
+
+       aese    v0.16b, v19.16b
+       aesmc   v0.16b, v0.16b          //AES block 0 - round 1
+       ldr     q24, [x8, #96]                                 //load rk6
+
+       aese    v1.16b, v19.16b
+       aesmc   v1.16b, v1.16b          //AES block 1 - round 1
+       ldr     q25, [x8, #112]                                //load rk7
+
+       aese    v3.16b, v19.16b
+       aesmc   v3.16b, v3.16b          //AES block 3 - round 1
+       trn1    v9.2d, v14.2d,    v15.2d                      //h4h | h3h
+
+       aese    v0.16b, v20.16b
+       aesmc   v0.16b, v0.16b          //AES block 0 - round 2
+       ldr     q23, [x8, #80]                                 //load rk5
+
+       aese    v1.16b, v20.16b
+       aesmc   v1.16b, v1.16b          //AES block 1 - round 2
+       ldr     q13, [x3, #64]                         //load h2l | h2h
        ext     v13.16b, v13.16b, v13.16b, #8
 
-       aese    q3, v20.16b
-       aesmc   q3, q3          @ AES block 3 - round 2
-
-       aese    q2, v20.16b
-       aesmc   q2, q2          @ AES block 2 - round 2
-       eor     v17.16b, v17.16b, q9                  @ h4k | h3k
-
-       aese    q0, v21.16b
-       aesmc   q0, q0          @ AES block 0 - round 3
-
-       aese    q1, v21.16b
-       aesmc   q1, q1          @ AES block 1 - round 3
-
-       aese    q2, v21.16b
-       aesmc   q2, q2          @ AES block 2 - round 3
-       ldr     q22, [r8, #64]                                 @ load rk4
-
-       aese    q3, v21.16b
-       aesmc   q3, q3          @ AES block 3 - round 3
-
-       and     r5, r5, #0xffffffffffffffc0    @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
-       trn2    v16.2d,  v12.2d,    v13.2d                      @ h2l | h1l
-
-       aese    q3, v22.16b
-       aesmc   q3, q3          @ AES block 3 - round 4
-       add     r5, r5, r0
-
-       aese    q2, v22.16b
-       aesmc   q2, q2          @ AES block 2 - round 4
-       cmp     r0, r5                   @ check if we have <= 4 blocks
-
-       aese    q0, v22.16b
-       aesmc   q0, q0          @ AES block 0 - round 4
-
-       aese    q3, v23.16b
-       aesmc   q3, q3          @ AES block 3 - round 5
-
-       aese    q2, v23.16b
-       aesmc   q2, q2          @ AES block 2 - round 5
-
-       aese    q0, v23.16b
-       aesmc   q0, q0          @ AES block 0 - round 5
-
-       aese    q3, v24.16b
-       aesmc   q3, q3          @ AES block 3 - round 6
-
-       aese    q1, v22.16b
-       aesmc   q1, q1          @ AES block 1 - round 4
-
-       aese    q2, v24.16b
-       aesmc   q2, q2          @ AES block 2 - round 6
-       trn1    q8,    v12.2d,    v13.2d                      @ h2h | h1h
-
-       aese    q0, v24.16b
-       aesmc   q0, q0          @ AES block 0 - round 6
-
-       aese    q1, v23.16b
-       aesmc   q1, q1          @ AES block 1 - round 5
-
-       aese    q3, v25.16b
-       aesmc   q3, q3          @ AES block 3 - round 7
-
-       aese    q0, v25.16b
-       aesmc   q0, q0          @ AES block 0 - round 7
-


