Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64 regen...
details: https://anonhg.NetBSD.org/src/rev/c8637a099704
branches: trunk
changeset: 374730:c8637a099704
user: christos <christos%NetBSD.org@localhost>
date: Thu May 11 01:31:54 2023 +0000
description:
Regenerate for 64-bit ARM (AArch64) and make it link.
diffstat:
crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/Makefile | 6 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S | 11581 ++++-----
crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aes.inc | 4 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aesv8-armx.S | 3891 ++-
crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/ghashv8-armx.S | 665 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/modes.inc | 4 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/whrlpool.inc | 5 +
7 files changed, 9275 insertions(+), 6881 deletions(-)
diffs (truncated from 16680 to 300 lines):
diff -r 2c6e26a401dc -r c8637a099704 crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/Makefile
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/Makefile Thu May 11 00:32:48 2023 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/Makefile Thu May 11 01:31:54 2023 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.2 2018/03/07 16:05:44 christos Exp $
+# $NetBSD: Makefile,v 1.3 2023/05/11 01:31:54 christos Exp $
.include "bsd.own.mk"
@@ -8,9 +8,9 @@ CRYPTODIST=${NETBSDSRCDIR}/crypto
regen:
for i in $$(find ${OPENSSLSRC} -name \*arm\*.pl); do \
case $$i in \
- (*/charmap.pl|*/arm-xlate.pl|*/*v4*|*/*v7*);; \
+ (*/charmap.pl|*/arm-xlate.pl|*/*v4*);; \
(*) perl -I${OPENSSLSRC}/crypto/perlasm \
- -I${OPENSSLSRC}/crypto/bn/asm $$i linux /dev/stdout \
+ -I${OPENSSLSRC}/crypto/bn/asm $$i linux64 /dev/stdout \
> $$(basename $$i .pl).S;; \
esac; \
done
diff -r 2c6e26a401dc -r c8637a099704 crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S Thu May 11 00:32:48 2023 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S Thu May 11 01:31:54 2023 +0000
@@ -1,6024 +1,6015 @@
#include "arm_arch.h"
#if __ARM_MAX_ARCH__>=8
-.fpu neon
-#ifdef __thumb2__
-.syntax unified
-.thumb
-# define INST(a,b,c,d) c,0xef,a,b
-#else
-.code 32
-# define INST(a,b,c,d) a,b,c,0xf2
-#endif
-
+.arch armv8-a+crypto
.text
.globl aes_gcm_enc_128_kernel
.type aes_gcm_enc_128_kernel,%function
.align 4
aes_gcm_enc_128_kernel:
- cbz r1, .L128_enc_ret
- stp r19, r20, [sp, #-112]!
- mov r16, r4
- mov r8, r5
- stp r21, r22, [sp, #16]
- stp r23, r24, [sp, #32]
+ cbz x1, .L128_enc_ret
+ stp x19, x20, [sp, #-112]!
+ mov x16, x4
+ mov x8, x5
+ stp x21, x22, [sp, #16]
+ stp x23, x24, [sp, #32]
stp d8, d9, [sp, #48]
stp d10, d11, [sp, #64]
stp d12, d13, [sp, #80]
stp d14, d15, [sp, #96]
- ldp r10, r11, [r16] @ ctr96_b64, ctr96_t32
- ldp r13, r14, [r8, #160] @ load rk10
-
- ld1 {v11.16b}, [r3]
+ ldp x10, x11, [x16] //ctr96_b64, ctr96_t32
+ ldp x13, x14, [x8, #160] //load rk10
+
+ ld1 {v11.16b}, [x3]
ext v11.16b, v11.16b, v11.16b, #8
rev64 v11.16b, v11.16b
- lsr r5, r1, #3 @ byte_len
- mov r15, r5
-
- ldr q27, [r8, #144] @ load rk9
- add r4, r0, r1, lsr #3 @ end_input_ptr
- sub r5, r5, #1 @ byte_len - 1
-
- lsr r12, r11, #32
- ldr q15, [r3, #112] @ load h4l | h4h
+ lsr x5, x1, #3 //byte_len
+ mov x15, x5
+
+ ldr q27, [x8, #144] //load rk9
+ add x4, x0, x1, lsr #3 //end_input_ptr
+ sub x5, x5, #1 //byte_len - 1
+
+ lsr x12, x11, #32
+ ldr q15, [x3, #112] //load h4l | h4h
ext v15.16b, v15.16b, v15.16b, #8
- fmov d1, r10 @ CTR block 1
- rev r12, r12 @ rev_ctr32
-
- add r12, r12, #1 @ increment rev_ctr32
- orr r11, r11, r11
- ldr q18, [r8, #0] @ load rk0
-
- rev r9, r12 @ CTR block 1
- add r12, r12, #1 @ CTR block 1
- fmov d3, r10 @ CTR block 3
-
- orr r9, r11, r9, lsl #32 @ CTR block 1
- ld1 { q0}, [r16] @ special case vector load initial counter so we can start first AES block as quickly as possible
-
- fmov v1.d[1], r9 @ CTR block 1
- rev r9, r12 @ CTR block 2
-
- fmov d2, r10 @ CTR block 2
- orr r9, r11, r9, lsl #32 @ CTR block 2
- add r12, r12, #1 @ CTR block 2
-
- fmov v2.d[1], r9 @ CTR block 2
- rev r9, r12 @ CTR block 3
-
- orr r9, r11, r9, lsl #32 @ CTR block 3
- ldr q19, [r8, #16] @ load rk1
-
- add r12, r12, #1 @ CTR block 3
- fmov v3.d[1], r9 @ CTR block 3
-
- ldr q14, [r3, #80] @ load h3l | h3h
+ fmov d1, x10 //CTR block 1
+ rev w12, w12 //rev_ctr32
+
+ add w12, w12, #1 //increment rev_ctr32
+ orr w11, w11, w11
+ ldr q18, [x8, #0] //load rk0
+
+ rev w9, w12 //CTR block 1
+ add w12, w12, #1 //CTR block 1
+ fmov d3, x10 //CTR block 3
+
+ orr x9, x11, x9, lsl #32 //CTR block 1
+ ld1 { v0.16b}, [x16] //special case vector load initial counter so we can start first AES block as quickly as possible
+
+ fmov v1.d[1], x9 //CTR block 1
+ rev w9, w12 //CTR block 2
+
+ fmov d2, x10 //CTR block 2
+ orr x9, x11, x9, lsl #32 //CTR block 2
+ add w12, w12, #1 //CTR block 2
+
+ fmov v2.d[1], x9 //CTR block 2
+ rev w9, w12 //CTR block 3
+
+ orr x9, x11, x9, lsl #32 //CTR block 3
+ ldr q19, [x8, #16] //load rk1
+
+ add w12, w12, #1 //CTR block 3
+ fmov v3.d[1], x9 //CTR block 3
+
+ ldr q14, [x3, #80] //load h3l | h3h
ext v14.16b, v14.16b, v14.16b, #8
- aese q1, v18.16b
- aesmc q1, q1 @ AES block 1 - round 0
- ldr q20, [r8, #32] @ load rk2
-
- aese q2, v18.16b
- aesmc q2, q2 @ AES block 2 - round 0
- ldr q12, [r3, #32] @ load h1l | h1h
+ aese v1.16b, v18.16b
+ aesmc v1.16b, v1.16b //AES block 1 - round 0
+ ldr q20, [x8, #32] //load rk2
+
+ aese v2.16b, v18.16b
+ aesmc v2.16b, v2.16b //AES block 2 - round 0
+ ldr q12, [x3, #32] //load h1l | h1h
ext v12.16b, v12.16b, v12.16b, #8
- aese q0, v18.16b
- aesmc q0, q0 @ AES block 0 - round 0
- ldr q26, [r8, #128] @ load rk8
-
- aese q3, v18.16b
- aesmc q3, q3 @ AES block 3 - round 0
- ldr q21, [r8, #48] @ load rk3
-
- aese q2, v19.16b
- aesmc q2, q2 @ AES block 2 - round 1
- trn2 v17.2d, v14.2d, v15.2d @ h4l | h3l
-
- aese q0, v19.16b
- aesmc q0, q0 @ AES block 0 - round 1
- ldr q24, [r8, #96] @ load rk6
-
- aese q1, v19.16b
- aesmc q1, q1 @ AES block 1 - round 1
- ldr q25, [r8, #112] @ load rk7
-
- aese q3, v19.16b
- aesmc q3, q3 @ AES block 3 - round 1
- trn1 q9, v14.2d, v15.2d @ h4h | h3h
-
- aese q0, v20.16b
- aesmc q0, q0 @ AES block 0 - round 2
- ldr q23, [r8, #80] @ load rk5
-
- aese q1, v20.16b
- aesmc q1, q1 @ AES block 1 - round 2
- ldr q13, [r3, #64] @ load h2l | h2h
+ aese v0.16b, v18.16b
+ aesmc v0.16b, v0.16b //AES block 0 - round 0
+ ldr q26, [x8, #128] //load rk8
+
+ aese v3.16b, v18.16b
+ aesmc v3.16b, v3.16b //AES block 3 - round 0
+ ldr q21, [x8, #48] //load rk3
+
+ aese v2.16b, v19.16b
+ aesmc v2.16b, v2.16b //AES block 2 - round 1
+ trn2 v17.2d, v14.2d, v15.2d //h4l | h3l
+
+ aese v0.16b, v19.16b
+ aesmc v0.16b, v0.16b //AES block 0 - round 1
+ ldr q24, [x8, #96] //load rk6
+
+ aese v1.16b, v19.16b
+ aesmc v1.16b, v1.16b //AES block 1 - round 1
+ ldr q25, [x8, #112] //load rk7
+
+ aese v3.16b, v19.16b
+ aesmc v3.16b, v3.16b //AES block 3 - round 1
+ trn1 v9.2d, v14.2d, v15.2d //h4h | h3h
+
+ aese v0.16b, v20.16b
+ aesmc v0.16b, v0.16b //AES block 0 - round 2
+ ldr q23, [x8, #80] //load rk5
+
+ aese v1.16b, v20.16b
+ aesmc v1.16b, v1.16b //AES block 1 - round 2
+ ldr q13, [x3, #64] //load h2l | h2h
ext v13.16b, v13.16b, v13.16b, #8
- aese q3, v20.16b
- aesmc q3, q3 @ AES block 3 - round 2
-
- aese q2, v20.16b
- aesmc q2, q2 @ AES block 2 - round 2
- eor v17.16b, v17.16b, q9 @ h4k | h3k
-
- aese q0, v21.16b
- aesmc q0, q0 @ AES block 0 - round 3
-
- aese q1, v21.16b
- aesmc q1, q1 @ AES block 1 - round 3
-
- aese q2, v21.16b
- aesmc q2, q2 @ AES block 2 - round 3
- ldr q22, [r8, #64] @ load rk4
-
- aese q3, v21.16b
- aesmc q3, q3 @ AES block 3 - round 3
-
- and r5, r5, #0xffffffffffffffc0 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
- trn2 v16.2d, v12.2d, v13.2d @ h2l | h1l
-
- aese q3, v22.16b
- aesmc q3, q3 @ AES block 3 - round 4
- add r5, r5, r0
-
- aese q2, v22.16b
- aesmc q2, q2 @ AES block 2 - round 4
- cmp r0, r5 @ check if we have <= 4 blocks
-
- aese q0, v22.16b
- aesmc q0, q0 @ AES block 0 - round 4
-
- aese q3, v23.16b
- aesmc q3, q3 @ AES block 3 - round 5
-
- aese q2, v23.16b
- aesmc q2, q2 @ AES block 2 - round 5
-
- aese q0, v23.16b
- aesmc q0, q0 @ AES block 0 - round 5
-
- aese q3, v24.16b
- aesmc q3, q3 @ AES block 3 - round 6
-
- aese q1, v22.16b
- aesmc q1, q1 @ AES block 1 - round 4
-
- aese q2, v24.16b
- aesmc q2, q2 @ AES block 2 - round 6
- trn1 q8, v12.2d, v13.2d @ h2h | h1h
-
- aese q0, v24.16b
- aesmc q0, q0 @ AES block 0 - round 6
-
- aese q1, v23.16b
- aesmc q1, q1 @ AES block 1 - round 5
-
- aese q3, v25.16b
- aesmc q3, q3 @ AES block 3 - round 7
-
- aese q0, v25.16b
- aesmc q0, q0 @ AES block 0 - round 7
-
Home |
Main Index |
Thread Index |
Old Index