Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/crypto/external/bsd/openssl/lib/libcrypto/arch Add new files
details: https://anonhg.NetBSD.org/src/rev/d69becfba5c3
branches: trunk
changeset: 374665:d69becfba5c3
user: christos <christos%NetBSD.org@localhost>
date: Tue May 09 17:22:43 2023 +0000
description:
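Add new files: aes-gcm-armv8_64.S (aarch64 and arm) and ecp_nistp521-ppc64.S (powerpc and powerpc64) under the OpenSSL libcrypto arch directory.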
diffstat:
crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S | 6026 +++++++++
crypto/external/bsd/openssl/lib/libcrypto/arch/arm/aes-gcm-armv8_64.S | 6027 ++++++++++
crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc/ecp_nistp521-ppc64.S | 347 +
crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc64/ecp_nistp521-ppc64.S | 357 +
4 files changed, 12757 insertions(+), 0 deletions(-)
diffs (truncated from 12773 to 300 lines):
diff -r 333f2fdd3ef1 -r d69becfba5c3 crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/aarch64/aes-gcm-armv8_64.S Tue May 09 17:22:43 2023 +0000
@@ -0,0 +1,6026 @@
+#include "arm_arch.h"
+
+#if __ARM_MAX_ARCH__>=8
+.fpu neon
+#ifdef __thumb2__
+.syntax unified
+.thumb
+# define INST(a,b,c,d) c,0xef,a,b
+#else
+.code 32
+# define INST(a,b,c,d) a,b,c,0xf2
+#endif
+
+.text
+.globl aes_gcm_enc_128_kernel
+.type aes_gcm_enc_128_kernel,%function
+.align 4
+aes_gcm_enc_128_kernel:
+ cbz r1, .L128_enc_ret
+ stp r19, r20, [sp, #-112]!
+ mov r16, r4
+ mov r8, r5
+ stp r21, r22, [sp, #16]
+ stp r23, r24, [sp, #32]
+ stp d8, d9, [sp, #48]
+ stp d10, d11, [sp, #64]
+ stp d12, d13, [sp, #80]
+ stp d14, d15, [sp, #96]
+
+ ldp r10, r11, [r16] @ ctr96_b64, ctr96_t32
+ ldp r13, r14, [r8, #160] @ load rk10
+
+ ld1 {v11.16b}, [r3]
+ ext v11.16b, v11.16b, v11.16b, #8
+ rev64 v11.16b, v11.16b
+ lsr r5, r1, #3 @ byte_len
+ mov r15, r5
+
+ ldr q27, [r8, #144] @ load rk9
+ add r4, r0, r1, lsr #3 @ end_input_ptr
+ sub r5, r5, #1 @ byte_len - 1
+
+ lsr r12, r11, #32
+ ldr q15, [r3, #112] @ load h4l | h4h
+ ext v15.16b, v15.16b, v15.16b, #8
+
+ fmov d1, r10 @ CTR block 1
+ rev r12, r12 @ rev_ctr32
+
+ add r12, r12, #1 @ increment rev_ctr32
+ orr r11, r11, r11
+ ldr q18, [r8, #0] @ load rk0
+
+ rev r9, r12 @ CTR block 1
+ add r12, r12, #1 @ CTR block 1
+ fmov d3, r10 @ CTR block 3
+
+ orr r9, r11, r9, lsl #32 @ CTR block 1
+ ld1 { q0}, [r16] @ special case vector load initial counter so we can start first AES block as quickly as possible
+
+ fmov v1.d[1], r9 @ CTR block 1
+ rev r9, r12 @ CTR block 2
+
+ fmov d2, r10 @ CTR block 2
+ orr r9, r11, r9, lsl #32 @ CTR block 2
+ add r12, r12, #1 @ CTR block 2
+
+ fmov v2.d[1], r9 @ CTR block 2
+ rev r9, r12 @ CTR block 3
+
+ orr r9, r11, r9, lsl #32 @ CTR block 3
+ ldr q19, [r8, #16] @ load rk1
+
+ add r12, r12, #1 @ CTR block 3
+ fmov v3.d[1], r9 @ CTR block 3
+
+ ldr q14, [r3, #80] @ load h3l | h3h
+ ext v14.16b, v14.16b, v14.16b, #8
+
+ aese q1, v18.16b
+ aesmc q1, q1 @ AES block 1 - round 0
+ ldr q20, [r8, #32] @ load rk2
+
+ aese q2, v18.16b
+ aesmc q2, q2 @ AES block 2 - round 0
+ ldr q12, [r3, #32] @ load h1l | h1h
+ ext v12.16b, v12.16b, v12.16b, #8
+
+ aese q0, v18.16b
+ aesmc q0, q0 @ AES block 0 - round 0
+ ldr q26, [r8, #128] @ load rk8
+
+ aese q3, v18.16b
+ aesmc q3, q3 @ AES block 3 - round 0
+ ldr q21, [r8, #48] @ load rk3
+
+ aese q2, v19.16b
+ aesmc q2, q2 @ AES block 2 - round 1
+ trn2 v17.2d, v14.2d, v15.2d @ h4l | h3l
+
+ aese q0, v19.16b
+ aesmc q0, q0 @ AES block 0 - round 1
+ ldr q24, [r8, #96] @ load rk6
+
+ aese q1, v19.16b
+ aesmc q1, q1 @ AES block 1 - round 1
+ ldr q25, [r8, #112] @ load rk7
+
+ aese q3, v19.16b
+ aesmc q3, q3 @ AES block 3 - round 1
+ trn1 q9, v14.2d, v15.2d @ h4h | h3h
+
+ aese q0, v20.16b
+ aesmc q0, q0 @ AES block 0 - round 2
+ ldr q23, [r8, #80] @ load rk5
+
+ aese q1, v20.16b
+ aesmc q1, q1 @ AES block 1 - round 2
+ ldr q13, [r3, #64] @ load h2l | h2h
+ ext v13.16b, v13.16b, v13.16b, #8
+
+ aese q3, v20.16b
+ aesmc q3, q3 @ AES block 3 - round 2
+
+ aese q2, v20.16b
+ aesmc q2, q2 @ AES block 2 - round 2
+ eor v17.16b, v17.16b, q9 @ h4k | h3k
+
+ aese q0, v21.16b
+ aesmc q0, q0 @ AES block 0 - round 3
+
+ aese q1, v21.16b
+ aesmc q1, q1 @ AES block 1 - round 3
+
+ aese q2, v21.16b
+ aesmc q2, q2 @ AES block 2 - round 3
+ ldr q22, [r8, #64] @ load rk4
+
+ aese q3, v21.16b
+ aesmc q3, q3 @ AES block 3 - round 3
+
+ and r5, r5, #0xffffffffffffffc0 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
+ trn2 v16.2d, v12.2d, v13.2d @ h2l | h1l
+
+ aese q3, v22.16b
+ aesmc q3, q3 @ AES block 3 - round 4
+ add r5, r5, r0
+
+ aese q2, v22.16b
+ aesmc q2, q2 @ AES block 2 - round 4
+ cmp r0, r5 @ check if we have <= 4 blocks
+
+ aese q0, v22.16b
+ aesmc q0, q0 @ AES block 0 - round 4
+
+ aese q3, v23.16b
+ aesmc q3, q3 @ AES block 3 - round 5
+
+ aese q2, v23.16b
+ aesmc q2, q2 @ AES block 2 - round 5
+
+ aese q0, v23.16b
+ aesmc q0, q0 @ AES block 0 - round 5
+
+ aese q3, v24.16b
+ aesmc q3, q3 @ AES block 3 - round 6
+
+ aese q1, v22.16b
+ aesmc q1, q1 @ AES block 1 - round 4
+
+ aese q2, v24.16b
+ aesmc q2, q2 @ AES block 2 - round 6
+ trn1 q8, v12.2d, v13.2d @ h2h | h1h
+
+ aese q0, v24.16b
+ aesmc q0, q0 @ AES block 0 - round 6
+
+ aese q1, v23.16b
+ aesmc q1, q1 @ AES block 1 - round 5
+
+ aese q3, v25.16b
+ aesmc q3, q3 @ AES block 3 - round 7
+
+ aese q0, v25.16b
+ aesmc q0, q0 @ AES block 0 - round 7
+
+ aese q1, v24.16b
+ aesmc q1, q1 @ AES block 1 - round 6
+
+ aese q2, v25.16b
+ aesmc q2, q2 @ AES block 2 - round 7
+
+ aese q0, v26.16b
+ aesmc q0, q0 @ AES block 0 - round 8
+
+ aese q1, v25.16b
+ aesmc q1, q1 @ AES block 1 - round 7
+
+ aese q2, v26.16b
+ aesmc q2, q2 @ AES block 2 - round 8
+
+ aese q3, v26.16b
+ aesmc q3, q3 @ AES block 3 - round 8
+
+ aese q1, v26.16b
+ aesmc q1, q1 @ AES block 1 - round 8
+
+ aese q2, v27.16b @ AES block 2 - round 9
+
+ aese q0, v27.16b @ AES block 0 - round 9
+
+ eor v16.16b, v16.16b, q8 @ h2k | h1k
+
+ aese q1, v27.16b @ AES block 1 - round 9
+
+ aese q3, v27.16b @ AES block 3 - round 9
+ bge .L128_enc_tail @ handle tail
+
+ ldp r6, r7, [r0, #0] @ AES block 0 - load plaintext
+
+ ldp r21, r22, [r0, #32] @ AES block 2 - load plaintext
+
+ ldp r19, r20, [r0, #16] @ AES block 1 - load plaintext
+
+ ldp r23, r24, [r0, #48] @ AES block 3 - load plaintext
+
+ eor r6, r6, r13 @ AES block 0 - round 10 low
+ eor r7, r7, r14 @ AES block 0 - round 10 high
+
+ eor r21, r21, r13 @ AES block 2 - round 10 low
+ fmov d4, r6 @ AES block 0 - mov low
+
+ eor r19, r19, r13 @ AES block 1 - round 10 low
+ eor r22, r22, r14 @ AES block 2 - round 10 high
+ fmov v4.d[1], r7 @ AES block 0 - mov high
+
+ fmov d5, r19 @ AES block 1 - mov low
+ eor r20, r20, r14 @ AES block 1 - round 10 high
+
+ eor r23, r23, r13 @ AES block 3 - round 10 low
+ fmov v5.d[1], r20 @ AES block 1 - mov high
+
+ fmov d6, r21 @ AES block 2 - mov low
+ eor r24, r24, r14 @ AES block 3 - round 10 high
+ rev r9, r12 @ CTR block 4
+
+ fmov v6.d[1], r22 @ AES block 2 - mov high
+ orr r9, r11, r9, lsl #32 @ CTR block 4
+
+ eor q4, q4, q0 @ AES block 0 - result
+ fmov d0, r10 @ CTR block 4
+ add r12, r12, #1 @ CTR block 4
+
+ fmov v0.d[1], r9 @ CTR block 4
+ rev r9, r12 @ CTR block 5
+
+ eor q5, q5, q1 @ AES block 1 - result
+ fmov d1, r10 @ CTR block 5
+ orr r9, r11, r9, lsl #32 @ CTR block 5
+
+ add r12, r12, #1 @ CTR block 5
+ add r0, r0, #64 @ AES input_ptr update
+ fmov v1.d[1], r9 @ CTR block 5
+
+ fmov d7, r23 @ AES block 3 - mov low
+ rev r9, r12 @ CTR block 6
+ st1 { q4}, [r2], #16 @ AES block 0 - store result
+
+ fmov v7.d[1], r24 @ AES block 3 - mov high
+ orr r9, r11, r9, lsl #32 @ CTR block 6
+
+ add r12, r12, #1 @ CTR block 6
+ eor q6, q6, q2 @ AES block 2 - result
+ st1 { q5}, [r2], #16 @ AES block 1 - store result
+
+ fmov d2, r10 @ CTR block 6
+ cmp r0, r5 @ check if we have <= 8 blocks
+
+ fmov v2.d[1], r9 @ CTR block 6
+ rev r9, r12 @ CTR block 7
+ st1 { q6}, [r2], #16 @ AES block 2 - store result
+
+ orr r9, r11, r9, lsl #32 @ CTR block 7
+
+ eor q7, q7, q3 @ AES block 3 - result
+ st1 { q7}, [r2], #16 @ AES block 3 - store result
+ bge .L128_enc_prepretail @ do prepretail
+
+.L128_enc_main_loop:@ main loop start
+ ldp r23, r24, [r0, #48] @ AES block 4k+3 - load plaintext
+ rev64 q4, q4 @ GHASH block 4k (only t0 is free)
+ rev64 q6, q6 @ GHASH block 4k+2 (t0, t1, and t2 free)
+
+ aese q2, v18.16b
+ aesmc q2, q2 @ AES block 4k+6 - round 0
+ fmov d3, r10 @ CTR block 4k+3
Home |
Main Index |
Thread Index |
Old Index