Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64 Explicitly pass CC down.



details:   https://anonhg.NetBSD.org/src/rev/03dfc94d2e89
branches:  trunk
changeset: 338260:03dfc94d2e89
user:      joerg <joerg%NetBSD.org@localhost>
date:      Sat May 16 19:08:37 2015 +0000

description:
Explicitly pass CC down. When building with clang, force external
assembler as some of the Perl scripts use -Wa,-v. Regenerate for AVX
support.

diffstat:

 crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/Makefile            |     8 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/aesni-sha1-x86_64.S |  1342 ++++++++++
 crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/sha1-x86_64.S       |  1156 ++++++++
 3 files changed, 2504 insertions(+), 2 deletions(-)

diffs (truncated from 2556 to 300 lines):

diff -r b133edc94579 -r 03dfc94d2e89 crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/Makefile
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/Makefile    Sat May 16 17:32:54 2015 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/Makefile    Sat May 16 19:08:37 2015 +0000
@@ -1,14 +1,18 @@
-#      $NetBSD: Makefile,v 1.6 2012/08/04 11:03:34 christos Exp $
+#      $NetBSD: Makefile,v 1.7 2015/05/16 19:08:37 joerg Exp $
 
 .include "bsd.own.mk"
 
 CRYPTODIST=${NETBSDSRCDIR}/crypto
 .include "${NETBSDSRCDIR}/crypto/Makefile.openssl"
 
+.if make(regen) && ${HAVE_LLVM:U} == "yes"
+CC+= -fno-integrated-as
+.endif
+
 regen:
        for i in $$(find ${OPENSSLSRC} -name \*${MACHINE_ARCH}.pl) \
                ${OPENSSLSRC}/crypto/${MACHINE_ARCH}cpuid.pl ; do \
-                (echo "#include <machine/asm.h>"; perl $$i elf | sed \
+                (echo "#include <machine/asm.h>"; CC=${CC:Q} perl $$i elf | sed \
                    -e 's/\(OPENSSL[A-Za-z0-9_+]*\)(%rip)/\1@GOTPCREL(%rip)/' \
                    -e 's/.hidden       OPENSSL_cpuid_setup/.globl      OPENSSL_cpuid_setup/' \
                    -e 's/call  OPENSSL_cpuid_setup/call        PIC_PLT(OPENSSL_cpuid_setup)/') \
diff -r b133edc94579 -r 03dfc94d2e89 crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/aesni-sha1-x86_64.S
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/aesni-sha1-x86_64.S Sat May 16 17:32:54 2015 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/aesni-sha1-x86_64.S Sat May 16 19:08:37 2015 +0000
@@ -9,6 +9,11 @@
 
        movl    OPENSSL_ia32cap_P+0@GOTPCREL(%rip),%r10d
        movl    OPENSSL_ia32cap_P+4@GOTPCREL(%rip),%r11d
+       andl    $268435456,%r11d
+       andl    $1073741824,%r10d
+       orl     %r11d,%r10d
+       cmpl    $1342177280,%r10d
+       je      aesni_cbc_sha1_enc_avx
        jmp     aesni_cbc_sha1_enc_ssse3
        .byte   0xf3,0xc3
 .size  aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc
@@ -1385,6 +1390,1343 @@
 .Lepilogue_ssse3:
        .byte   0xf3,0xc3
 .size  aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
+.type  aesni_cbc_sha1_enc_avx,@function
+.align 16
+aesni_cbc_sha1_enc_avx:
+       movq    8(%rsp),%r10
+
+
+       pushq   %rbx
+       pushq   %rbp
+       pushq   %r12
+       pushq   %r13
+       pushq   %r14
+       pushq   %r15
+       leaq    -104(%rsp),%rsp
+
+
+       vzeroall
+       movq    %rdi,%r12
+       movq    %rsi,%r13
+       movq    %rdx,%r14
+       movq    %rcx,%r15
+       vmovdqu (%r8),%xmm11
+       movq    %r8,88(%rsp)
+       shlq    $6,%r14
+       subq    %r12,%r13
+       movl    240(%r15),%r8d
+       addq    $112,%r15
+       addq    %r10,%r14
+
+       leaq    K_XX_XX(%rip),%r11
+       movl    0(%r9),%eax
+       movl    4(%r9),%ebx
+       movl    8(%r9),%ecx
+       movl    12(%r9),%edx
+       movl    %ebx,%esi
+       movl    16(%r9),%ebp
+
+       vmovdqa 64(%r11),%xmm6
+       vmovdqa 0(%r11),%xmm9
+       vmovdqu 0(%r10),%xmm0
+       vmovdqu 16(%r10),%xmm1
+       vmovdqu 32(%r10),%xmm2
+       vmovdqu 48(%r10),%xmm3
+       vpshufb %xmm6,%xmm0,%xmm0
+       addq    $64,%r10
+       vpshufb %xmm6,%xmm1,%xmm1
+       vpshufb %xmm6,%xmm2,%xmm2
+       vpshufb %xmm6,%xmm3,%xmm3
+       vpaddd  %xmm9,%xmm0,%xmm4
+       vpaddd  %xmm9,%xmm1,%xmm5
+       vpaddd  %xmm9,%xmm2,%xmm6
+       vmovdqa %xmm4,0(%rsp)
+       vmovdqa %xmm5,16(%rsp)
+       vmovdqa %xmm6,32(%rsp)
+       vmovups -112(%r15),%xmm13
+       vmovups 16-112(%r15),%xmm14
+       jmp     .Loop_avx
+.align 16
+.Loop_avx:
+       addl    0(%rsp),%ebp
+       vmovups 0(%r12),%xmm12
+       vxorps  %xmm13,%xmm12,%xmm12
+       vxorps  %xmm12,%xmm11,%xmm11
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups -80(%r15),%xmm15
+       xorl    %edx,%ecx
+       vpalignr        $8,%xmm0,%xmm1,%xmm4
+       movl    %eax,%edi
+       shldl   $5,%eax,%eax
+       vpaddd  %xmm3,%xmm9,%xmm9
+       andl    %ecx,%esi
+       xorl    %edx,%ecx
+       vpsrldq $4,%xmm3,%xmm8
+       xorl    %edx,%esi
+       addl    %eax,%ebp
+       vpxor   %xmm0,%xmm4,%xmm4
+       shrdl   $2,%ebx,%ebx
+       addl    %esi,%ebp
+       vpxor   %xmm2,%xmm8,%xmm8
+       addl    4(%rsp),%edx
+       xorl    %ecx,%ebx
+       movl    %ebp,%esi
+       shldl   $5,%ebp,%ebp
+       vpxor   %xmm8,%xmm4,%xmm4
+       andl    %ebx,%edi
+       xorl    %ecx,%ebx
+       vmovdqa %xmm9,48(%rsp)
+       xorl    %ecx,%edi
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups -64(%r15),%xmm14
+       addl    %ebp,%edx
+       vpsrld  $31,%xmm4,%xmm8
+       shrdl   $7,%eax,%eax
+       addl    %edi,%edx
+       addl    8(%rsp),%ecx
+       xorl    %ebx,%eax
+       vpslldq $12,%xmm4,%xmm10
+       vpaddd  %xmm4,%xmm4,%xmm4
+       movl    %edx,%edi
+       shldl   $5,%edx,%edx
+       andl    %eax,%esi
+       xorl    %ebx,%eax
+       vpsrld  $30,%xmm10,%xmm9
+       vpor    %xmm8,%xmm4,%xmm4
+       xorl    %ebx,%esi
+       addl    %edx,%ecx
+       shrdl   $7,%ebp,%ebp
+       addl    %esi,%ecx
+       vpslld  $2,%xmm10,%xmm10
+       vpxor   %xmm9,%xmm4,%xmm4
+       addl    12(%rsp),%ebx
+       xorl    %eax,%ebp
+       movl    %ecx,%esi
+       shldl   $5,%ecx,%ecx
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups -48(%r15),%xmm15
+       vpxor   %xmm10,%xmm4,%xmm4
+       andl    %ebp,%edi
+       xorl    %eax,%ebp
+       vmovdqa 0(%r11),%xmm10
+       xorl    %eax,%edi
+       addl    %ecx,%ebx
+       shrdl   $7,%edx,%edx
+       addl    %edi,%ebx
+       addl    16(%rsp),%eax
+       xorl    %ebp,%edx
+       vpalignr        $8,%xmm1,%xmm2,%xmm5
+       movl    %ebx,%edi
+       shldl   $5,%ebx,%ebx
+       vpaddd  %xmm4,%xmm10,%xmm10
+       andl    %edx,%esi
+       xorl    %ebp,%edx
+       vpsrldq $4,%xmm4,%xmm9
+       xorl    %ebp,%esi
+       addl    %ebx,%eax
+       vpxor   %xmm1,%xmm5,%xmm5
+       shrdl   $7,%ecx,%ecx
+       addl    %esi,%eax
+       vpxor   %xmm3,%xmm9,%xmm9
+       addl    20(%rsp),%ebp
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups -32(%r15),%xmm14
+       xorl    %edx,%ecx
+       movl    %eax,%esi
+       shldl   $5,%eax,%eax
+       vpxor   %xmm9,%xmm5,%xmm5
+       andl    %ecx,%edi
+       xorl    %edx,%ecx
+       vmovdqa %xmm10,0(%rsp)
+       xorl    %edx,%edi
+       addl    %eax,%ebp
+       vpsrld  $31,%xmm5,%xmm9
+       shrdl   $7,%ebx,%ebx
+       addl    %edi,%ebp
+       addl    24(%rsp),%edx
+       xorl    %ecx,%ebx
+       vpslldq $12,%xmm5,%xmm8
+       vpaddd  %xmm5,%xmm5,%xmm5
+       movl    %ebp,%edi
+       shldl   $5,%ebp,%ebp
+       andl    %ebx,%esi
+       xorl    %ecx,%ebx
+       vpsrld  $30,%xmm8,%xmm10
+       vpor    %xmm9,%xmm5,%xmm5
+       xorl    %ecx,%esi
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups -16(%r15),%xmm15
+       addl    %ebp,%edx
+       shrdl   $7,%eax,%eax
+       addl    %esi,%edx
+       vpslld  $2,%xmm8,%xmm8
+       vpxor   %xmm10,%xmm5,%xmm5
+       addl    28(%rsp),%ecx
+       xorl    %ebx,%eax
+       movl    %edx,%esi
+       shldl   $5,%edx,%edx
+       vpxor   %xmm8,%xmm5,%xmm5
+       andl    %eax,%edi
+       xorl    %ebx,%eax
+       vmovdqa 16(%r11),%xmm8
+       xorl    %ebx,%edi
+       addl    %edx,%ecx
+       shrdl   $7,%ebp,%ebp
+       addl    %edi,%ecx
+       addl    32(%rsp),%ebx
+       xorl    %eax,%ebp
+       vpalignr        $8,%xmm2,%xmm3,%xmm6
+       movl    %ecx,%edi
+       shldl   $5,%ecx,%ecx
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups 0(%r15),%xmm14
+       vpaddd  %xmm5,%xmm8,%xmm8
+       andl    %ebp,%esi
+       xorl    %eax,%ebp
+       vpsrldq $4,%xmm5,%xmm10
+       xorl    %eax,%esi
+       addl    %ecx,%ebx
+       vpxor   %xmm2,%xmm6,%xmm6
+       shrdl   $7,%edx,%edx
+       addl    %esi,%ebx
+       vpxor   %xmm4,%xmm10,%xmm10
+       addl    36(%rsp),%eax
+       xorl    %ebp,%edx
+       movl    %ebx,%esi
+       shldl   $5,%ebx,%ebx
+       vpxor   %xmm10,%xmm6,%xmm6
+       andl    %edx,%edi
+       xorl    %ebp,%edx
+       vmovdqa %xmm8,16(%rsp)
+       xorl    %ebp,%edi
+       addl    %ebx,%eax
+       vpsrld  $31,%xmm6,%xmm10
+       shrdl   $7,%ecx,%ecx
+       addl    %edi,%eax
+       addl    40(%rsp),%ebp
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups 16(%r15),%xmm15
+       xorl    %edx,%ecx
+       vpslldq $12,%xmm6,%xmm9
+       vpaddd  %xmm6,%xmm6,%xmm6
+       movl    %eax,%edi
+       shldl   $5,%eax,%eax
+       andl    %ecx,%esi
+       xorl    %edx,%ecx
+       vpsrld  $30,%xmm9,%xmm8
+       vpor    %xmm10,%xmm6,%xmm6
+       xorl    %edx,%esi
+       addl    %eax,%ebp
+       shrdl   $7,%ebx,%ebx
+       addl    %esi,%ebp
+       vpslld  $2,%xmm9,%xmm9
+       vpxor   %xmm8,%xmm6,%xmm6
+       addl    44(%rsp),%edx
+       xorl    %ecx,%ebx
+       movl    %ebp,%esi
+       shldl   $5,%ebp,%ebp
+       vpxor   %xmm9,%xmm6,%xmm6
+       andl    %ebx,%edi
+       xorl    %ecx,%ebx
+       vmovdqa 16(%r11),%xmm9
+       xorl    %ecx,%edi
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups 32(%r15),%xmm14
+       addl    %ebp,%edx
+       shrdl   $7,%eax,%eax
+       addl    %edi,%edx
+       addl    48(%rsp),%ecx
+       xorl    %ebx,%eax
+       vpalignr        $8,%xmm3,%xmm4,%xmm7
+       movl    %edx,%edi
+       shldl   $5,%edx,%edx
+       vpaddd  %xmm6,%xmm9,%xmm9
+       andl    %eax,%esi
+       xorl    %ebx,%eax
+       vpsrldq $4,%xmm6,%xmm8
+       xorl    %ebx,%esi
+       addl    %edx,%ecx
+       vpxor   %xmm3,%xmm7,%xmm7



Home | Main Index | Thread Index | Old Index