Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386 Restore ...



details:   https://anonhg.NetBSD.org/src/rev/98eae54c226a
branches:  trunk
changeset: 359447:98eae54c226a
user:      nakayama <nakayama%NetBSD.org@localhost>
date:      Sat Feb 10 13:29:55 2018 +0000

description:
Restore the PIC and SSE2-enabled code so that libcrypto works as before.

diffstat:

 crypto/external/bsd/openssl/lib/libcrypto/arch/i386/Makefile           |     9 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aes-586.S          |    12 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aesni-x86.S        |     2 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/i386/bn-586.S           |   438 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/i386/chacha-x86.S       |   186 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/i386/crypt586.S         |    12 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/i386/ecp_nistz256-x86.S |   306 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/i386/ghash-x86.S        |  1449 ++++-
 crypto/external/bsd/openssl/lib/libcrypto/arch/i386/poly1305-x86.S     |  1095 ++++-
 crypto/external/bsd/openssl/lib/libcrypto/arch/i386/rc4-586.S          |    85 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha1-586.S         |  1421 ++++++-
 crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha256-586.S       |  1644 ++++++-
 crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha512-586.S       |  2272 +++++++++-
 crypto/external/bsd/openssl/lib/libcrypto/arch/i386/x86cpuid.S         |   228 +-
 14 files changed, 8204 insertions(+), 955 deletions(-)

diffs (truncated from 10537 to 300 lines):

diff -r a42f0a2b4d4b -r 98eae54c226a crypto/external/bsd/openssl/lib/libcrypto/arch/i386/Makefile
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/Makefile      Sat Feb 10 11:50:39 2018 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/Makefile      Sat Feb 10 13:29:55 2018 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: Makefile,v 1.11 2018/02/10 06:22:22 christos Exp $
+#      $NetBSD: Makefile,v 1.12 2018/02/10 13:29:55 nakayama Exp $
 
 .include "bsd.own.mk"
 
@@ -9,11 +9,8 @@
        for i in $$(find ${OPENSSLSRC} -name \*86.pl) \
                  ${OPENSSLSRC}/crypto/x86cpuid.pl; do \
                perl -I${OPENSSLSRC}/crypto/perlasm \
-               -I${OPENSSLSRC}/crypto/bn/asm $$i elf /dev/stdout \
+               -I${OPENSSLSRC}/crypto/bn/asm $$i elf -fPIC -DOPENSSL_IA32_SSE2 /dev/stdout \
                | sed -e 's,^\.file.*$$,#include <machine/asm.h>,' \
-                       -e 's/  call    OPENSSL_cpuid_setup/    PIC_PROLOGUE!   call    PIC_PLT(OPENSSL_cpuid_setup)!   PIC_EPILOGUE/' \
-                       -e 's/  leal    DES_SPtrans,%edx/       PIC_PROLOGUE!   leal    PIC_GOT(DES_SPtrans),%edx!      PIC_EPILOGUE/' \
-                       -e 's/  leal    OPENSSL_ia32cap_P,%eax/ PIC_PROLOGUE!   leal    PIC_GOT(OPENSSL_ia32cap_P),%eax!        PIC_EPILOGUE/' \
-                       | tr '!' '\n' \
+                       -e 's/  call    OPENSSL_cpuid_setup/    PIC_PROLOGUE!   call    PIC_PLT(OPENSSL_cpuid_setup)!   PIC_EPILOGUE/' | tr '!' '\n' \
                > $$(basename $$i .pl).S; \
        done
diff -r a42f0a2b4d4b -r 98eae54c226a crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aes-586.S
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aes-586.S     Sat Feb 10 11:50:39 2018 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aes-586.S     Sat Feb 10 13:29:55 2018 +0000
@@ -1000,9 +1000,7 @@
        call    .L004pic_point
 .L004pic_point:
        popl    %ebp
-       PIC_PROLOGUE
-       leal    PIC_GOT(OPENSSL_ia32cap_P),%eax
-       PIC_EPILOGUE
+       leal    OPENSSL_ia32cap_P-.L004pic_point(%ebp),%eax
        leal    .LAES_Te-.L004pic_point(%ebp),%ebp
        leal    764(%esp),%ebx
        subl    %ebp,%ebx
@@ -2194,9 +2192,7 @@
        call    .L010pic_point
 .L010pic_point:
        popl    %ebp
-       PIC_PROLOGUE
-       leal    PIC_GOT(OPENSSL_ia32cap_P),%eax
-       PIC_EPILOGUE
+       leal    OPENSSL_ia32cap_P-.L010pic_point(%ebp),%eax
        leal    .LAES_Td-.L010pic_point(%ebp),%ebp
        leal    764(%esp),%ebx
        subl    %ebp,%ebx
@@ -2252,9 +2248,7 @@
        call    .L013pic_point
 .L013pic_point:
        popl    %ebp
-       PIC_PROLOGUE
-       leal    PIC_GOT(OPENSSL_ia32cap_P),%eax
-       PIC_EPILOGUE
+       leal    OPENSSL_ia32cap_P-.L013pic_point(%ebp),%eax
        cmpl    $0,40(%esp)
        leal    .LAES_Te-.L013pic_point(%ebp),%ebp
        jne     .L014picked_te
diff -r a42f0a2b4d4b -r 98eae54c226a crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aesni-x86.S
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aesni-x86.S   Sat Feb 10 11:50:39 2018 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aesni-x86.S   Sat Feb 10 13:29:55 2018 +0000
@@ -2854,7 +2854,7 @@
 .L112pic:
        popl    %ebx
        leal    .Lkey_const-.L112pic(%ebx),%ebx
-       leal    OPENSSL_ia32cap_P,%ebp
+       leal    OPENSSL_ia32cap_P-.Lkey_const(%ebx),%ebp
        movups  (%eax),%xmm0
        xorps   %xmm4,%xmm4
        movl    4(%ebp),%ebp
diff -r a42f0a2b4d4b -r 98eae54c226a crypto/external/bsd/openssl/lib/libcrypto/arch/i386/bn-586.S
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/bn-586.S      Sat Feb 10 11:50:39 2018 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/bn-586.S      Sat Feb 10 13:29:55 2018 +0000
@@ -5,6 +5,102 @@
 .align 16
 bn_mul_add_words:
 .L_bn_mul_add_words_begin:
+       call    .L000PIC_me_up
+.L000PIC_me_up:
+       popl    %eax
+       leal    OPENSSL_ia32cap_P-.L000PIC_me_up(%eax),%eax
+       btl     $26,(%eax)
+       jnc     .L001maw_non_sse2
+       movl    4(%esp),%eax
+       movl    8(%esp),%edx
+       movl    12(%esp),%ecx
+       movd    16(%esp),%mm0
+       pxor    %mm1,%mm1
+       jmp     .L002maw_sse2_entry
+.align 16
+.L003maw_sse2_unrolled:
+       movd    (%eax),%mm3
+       paddq   %mm3,%mm1
+       movd    (%edx),%mm2
+       pmuludq %mm0,%mm2
+       movd    4(%edx),%mm4
+       pmuludq %mm0,%mm4
+       movd    8(%edx),%mm6
+       pmuludq %mm0,%mm6
+       movd    12(%edx),%mm7
+       pmuludq %mm0,%mm7
+       paddq   %mm2,%mm1
+       movd    4(%eax),%mm3
+       paddq   %mm4,%mm3
+       movd    8(%eax),%mm5
+       paddq   %mm6,%mm5
+       movd    12(%eax),%mm4
+       paddq   %mm4,%mm7
+       movd    %mm1,(%eax)
+       movd    16(%edx),%mm2
+       pmuludq %mm0,%mm2
+       psrlq   $32,%mm1
+       movd    20(%edx),%mm4
+       pmuludq %mm0,%mm4
+       paddq   %mm3,%mm1
+       movd    24(%edx),%mm6
+       pmuludq %mm0,%mm6
+       movd    %mm1,4(%eax)
+       psrlq   $32,%mm1
+       movd    28(%edx),%mm3
+       addl    $32,%edx
+       pmuludq %mm0,%mm3
+       paddq   %mm5,%mm1
+       movd    16(%eax),%mm5
+       paddq   %mm5,%mm2
+       movd    %mm1,8(%eax)
+       psrlq   $32,%mm1
+       paddq   %mm7,%mm1
+       movd    20(%eax),%mm5
+       paddq   %mm5,%mm4
+       movd    %mm1,12(%eax)
+       psrlq   $32,%mm1
+       paddq   %mm2,%mm1
+       movd    24(%eax),%mm5
+       paddq   %mm5,%mm6
+       movd    %mm1,16(%eax)
+       psrlq   $32,%mm1
+       paddq   %mm4,%mm1
+       movd    28(%eax),%mm5
+       paddq   %mm5,%mm3
+       movd    %mm1,20(%eax)
+       psrlq   $32,%mm1
+       paddq   %mm6,%mm1
+       movd    %mm1,24(%eax)
+       psrlq   $32,%mm1
+       paddq   %mm3,%mm1
+       movd    %mm1,28(%eax)
+       leal    32(%eax),%eax
+       psrlq   $32,%mm1
+       subl    $8,%ecx
+       jz      .L004maw_sse2_exit
+.L002maw_sse2_entry:
+       testl   $4294967288,%ecx
+       jnz     .L003maw_sse2_unrolled
+.align 4
+.L005maw_sse2_loop:
+       movd    (%edx),%mm2
+       movd    (%eax),%mm3
+       pmuludq %mm0,%mm2
+       leal    4(%edx),%edx
+       paddq   %mm3,%mm1
+       paddq   %mm2,%mm1
+       movd    %mm1,(%eax)
+       subl    $1,%ecx
+       psrlq   $32,%mm1
+       leal    4(%eax),%eax
+       jnz     .L005maw_sse2_loop
+.L004maw_sse2_exit:
+       movd    %mm1,%eax
+       emms
+       ret
+.align 16
+.L001maw_non_sse2:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
@@ -17,9 +113,9 @@
        andl    $4294967288,%ecx
        movl    32(%esp),%ebp
        pushl   %ecx
-       jz      .L000maw_finish
+       jz      .L006maw_finish
 .align 16
-.L001maw_loop:
+.L007maw_loop:
 
        movl    (%ebx),%eax
        mull    %ebp
@@ -96,13 +192,13 @@
        subl    $8,%ecx
        leal    32(%ebx),%ebx
        leal    32(%edi),%edi
-       jnz     .L001maw_loop
-.L000maw_finish:
+       jnz     .L007maw_loop
+.L006maw_finish:
        movl    32(%esp),%ecx
        andl    $7,%ecx
-       jnz     .L002maw_finish2
-       jmp     .L003maw_end
-.L002maw_finish2:
+       jnz     .L008maw_finish2
+       jmp     .L009maw_end
+.L008maw_finish2:
 
        movl    (%ebx),%eax
        mull    %ebp
@@ -113,7 +209,7 @@
        decl    %ecx
        movl    %eax,(%edi)
        movl    %edx,%esi
-       jz      .L003maw_end
+       jz      .L009maw_end
 
        movl    4(%ebx),%eax
        mull    %ebp
@@ -124,7 +220,7 @@
        decl    %ecx
        movl    %eax,4(%edi)
        movl    %edx,%esi
-       jz      .L003maw_end
+       jz      .L009maw_end
 
        movl    8(%ebx),%eax
        mull    %ebp
@@ -135,7 +231,7 @@
        decl    %ecx
        movl    %eax,8(%edi)
        movl    %edx,%esi
-       jz      .L003maw_end
+       jz      .L009maw_end
 
        movl    12(%ebx),%eax
        mull    %ebp
@@ -146,7 +242,7 @@
        decl    %ecx
        movl    %eax,12(%edi)
        movl    %edx,%esi
-       jz      .L003maw_end
+       jz      .L009maw_end
 
        movl    16(%ebx),%eax
        mull    %ebp
@@ -157,7 +253,7 @@
        decl    %ecx
        movl    %eax,16(%edi)
        movl    %edx,%esi
-       jz      .L003maw_end
+       jz      .L009maw_end
 
        movl    20(%ebx),%eax
        mull    %ebp
@@ -168,7 +264,7 @@
        decl    %ecx
        movl    %eax,20(%edi)
        movl    %edx,%esi
-       jz      .L003maw_end
+       jz      .L009maw_end
 
        movl    24(%ebx),%eax
        mull    %ebp
@@ -178,7 +274,7 @@
        adcl    $0,%edx
        movl    %eax,24(%edi)
        movl    %edx,%esi
-.L003maw_end:
+.L009maw_end:
        movl    %esi,%eax
        popl    %ecx
        popl    %edi
@@ -192,6 +288,33 @@
 .align 16
 bn_mul_words:
 .L_bn_mul_words_begin:
+       call    .L010PIC_me_up
+.L010PIC_me_up:
+       popl    %eax
+       leal    OPENSSL_ia32cap_P-.L010PIC_me_up(%eax),%eax
+       btl     $26,(%eax)
+       jnc     .L011mw_non_sse2
+       movl    4(%esp),%eax
+       movl    8(%esp),%edx
+       movl    12(%esp),%ecx
+       movd    16(%esp),%mm0
+       pxor    %mm1,%mm1
+.align 16
+.L012mw_sse2_loop:
+       movd    (%edx),%mm2
+       pmuludq %mm0,%mm2
+       leal    4(%edx),%edx
+       paddq   %mm2,%mm1
+       movd    %mm1,(%eax)
+       subl    $1,%ecx
+       psrlq   $32,%mm1
+       leal    4(%eax),%eax
+       jnz     .L012mw_sse2_loop
+       movd    %mm1,%eax
+       emms
+       ret



Home | Main Index | Thread Index | Old Index