Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386 Restore ...
details: https://anonhg.NetBSD.org/src/rev/98eae54c226a
branches: trunk
changeset: 359447:98eae54c226a
user: nakayama <nakayama%NetBSD.org@localhost>
date: Sat Feb 10 13:29:55 2018 +0000
description:
Restore PIC and SSE2 enabled code to make libcrypto work as before.
diffstat:
crypto/external/bsd/openssl/lib/libcrypto/arch/i386/Makefile | 9 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aes-586.S | 12 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aesni-x86.S | 2 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/i386/bn-586.S | 438 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/i386/chacha-x86.S | 186 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/i386/crypt586.S | 12 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/i386/ecp_nistz256-x86.S | 306 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/i386/ghash-x86.S | 1449 ++++-
crypto/external/bsd/openssl/lib/libcrypto/arch/i386/poly1305-x86.S | 1095 ++++-
crypto/external/bsd/openssl/lib/libcrypto/arch/i386/rc4-586.S | 85 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha1-586.S | 1421 ++++++-
crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha256-586.S | 1644 ++++++-
crypto/external/bsd/openssl/lib/libcrypto/arch/i386/sha512-586.S | 2272 +++++++++-
crypto/external/bsd/openssl/lib/libcrypto/arch/i386/x86cpuid.S | 228 +-
14 files changed, 8204 insertions(+), 955 deletions(-)
diffs (truncated from 10537 to 300 lines):
diff -r a42f0a2b4d4b -r 98eae54c226a crypto/external/bsd/openssl/lib/libcrypto/arch/i386/Makefile
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/Makefile Sat Feb 10 11:50:39 2018 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/Makefile Sat Feb 10 13:29:55 2018 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.11 2018/02/10 06:22:22 christos Exp $
+# $NetBSD: Makefile,v 1.12 2018/02/10 13:29:55 nakayama Exp $
.include "bsd.own.mk"
@@ -9,11 +9,8 @@
for i in $$(find ${OPENSSLSRC} -name \*86.pl) \
${OPENSSLSRC}/crypto/x86cpuid.pl; do \
perl -I${OPENSSLSRC}/crypto/perlasm \
- -I${OPENSSLSRC}/crypto/bn/asm $$i elf /dev/stdout \
+ -I${OPENSSLSRC}/crypto/bn/asm $$i elf -fPIC -DOPENSSL_IA32_SSE2 /dev/stdout \
| sed -e 's,^\.file.*$$,#include <machine/asm.h>,' \
- -e 's/ call OPENSSL_cpuid_setup/ PIC_PROLOGUE! call PIC_PLT(OPENSSL_cpuid_setup)! PIC_EPILOGUE/' \
- -e 's/ leal DES_SPtrans,%edx/ PIC_PROLOGUE! leal PIC_GOT(DES_SPtrans),%edx! PIC_EPILOGUE/' \
- -e 's/ leal OPENSSL_ia32cap_P,%eax/ PIC_PROLOGUE! leal PIC_GOT(OPENSSL_ia32cap_P),%eax! PIC_EPILOGUE/' \
- | tr '!' '\n' \
+ -e 's/ call OPENSSL_cpuid_setup/ PIC_PROLOGUE! call PIC_PLT(OPENSSL_cpuid_setup)! PIC_EPILOGUE/' | tr '!' '\n' \
> $$(basename $$i .pl).S; \
done
diff -r a42f0a2b4d4b -r 98eae54c226a crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aes-586.S
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aes-586.S Sat Feb 10 11:50:39 2018 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aes-586.S Sat Feb 10 13:29:55 2018 +0000
@@ -1000,9 +1000,7 @@
call .L004pic_point
.L004pic_point:
popl %ebp
- PIC_PROLOGUE
- leal PIC_GOT(OPENSSL_ia32cap_P),%eax
- PIC_EPILOGUE
+ leal OPENSSL_ia32cap_P-.L004pic_point(%ebp),%eax
leal .LAES_Te-.L004pic_point(%ebp),%ebp
leal 764(%esp),%ebx
subl %ebp,%ebx
@@ -2194,9 +2192,7 @@
call .L010pic_point
.L010pic_point:
popl %ebp
- PIC_PROLOGUE
- leal PIC_GOT(OPENSSL_ia32cap_P),%eax
- PIC_EPILOGUE
+ leal OPENSSL_ia32cap_P-.L010pic_point(%ebp),%eax
leal .LAES_Td-.L010pic_point(%ebp),%ebp
leal 764(%esp),%ebx
subl %ebp,%ebx
@@ -2252,9 +2248,7 @@
call .L013pic_point
.L013pic_point:
popl %ebp
- PIC_PROLOGUE
- leal PIC_GOT(OPENSSL_ia32cap_P),%eax
- PIC_EPILOGUE
+ leal OPENSSL_ia32cap_P-.L013pic_point(%ebp),%eax
cmpl $0,40(%esp)
leal .LAES_Te-.L013pic_point(%ebp),%ebp
jne .L014picked_te
diff -r a42f0a2b4d4b -r 98eae54c226a crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aesni-x86.S
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aesni-x86.S Sat Feb 10 11:50:39 2018 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/aesni-x86.S Sat Feb 10 13:29:55 2018 +0000
@@ -2854,7 +2854,7 @@
.L112pic:
popl %ebx
leal .Lkey_const-.L112pic(%ebx),%ebx
- leal OPENSSL_ia32cap_P,%ebp
+ leal OPENSSL_ia32cap_P-.Lkey_const(%ebx),%ebp
movups (%eax),%xmm0
xorps %xmm4,%xmm4
movl 4(%ebp),%ebp
diff -r a42f0a2b4d4b -r 98eae54c226a crypto/external/bsd/openssl/lib/libcrypto/arch/i386/bn-586.S
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/bn-586.S Sat Feb 10 11:50:39 2018 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/bn-586.S Sat Feb 10 13:29:55 2018 +0000
@@ -5,6 +5,102 @@
.align 16
bn_mul_add_words:
.L_bn_mul_add_words_begin:
+ call .L000PIC_me_up
+.L000PIC_me_up:
+ popl %eax
+ leal OPENSSL_ia32cap_P-.L000PIC_me_up(%eax),%eax
+ btl $26,(%eax)
+ jnc .L001maw_non_sse2
+ movl 4(%esp),%eax
+ movl 8(%esp),%edx
+ movl 12(%esp),%ecx
+ movd 16(%esp),%mm0
+ pxor %mm1,%mm1
+ jmp .L002maw_sse2_entry
+.align 16
+.L003maw_sse2_unrolled:
+ movd (%eax),%mm3
+ paddq %mm3,%mm1
+ movd (%edx),%mm2
+ pmuludq %mm0,%mm2
+ movd 4(%edx),%mm4
+ pmuludq %mm0,%mm4
+ movd 8(%edx),%mm6
+ pmuludq %mm0,%mm6
+ movd 12(%edx),%mm7
+ pmuludq %mm0,%mm7
+ paddq %mm2,%mm1
+ movd 4(%eax),%mm3
+ paddq %mm4,%mm3
+ movd 8(%eax),%mm5
+ paddq %mm6,%mm5
+ movd 12(%eax),%mm4
+ paddq %mm4,%mm7
+ movd %mm1,(%eax)
+ movd 16(%edx),%mm2
+ pmuludq %mm0,%mm2
+ psrlq $32,%mm1
+ movd 20(%edx),%mm4
+ pmuludq %mm0,%mm4
+ paddq %mm3,%mm1
+ movd 24(%edx),%mm6
+ pmuludq %mm0,%mm6
+ movd %mm1,4(%eax)
+ psrlq $32,%mm1
+ movd 28(%edx),%mm3
+ addl $32,%edx
+ pmuludq %mm0,%mm3
+ paddq %mm5,%mm1
+ movd 16(%eax),%mm5
+ paddq %mm5,%mm2
+ movd %mm1,8(%eax)
+ psrlq $32,%mm1
+ paddq %mm7,%mm1
+ movd 20(%eax),%mm5
+ paddq %mm5,%mm4
+ movd %mm1,12(%eax)
+ psrlq $32,%mm1
+ paddq %mm2,%mm1
+ movd 24(%eax),%mm5
+ paddq %mm5,%mm6
+ movd %mm1,16(%eax)
+ psrlq $32,%mm1
+ paddq %mm4,%mm1
+ movd 28(%eax),%mm5
+ paddq %mm5,%mm3
+ movd %mm1,20(%eax)
+ psrlq $32,%mm1
+ paddq %mm6,%mm1
+ movd %mm1,24(%eax)
+ psrlq $32,%mm1
+ paddq %mm3,%mm1
+ movd %mm1,28(%eax)
+ leal 32(%eax),%eax
+ psrlq $32,%mm1
+ subl $8,%ecx
+ jz .L004maw_sse2_exit
+.L002maw_sse2_entry:
+ testl $4294967288,%ecx
+ jnz .L003maw_sse2_unrolled
+.align 4
+.L005maw_sse2_loop:
+ movd (%edx),%mm2
+ movd (%eax),%mm3
+ pmuludq %mm0,%mm2
+ leal 4(%edx),%edx
+ paddq %mm3,%mm1
+ paddq %mm2,%mm1
+ movd %mm1,(%eax)
+ subl $1,%ecx
+ psrlq $32,%mm1
+ leal 4(%eax),%eax
+ jnz .L005maw_sse2_loop
+.L004maw_sse2_exit:
+ movd %mm1,%eax
+ emms
+ ret
+.align 16
+.L001maw_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
@@ -17,9 +113,9 @@
andl $4294967288,%ecx
movl 32(%esp),%ebp
pushl %ecx
- jz .L000maw_finish
+ jz .L006maw_finish
.align 16
-.L001maw_loop:
+.L007maw_loop:
movl (%ebx),%eax
mull %ebp
@@ -96,13 +192,13 @@
subl $8,%ecx
leal 32(%ebx),%ebx
leal 32(%edi),%edi
- jnz .L001maw_loop
-.L000maw_finish:
+ jnz .L007maw_loop
+.L006maw_finish:
movl 32(%esp),%ecx
andl $7,%ecx
- jnz .L002maw_finish2
- jmp .L003maw_end
-.L002maw_finish2:
+ jnz .L008maw_finish2
+ jmp .L009maw_end
+.L008maw_finish2:
movl (%ebx),%eax
mull %ebp
@@ -113,7 +209,7 @@
decl %ecx
movl %eax,(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 4(%ebx),%eax
mull %ebp
@@ -124,7 +220,7 @@
decl %ecx
movl %eax,4(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 8(%ebx),%eax
mull %ebp
@@ -135,7 +231,7 @@
decl %ecx
movl %eax,8(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 12(%ebx),%eax
mull %ebp
@@ -146,7 +242,7 @@
decl %ecx
movl %eax,12(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 16(%ebx),%eax
mull %ebp
@@ -157,7 +253,7 @@
decl %ecx
movl %eax,16(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 20(%ebx),%eax
mull %ebp
@@ -168,7 +264,7 @@
decl %ecx
movl %eax,20(%edi)
movl %edx,%esi
- jz .L003maw_end
+ jz .L009maw_end
movl 24(%ebx),%eax
mull %ebp
@@ -178,7 +274,7 @@
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
-.L003maw_end:
+.L009maw_end:
movl %esi,%eax
popl %ecx
popl %edi
@@ -192,6 +288,33 @@
.align 16
bn_mul_words:
.L_bn_mul_words_begin:
+ call .L010PIC_me_up
+.L010PIC_me_up:
+ popl %eax
+ leal OPENSSL_ia32cap_P-.L010PIC_me_up(%eax),%eax
+ btl $26,(%eax)
+ jnc .L011mw_non_sse2
+ movl 4(%esp),%eax
+ movl 8(%esp),%edx
+ movl 12(%esp),%ecx
+ movd 16(%esp),%mm0
+ pxor %mm1,%mm1
+.align 16
+.L012mw_sse2_loop:
+ movd (%edx),%mm2
+ pmuludq %mm0,%mm2
+ leal 4(%edx),%edx
+ paddq %mm2,%mm1
+ movd %mm1,(%eax)
+ subl $1,%ecx
+ psrlq $32,%mm1
+ leal 4(%eax),%eax
+ jnz .L012mw_sse2_loop
+ movd %mm1,%eax
+ emms
+ ret
Home |
Main Index |
Thread Index |
Old Index