Source-Changes-HG archive
[src/trunk]: src/crypto/external/bsd/openssl/dist/crypto revert change from o...
details: https://anonhg.NetBSD.org/src/rev/4caa0e1fa2b3
branches: trunk
changeset: 344308:4caa0e1fa2b3
user: christos <christos%NetBSD.org@localhost>
date: Mon Mar 21 19:12:26 2016 +0000
description:
revert change from openssl-1.1.0-pre4, breaks gcc-4.8
diffstat:
crypto/external/bsd/openssl/dist/crypto/sha/asm/sha1-x86_64.pl | 1240 +--------
crypto/external/bsd/openssl/dist/crypto/x86_64cpuid.pl | 119 +-
2 files changed, 217 insertions(+), 1142 deletions(-)
diffs (truncated from 1980 to 300 lines):
diff -r 5a23695a23b0 -r 4caa0e1fa2b3 crypto/external/bsd/openssl/dist/crypto/sha/asm/sha1-x86_64.pl
--- a/crypto/external/bsd/openssl/dist/crypto/sha/asm/sha1-x86_64.pl Mon Mar 21 05:23:39 2016 +0000
+++ b/crypto/external/bsd/openssl/dist/crypto/sha/asm/sha1-x86_64.pl Mon Mar 21 19:12:26 2016 +0000
@@ -1,7 +1,7 @@
#!/usr/bin/env perl
#
# ====================================================================
-# Written by Andy Polyakov <appro%openssl.org@localhost> for the OpenSSL
+# Written by Andy Polyakov <appro%fy.chalmers.se@localhost> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -49,38 +49,17 @@
#
# Add AVX code path. See sha1-586.pl for further information.
-# May 2013.
-#
-# Add AVX2+BMI code path. Initial attempt (utilizing BMI instructions
-# and loading pair of consecutive blocks to 256-bit %ymm registers)
-# did not provide impressive performance improvement till a crucial
-# hint regarding the number of Xupdate iterations to pre-compute in
-# advance was provided by Ilya Albrekht of Intel Corp.
-
-# March 2014.
-#
-# Add support for Intel SHA Extensions.
-
######################################################################
# Current performance is summarized in following table. Numbers are
# CPU clock cycles spent to process single byte (less is better).
#
-# x86_64 SSSE3 AVX[2]
-# P4 9.05 -
-# Opteron 6.26 -
-# Core2 6.55 6.05/+8% -
-# Westmere 6.73 5.30/+27% -
-# Sandy Bridge 7.70 6.10/+26% 4.99/+54%
-# Ivy Bridge 6.06 4.67/+30% 4.60/+32%
-# Haswell 5.45 4.15/+31% 3.57/+53%
-# Skylake 5.18 4.06/+28% 3.54/+46%
-# Bulldozer 9.11 5.95/+53%
-# VIA Nano 9.32 7.15/+30%
-# Atom 10.3 9.17/+12%
-# Silvermont 13.1(*) 9.37/+40%
-#
-# (*) obviously suboptimal result, nothing was done about it,
-# because SSSE3 code is compiled unconditionally;
+# x86_64 SSSE3 AVX
+# P4 9.8 -
+# Opteron 6.6 -
+# Core2 6.7 6.1/+10% -
+# Atom 11.0 9.7/+13% -
+# Westmere 7.1 5.6/+27% -
+# Sandy Bridge 7.9 6.3/+25% 5.2/+51%
$flavour = shift;
$output = shift;
@@ -93,27 +72,15 @@
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
-if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
- =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
- $avx = ($1>=2.19) + ($1>=2.22);
-}
-
-if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
- `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
- $avx = ($1>=2.09) + ($1>=2.10);
-}
-
-if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
- `ml64 2>&1` =~ /Version ([0-9]+)\./) {
- $avx = ($1>=10) + ($1>=11);
-}
-
-if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([2-9]\.[0-9]+)/) {
- $avx = ($2>=3.0) + ($2>3.0);
-}
-
-$shaext=1; ### set to zero if compiling for 1.0.1
-$avx=1 if (!$shaext && $avx);
+$avx=1 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+ =~ /GNU assembler version ([2-9]\.[0-9]+)/ &&
+ $1>=2.19);
+$avx=1 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+ `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ &&
+ $1>=2.09);
+$avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+ `ml64 2>&1` =~ /Version ([0-9]+)\./ &&
+ $1>=10);
open OUT,"| \"$^X\" $xlate $flavour $output";
*STDOUT=*OUT;
@@ -130,7 +97,7 @@
$t0="%eax";
$t1="%ebx";
$t2="%ecx";
-@xi=("%edx","%ebp","%r14d");
+@xi=("%edx","%ebp");
$A="%esi";
$B="%edi";
$C="%r11d";
@@ -145,40 +112,42 @@
$code.=<<___ if ($i==0);
mov `4*$i`($inp),$xi[0]
bswap $xi[0]
+ mov $xi[0],`4*$i`(%rsp)
___
$code.=<<___ if ($i<15);
+ mov $c,$t0
mov `4*$j`($inp),$xi[1]
- mov $d,$t0
- mov $xi[0],`4*$i`(%rsp)
mov $a,$t2
+ xor $d,$t0
bswap $xi[1]
- xor $c,$t0
rol \$5,$t2
+ lea 0x5a827999($xi[0],$e),$e
and $b,$t0
- lea 0x5a827999($xi[0],$e),$e
+ mov $xi[1],`4*$j`(%rsp)
add $t2,$e
xor $d,$t0
rol \$30,$b
add $t0,$e
___
$code.=<<___ if ($i>=15);
- xor `4*($j%16)`(%rsp),$xi[1]
- mov $d,$t0
- mov $xi[0],`4*($i%16)`(%rsp)
+ mov `4*($j%16)`(%rsp),$xi[1]
+ mov $c,$t0
mov $a,$t2
xor `4*(($j+2)%16)`(%rsp),$xi[1]
- xor $c,$t0
+ xor $d,$t0
rol \$5,$t2
xor `4*(($j+8)%16)`(%rsp),$xi[1]
and $b,$t0
lea 0x5a827999($xi[0],$e),$e
- rol \$30,$b
+ xor `4*(($j+13)%16)`(%rsp),$xi[1]
xor $d,$t0
+ rol \$1,$xi[1]
add $t2,$e
- rol \$1,$xi[1]
+ rol \$30,$b
+ mov $xi[1],`4*($j%16)`(%rsp)
add $t0,$e
___
-push(@xi,shift(@xi));
+unshift(@xi,pop(@xi));
}
sub BODY_20_39 {
@@ -186,58 +155,62 @@
my $j=$i+1;
my $K=($i<40)?0x6ed9eba1:0xca62c1d6;
$code.=<<___ if ($i<79);
- xor `4*($j%16)`(%rsp),$xi[1]
- mov $b,$t0
- `"mov $xi[0],".4*($i%16)."(%rsp)" if ($i<72)`
+ mov `4*($j%16)`(%rsp),$xi[1]
+ mov $c,$t0
mov $a,$t2
xor `4*(($j+2)%16)`(%rsp),$xi[1]
- xor $d,$t0
+ xor $b,$t0
rol \$5,$t2
+ lea $K($xi[0],$e),$e
xor `4*(($j+8)%16)`(%rsp),$xi[1]
- lea $K($xi[0],$e),$e
- xor $c,$t0
+ xor $d,$t0
add $t2,$e
+ xor `4*(($j+13)%16)`(%rsp),$xi[1]
rol \$30,$b
add $t0,$e
rol \$1,$xi[1]
___
+$code.=<<___ if ($i<76);
+ mov $xi[1],`4*($j%16)`(%rsp)
+___
$code.=<<___ if ($i==79);
- mov $b,$t0
+ mov $c,$t0
mov $a,$t2
- xor $d,$t0
+ xor $b,$t0
lea $K($xi[0],$e),$e
rol \$5,$t2
- xor $c,$t0
+ xor $d,$t0
add $t2,$e
rol \$30,$b
add $t0,$e
___
-push(@xi,shift(@xi));
+unshift(@xi,pop(@xi));
}
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___;
- xor `4*($j%16)`(%rsp),$xi[1]
- mov $d,$t0
- mov $xi[0],`4*($i%16)`(%rsp)
- mov $d,$t1
+ mov `4*($j%16)`(%rsp),$xi[1]
+ mov $c,$t0
+ mov $c,$t1
xor `4*(($j+2)%16)`(%rsp),$xi[1]
- and $c,$t0
+ and $d,$t0
mov $a,$t2
xor `4*(($j+8)%16)`(%rsp),$xi[1]
+ xor $d,$t1
lea 0x8f1bbcdc($xi[0],$e),$e
- xor $c,$t1
rol \$5,$t2
+ xor `4*(($j+13)%16)`(%rsp),$xi[1]
add $t0,$e
- rol \$1,$xi[1]
and $b,$t1
- add $t2,$e
- rol \$30,$b
+ rol \$1,$xi[1]
add $t1,$e
+ rol \$30,$b
+ mov $xi[1],`4*($j%16)`(%rsp)
+ add $t2,$e
___
-push(@xi,shift(@xi));
+unshift(@xi,pop(@xi));
}
$code.=<<___;
@@ -248,45 +221,31 @@
.type sha1_block_data_order,\@function,3
.align 16
sha1_block_data_order:
- mov OPENSSL_ia32cap_P+0(%rip),%r9d
- mov OPENSSL_ia32cap_P+4(%rip),%r8d
- mov OPENSSL_ia32cap_P+8(%rip),%r10d
- test \$`1<<9`,%r8d # check SSSE3 bit
- jz .Lialu
-___
-$code.=<<___ if ($shaext);
- test \$`1<<29`,%r10d # check SHA bit
- jnz _shaext_shortcut
-___
-$code.=<<___ if ($avx>1);
- and \$`1<<3|1<<5|1<<8`,%r10d # check AVX2+BMI1+BMI2
- cmp \$`1<<3|1<<5|1<<8`,%r10d
- je _avx2_shortcut
+ mov OPENSSL_ia32cap_P+0(%rip),%r8
+ mov 4(%r8),%r8d
+ bt \$9,%r8d
+ jnc .Lialu
___
$code.=<<___ if ($avx);
- and \$`1<<28`,%r8d # mask AVX bit
- and \$`1<<30`,%r9d # mask "Intel CPU" bit
- or %r9d,%r8d
- cmp \$`1<<28|1<<30`,%r8d
- je _avx_shortcut
+ bt \$28,%r8d
+ jc _avx_shortcut
___
$code.=<<___;
jmp _ssse3_shortcut
.align 16
.Lialu:
- mov %rsp,%rax
push %rbx
push %rbp
push %r12
push %r13
- push %r14
+ mov %rsp,%r11
mov %rdi,$ctx # reassigned argument
sub \$`8+16*4`,%rsp
mov %rsi,$inp # reassigned argument
and \$-64,%rsp
mov %rdx,$num # reassigned argument
- mov %rax,`16*4`(%rsp)
+ mov %r11,`16*4`(%rsp)
.Lprologue:
mov 0($ctx),$A
@@ -320,187 +279,53 @@
jnz .Lloop
mov `16*4`(%rsp),%rsi
- mov -40(%rsi),%r14
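For orientation: the probe removed in the @@ -93,27 +72,15 @@ hunk graded the toolchain into AVX levels 0/1/2 (checking GNU as, nasm, ml64 and clang in turn), while the restored code only sets $avx to 0 or 1. A minimal standalone sketch of the removed probe's first branch, with the regex and threshold taken from the diff and the surrounding driver assumed:

    #!/usr/bin/env perl
    # Run the C compiler's assembler on empty input and parse the
    # GNU as version banner, as in the removed openssl-1.1.0-pre4 hunk.
    $ENV{CC} ||= "cc";    # assumption: fall back to a default compiler
    my $avx = 0;
    if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
            =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
        # as >= 2.19 can assemble AVX, >= 2.22 also AVX2, so $avx is 0, 1 or 2
        $avx = ($1 >= 2.19) + ($1 >= 2.22);
    }
    print "avx level: $avx\n";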
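The BODY_00_19, BODY_20_39 and BODY_40_59 generators unroll the three SHA-1 round groups; the revert changes their instruction scheduling and message-schedule handling, not the underlying arithmetic. A plain-Perl reference model of one round, using only the boolean functions and round constants visible in the diff (the helper itself is illustrative, not part of the commit):

    # Rotate a 32-bit value left by $n bits.
    sub rol32 { my ($x, $n) = @_; (($x << $n) | ($x >> (32 - $n))) & 0xffffffff }

    # One SHA-1 round; returns the new (a,b,c,d,e) state.
    sub sha1_round {
        my ($i, $a, $b, $c, $d, $e, $w) = @_;
        my ($f, $k);
        if    ($i < 20) { $f = ($b & $c) | (~$b & $d);            $k = 0x5a827999 } # Ch
        elsif ($i < 40) { $f = $b ^ $c ^ $d;                      $k = 0x6ed9eba1 } # Parity
        elsif ($i < 60) { $f = ($b & $c) | ($b & $d) | ($c & $d); $k = 0x8f1bbcdc } # Maj
        else            { $f = $b ^ $c ^ $d;                      $k = 0xca62c1d6 } # Parity
        my $t = (rol32($a, 5) + $f + $e + $k + $w) & 0xffffffff;
        return ($t, $a, rol32($b, 30), $c, $d);
    }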
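Finally, the reverted sha1_block_data_order entry point drops the SHA Extensions and AVX2+BMI shortcuts and goes back to two bit tests on OPENSSL_ia32cap_P: bit 9 (SSSE3) and bit 28 (AVX). The same dispatch, modeled as a hypothetical Perl helper for illustration:

    # $cap1 stands for word 1 of OPENSSL_ia32cap_P; $avx for the
    # build-time probe result. Both parameter names are illustrative.
    sub pick_path {
        my ($cap1, $avx) = @_;
        return ".Lialu"        unless $cap1 & (1 << 9);        # no SSSE3: integer path
        return "_avx_shortcut" if $avx && ($cap1 & (1 << 28)); # AVX built and available
        return "_ssse3_shortcut";
    }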