Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64 Fix a...
details: https://anonhg.NetBSD.org/src/rev/47affd787197
branches: trunk
changeset: 327226:47affd787197
user: nakayama <nakayama%NetBSD.org@localhost>
date: Sun Mar 02 08:50:34 2014 +0000
description:
Fix assembler code generation: pass option -m64 properly, and
generate more code.
diffstat:
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/Makefile | 8 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/des_enc-sparc.S | 5280 ++++++++++
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/ghash-sparcv9.S | 6 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/sha1-sparcv9.S | 6 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/sha1-sparcv9a.S | 8 +-
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/sha512-sparcv9.S | 4041 ++++---
6 files changed, 7510 insertions(+), 1839 deletions(-)
diffs (truncated from 9460 to 300 lines):
diff -r 68faef02ff8e -r 47affd787197 crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/Makefile
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/Makefile Sun Mar 02 08:20:09 2014 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/Makefile Sun Mar 02 08:50:34 2014 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.3 2012/07/31 10:33:45 christos Exp $
+# $NetBSD: Makefile,v 1.4 2014/03/02 08:50:34 nakayama Exp $
.include "bsd.own.mk"
@@ -9,8 +9,12 @@
for i in $$(find ${OPENSSLSRC} -name \*sparcv9\*.pl); do \
j=$$(basename $$i .pl).S; \
case $$j in \
- ghash*|sha*) perl $$i > $$j;; \
+ ghash*|sha*) perl $$i $$j -m64;; \
*) perl $$i -m64 > $$j;; \
esac; \
done
+ #cp ${OPENSSLSRC}/crypto/bn/asm/sparcv8plus.S bn-sparcv9.S
+ m4 ${OPENSSLSRC}/crypto/des/asm/des_enc.m4 | \
+ sed 's,OPENSSL_SYSNAME_ULTRASPARC,__sparc_v9__,g' | \
+ sed 's,\.PIC\.DES_SPtrans,_PIC_DES_SPtrans,g' > des_enc-sparc.S
foo:
diff -r 68faef02ff8e -r 47affd787197 crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/des_enc-sparc.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/des_enc-sparc.S Sun Mar 02 08:50:34 2014 +0000
@@ -0,0 +1,5280 @@
+! des_enc.m4
+! des_enc.S (generated from des_enc.m4)
+!
+! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
+!
+! Version 1.0. 32-bit version.
+!
+! June 8, 2000.
+!
+! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
+! by Andy Polyakov.
+!
+! January 1, 2003.
+!
+! Assembler version: Copyright Svend Olaf Mikkelsen.
+!
+! Original C code: Copyright Eric A. Young.
+!
+! This code can be freely used by LibDES/SSLeay/OpenSSL users.
+!
+! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
+!
+! This version can be redistributed.
+!
+! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
+!
+! Global registers 1 to 5 are used. This is the same as done by the
+! cc compiler. The UltraSPARC load/store little endian feature is used.
+!
+! Instruction grouping often refers to one CPU cycle.
+!
+! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
+!
+! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
+!
+! Performance improvement according to './apps/openssl speed des'
+!
+! 32-bit build:
+! 23% faster than cc-5.2 -xarch=v8plus -xO5
+! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
+! 64-bit build:
+! 50% faster than cc-5.2 -xarch=v9 -xO5
+! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
+!
+
+.ident "des_enc.m4 2.1"
+.file "des_enc-sparc.S"
+
+#if defined(__SUNPRO_C) && defined(__sparcv9)
+# define ABI64 /* They've said -xarch=v9 at command line */
+#elif defined(__GNUC__) && defined(__arch64__)
+# define ABI64 /* They've said -m64 at command line */
+#endif
+
+#ifdef ABI64
+ .register %g2,#scratch
+ .register %g3,#scratch
+# define FRAME -192
+# define BIAS 2047
+# define LDPTR ldx
+# define STPTR stx
+# define ARG0 128
+# define ARGSZ 8
+# ifndef __sparc_v9__
+# define __sparc_v9__
+# endif
+#else
+# define FRAME -96
+# define BIAS 0
+# define LDPTR ld
+# define STPTR st
+# define ARG0 68
+# define ARGSZ 4
+#endif
+
+#define LOOPS 7
+
+#define global0 %g0
+#define global1 %g1
+#define global2 %g2
+#define global3 %g3
+#define global4 %g4
+#define global5 %g5
+
+#define local0 %l0
+#define local1 %l1
+#define local2 %l2
+#define local3 %l3
+#define local4 %l4
+#define local5 %l5
+#define local7 %l6
+#define local6 %l7
+
+#define in0 %i0
+#define in1 %i1
+#define in2 %i2
+#define in3 %i3
+#define in4 %i4
+#define in5 %i5
+#define in6 %i6
+#define in7 %i7
+
+#define out0 %o0
+#define out1 %o1
+#define out2 %o2
+#define out3 %o3
+#define out4 %o4
+#define out5 %o5
+#define out6 %o6
+#define out7 %o7
+
+#define stub stb
+
+
+
+
+! Macro definitions:
+
+
+! ip_macro
+!
+! The logic used in initial and final permutations is the same as in
+! the C code. The permutations are done with a clever shift, xor, and
+! AND technique.
+!
+! The macro also loads the addresses of sbox 1 to 5 to global 1 to 5, the
+! address of sbox 6 to local6, and the address of sbox 8 to out3.
+!
+! Rotates the halves 3 left to bring the sbox bits into convenient positions.
+!
+! Loads key first round from address in parameter 5 to out0, out1.
+!
+! After the original LibDES initial permutation, the resulting left
+! is in the variable initially used for right and vice versa. The macro
+! implements the possibility to keep the halves in the original registers.
+!
+! parameter 1 left
+! parameter 2 right
+! parameter 3 result left (modify in first round)
+! parameter 4 result right (use in first round)
+! parameter 5 key address
+! parameter 6 1/2 for include encryption/decryption
+! parameter 7 1 for move in1 to in3
+! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
+! parameter 9 1 for load ks3 and ks2 to in4 and in3
+
+
+
+
+! rounds_macro
+!
+! The logic used in the DES rounds is the same as in the C code,
+! except that calculations for sbox 1 and sbox 5 begin before
+! the previous round is finished.
+!
+! In each round one half (work) is modified based on key and the
+! other half (use).
+!
+! In this version we do two rounds in a loop repeated 7 times
+! and two rounds separately.
+!
+! One half has the bits for the sboxes in the following positions:
+!
+! 777777xx555555xx333333xx111111xx
+!
+! 88xx666666xx444444xx222222xx8888
+!
+! The bits for each sbox are xor-ed with the key bits for that box.
+! The above xx bits are cleared, and the result used for lookup in
+! the sbox table. Each sbox entry contains the 4 output bits permuted
+! into 32 bits according to the P permutation.
+!
+! In the description of DES, left and right are switched after
+! each round, except after last round. In this code the original
+! left and right are kept in the same register in all rounds, meaning
+! that after the 16 rounds the result for right is in the register
+! originally used for left.
+!
+! parameter 1 first work (left in first round)
+! parameter 2 first use (right in first round)
+! parameter 3 enc/dec 1/-1
+! parameter 4 loop label
+! parameter 5 key address register
+! parameter 6 optional address for key next encryption/decryption
+! parameter 7 not empty for include retl
+!
+! also compares in2 to 8
+
+
+
+
+! fp_macro
+!
+! parameter 1 right (original left)
+! parameter 2 left (original right)
+! parameter 3 1 for optional store to [in0]
+! parameter 4 1 for load input/output address to local5/7
+!
+! The final permutation logic switches the halves, meaning that
+! left and right end up in the registers originally used.
+
+
+
+
+! fp_ip_macro
+!
+! Does initial permutation for next block mixed with
+! final permutation for current block.
+!
+! parameter 1 original left
+! parameter 2 original right
+! parameter 3 left ip
+! parameter 4 right ip
+! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
+! 2: mov in4 to in3
+!
+! also adds -8 to length in2 and loads loop counter to out4
+
+
+
+
+
+! load_little_endian
+!
+! parameter 1 address
+! parameter 2 destination left
+! parameter 3 destination right
+! parameter 4 temporary
+! parameter 5 label
+
+
+
+
+! load_little_endian_inc
+!
+! parameter 1 address
+! parameter 2 destination left
+! parameter 3 destination right
+! parameter 4 temporary
+! parameter 5 label
+!
+! adds 8 to address
+
+
+
+
+! load_n_bytes
+!
+! Loads 1 to 7 bytes little endian
+! Remaining bytes are zeroed.
+!
+! parameter 1 address
+! parameter 2 length
+! parameter 3 destination register left
+! parameter 4 destination register right
+! parameter 5 temp
+! parameter 6 temp2
+! parameter 7 label
+! parameter 8 return label
+
+
+
+
+! store_little_endian
+!
+! parameter 1 address
+! parameter 2 source left
+! parameter 3 source right
+! parameter 4 temporary
+
+
+
+
Home |
Main Index |
Thread Index |
Old Index