Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/crypto/external/bsd/openssl/lib/libcrypto/arch/m68k Implemen...
details: https://anonhg.NetBSD.org/src/rev/21c06369dbb7
branches: trunk
changeset: 351216:21c06369dbb7
user: isaki <isaki%NetBSD.org@localhost>
date: Tue Feb 07 11:18:43 2017 +0000
description:
Implement m68k assembly version of AES.
It's approx 1.4 times faster than the original one.
diffstat:
crypto/external/bsd/openssl/lib/libcrypto/arch/m68k/aes-m68k.S | 1745 ++++++++++
crypto/external/bsd/openssl/lib/libcrypto/arch/m68k/aes.inc | 4 +
2 files changed, 1749 insertions(+), 0 deletions(-)
diffs (truncated from 1757 to 300 lines):
diff -r f02f8e67e832 -r 21c06369dbb7 crypto/external/bsd/openssl/lib/libcrypto/arch/m68k/aes-m68k.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/m68k/aes-m68k.S Tue Feb 07 11:18:43 2017 +0000
@@ -0,0 +1,1745 @@
+| $NetBSD: aes-m68k.S,v 1.1 2017/02/07 11:18:43 isaki Exp $
+
+| Copyright (C) 2016 Tetsuya Isaki. All rights reserved.
+| Copyright (C) 2016 Y.Sugahara (moveccr). All rights reserved.
+|
+| Redistribution and use in source and binary forms, with or without
+| modification, are permitted provided that the following conditions
+| are met:
+| 1. Redistributions of source code must retain the above copyright
+| notice, this list of conditions and the following disclaimer.
+| 2. Redistributions in binary form must reproduce the above copyright
+| notice, this list of conditions and the following disclaimer in the
+| documentation and/or other materials provided with the distribution.
+|
+| THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+| IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+| OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+| IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+| INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+| BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+| LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+| AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+| OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+| SUCH DAMAGE.
+
+| Byte offsets of the AES_KEY fields accessed by the code below.
+| rd_key: start of the round key words; key + rd_key is used as &rk[0].
+#define rd_key (0)
+| rounds: the field that receives key->rounds (10, 12 or 14).
+| NOTE(review): 60 * 4 presumably skips 60 32-bit round key words;
+| confirm against the AES_KEY declaration.
+#define rounds (60 * 4)
+
+| int
+| private_AES_set_encrypt_key(const unsigned char *userKey,
+| const int bits,
+| AES_KEY *key)
+.global private_AES_set_encrypt_key
+private_AES_set_encrypt_key:
+_private_AES_set_encrypt_key:
+ moveml %d2-%d7/%a2-%a6,%sp@-
+ moveal %sp@(44+4),%a0 | userKey
+ moveal %sp@(44+8),%a1 | bits
+ moveal %sp@(44+12),%a3 | key
+
+ tstl %a0
+ beq pek_return1 | return -1 if userKey == NULL
+ tstl %a3
+ beq pek_return1 | return -1 if key == NULL
+
+ cmpaw #128,%a1
+ bne pek192_check | unless bits == 128
+pek128:
+ | %d0-%d7 rk[0..7]
+ | %a0 userKey, Te0
+ | %a1 Te1
+ | %a2 Te2
+ | %a3 key, Te3
+ | %a4 rcon_byte
+ | %a5 &rk[4]
+ | %a6 end of rcon_byte
+
+ moveq #10,%d0
+ movel %d0,%a3@(rounds) | key->rounds = 10
+
+ lea %a3@(rd_key),%a5 | &rk[0]
+
+ | rk[0] = GETU32(userKey );
+ | rk[1] = GETU32(userKey + 4);
+ | rk[2] = GETU32(userKey + 8);
+ | rk[3] = GETU32(userKey + 12);
+ moveml %a0@,%d0-%d3
+ movel %d0,%a5@+
+ movel %d1,%a5@+
+ movel %d2,%a5@+
+ movel %d3,%a5@+
+
+ lea %pc@(Te0),%a0 | %a0 = Te0
+ lea %a0@(256*4),%a1 | %a1 = Te1
+ lea %a1@(256*4),%a2 | %a2 = Te2
+ lea %a2@(256*4),%a3 | %a3 = Te3
+
+ moveq #0,%d7
+ lea %pc@(rcon_byte),%a4
+ lea %a4@(10),%a6
+
+pek128_loop:
+ | d6 consists of four Te index bytes
+ movel %d3,%d5 | d5=rk[3] as {1,2,3,0}
+ moveb %d5,%d7 | d7=temp
+ moveb %a0@(2,%d7:w:4),%d4 | d4=$xxxxxx00
+ swap %d5 | d5={3,0,1,2}
+ lsll #8,%d4 | d4=$xxxx00xx
+ moveb %d5,%d7 | d7=temp>>16
+ moveb %a2@(0,%d7:w:4),%d6 | d6=$xxxxxx22
+ lsrl #8,%d5 | d5={x,3,0,1}
+ moveb %a4@+,%d7 | LSByte ^= rcon[i]
+ eorb %d7,%d6
+ lsll #8,%d6 | d6=$xxxx22xx
+ moveb %d5,%d7 | d7=temp>>24
+ moveb %a1@(3,%d7:w:4),%d4 | d4=$xxxx0011
+ swap %d5 | d5={0,1,x,3}
+ moveb %d5,%d7 | d7=temp>>8
+ moveb %a3@(1,%d7:w:4),%d6 | d6=$xxxx2233
+ swap %d6 | d6=$2233xxxx
+ movew %d4,%d6 | d6=$22330011
+
+ eorl %d6,%d0 | rk[4] = rk[0]^ (Te..)
+ movel %d0,%a5@+
+ eorl %d0,%d1 | rk[5] = rk[1] ^ rk[4];
+ movel %d1,%a5@+
+ eorl %d1,%d2 | rk[6] = rk[2] ^ rk[5];
+ movel %d2,%a5@+
+ eorl %d2,%d3 | rk[7] = rk[3] ^ rk[6];
+ movel %d3,%a5@+
+
+ cmpal %a4,%a6
+ bne pek128_loop | if (++i == 10) return 0;
+
+pek_return0:
+ moveql #0,%d0
+pek_return:
+ moveml %sp@+,%d2-%d7/%a2-%a6
+ rts
+
+pek192_check:
+ | %a0 userKey
+ | %a1 bits
+ | %a3 key
+ cmpaw #192,%a1
+ bne pek256_check | unless bits == 192
+pek192:
+ | %a0 Te0
+ | %a1 Te1
+ | %a2 Te2
+ | %a3 Te3
+ | %a4 rcon_byte
+ | %a5 &rk[6]
+ | %a6 end of rcon_byte
+ moveq #12,%d0
+ movel %d0,%a3@(rounds) | key->rounds = 12
+
+ lea %a3@(rd_key),%a5 | &rk[0]
+
+ | rk[0] = GETU32(userKey );
+ | rk[1] = GETU32(userKey + 4);
+ | rk[2] = GETU32(userKey + 8);
+ | rk[3] = GETU32(userKey + 12);
+ | rk[4] = GETU32(userKey + 16);
+ | rk[5] = GETU32(userKey + 20);
+ moveml %a0@,%d0-%d5 | copy userKey[0..23]
+ movel %d0,%a5@+
+ movel %d1,%a5@+
+ movel %d2,%a5@+
+ movel %d3,%a5@+
+ movel %d4,%a5@+
+ movel %d5,%a5@ | read again later
+
+ lea %pc@(Te0 +2),%a0 | %a0 = Te0 + 2
+ lea %a0@(256*4 -2+3),%a1 | %a1 = Te1 + 3
+ lea %a1@(256*4 -3+0),%a2 | %a2 = Te2 + 0
+ lea %a2@(256*4 +0+1),%a3 | %a3 = Te3 + 1
+
+ moveq #0,%d7
+ lea %pc@(rcon_byte),%a4
+ lea %a4@(8),%a6
+ bra pek192_loop_start
+
+pek192_loop:
+ eorl %d3,%d4 | rk[10] = rk[4] ^ rk[9]
+ movel %d4,%a5@+ | %d4 is rk[10]
+ eorl %d4,%d5 | rk[11] = rk[5] ^ rk[10]
+ movel %d5,%a5@ | %d5 is rk[11]
+
+pek192_loop_start:
+ | %d0..%d5 = rk[0..5]
+ |
+ | temp = rk[5];
+ | rk[6] = (Te0[(temp ) & 0xff] & 0x0000ff00)
+ | rk[6] |= (Te3[(temp >> 8) & 0xff] & 0x00ff0000)
+ | rk[6] |= (Te2[(temp >> 16) & 0xff] & 0xff000000)
+ | ^ rcon[i]
+ | rk[6] |= (Te1[(temp >> 24) ] & 0x000000ff)
+ moveb %d5,%d7 | temp >> 0
+ moveb %a0@(%d7:w:4),%d6 | d6=$xxxxxx00
+ rorl #8,%d6 | d6=$00xxxxxx
+ lsrl #8,%d5 | temp >> 8
+ moveb %d5,%d7
+ moveb %a3@(%d7:w:4),%d6 | d6=$00xxxx11
+ rorl #8,%d6 | d6=$1100xxxx
+ lsrl #8,%d5 | temp >> 16
+ moveb %d5,%d7
+ moveb %a2@(%d7:w:4),%d6 | d6=$1100xx22
+ moveb %a4@+,%d7 | LSByte ^= rcon[i]
+ eorb %d7,%d6
+ rorl #8,%d6 | d6=$221100xx
+ lsrl #8,%d5 | temp >> 24
+ moveb %a1@(%d5:w:4),%d6 | d6=$22110033
+
+ movel %a5@+,%d5 | read rk[5] again
+ | (faster than keeping %a5)
+
+ eorl %d6,%d0 | rk[6] ^= rk[0]
+ movel %d0,%a5@+ | %d0 is rk[6]
+ eorl %d0,%d1 | rk[7] = rk[1] ^ rk[6]
+ movel %d1,%a5@+ | %d1 is rk[7]
+ eorl %d1,%d2 | rk[8] = rk[2] ^ rk[7]
+ movel %d2,%a5@+ | %d2 is rk[8]
+ eorl %d2,%d3 | rk[9] = rk[3] ^ rk[8]
+ movel %d3,%a5@+ | %d3 is rk[9]
+
+ cmpal %a4,%a6
+ bne pek192_loop
+ bra pek_return0
+
+
+pek256_check:
+ | %a0 userKey
+ | %a1 bits
+ | %a3 key
+ cmpaw #256,%a1
+ bne pek_return2 | otherwise return -2
+pek256:
+ | %a0 Te0
+ | %a1 Te1
+ | %a2 Te2
+ | %a3 Te3
+ | %a4 rcon_byte
+ | %a5 &rk[8]
+ | %a6 end of rcon_byte
+ moveq #14,%d0
+ movel %d0,%a3@(rounds) | key->rounds = 14
+
+ lea %a3@(rd_key),%a5 | &rk[0]
+
+ | rk[0] = GETU32(userKey );
+ | rk[1] = GETU32(userKey + 4);
+ | rk[2] = GETU32(userKey + 8);
+ | rk[3] = GETU32(userKey + 12);
+ | rk[4] = GETU32(userKey + 16);
+ | rk[5] = GETU32(userKey + 20);
+ | rk[6] = GETU32(userKey + 24);
+ | rk[7] = GETU32(userKey + 28);
+ moveml %a0@,%d0-%d7 | copy userKey[0..31]
+ movel %d0,%a5@+
+ movel %d1,%a5@+
+ movel %d2,%a5@+
+ movel %d3,%a5@+
+ movel %d4,%a5@+
+ movel %d5,%a5@+
+ movel %d6,%a5@+
+ movel %d7,%a5@+
+
+ lea %pc@(Te0 +2),%a0 | %a0 = Te0 + 2
+ lea %a0@(256*4 -2+3),%a1 | %a1 = Te1 + 3
+ lea %a1@(256*4 -3+0),%a2 | %a2 = Te2 + 0
+ lea %a2@(256*4 +0+1),%a3 | %a3 = Te3 + 1
+
+ lea %pc@(rcon_byte),%a4
+ lea %a4@(7),%a6
+ bra pek256_loop_start
+
+pek256_loop:
+ | %d0: rk[8] -> work
+ | %d1: rk[9] -> work
+ | %d2: rk[10]
+ | %d3: rk[11] -> work
+ | %d4: rk[4]
+ | %d5: work -> rk[5]
+ | %d6: work -> rk[6]
+ | %d7: work -> rk[7]
+ | %a5 = &rk[12]
+ |
+ | temp = rk[11];
+ | rk[12] = (Te1[(temp ) & 0xff] & 0x000000ff);
+ | rk[12] |= (Te0[(temp >> 8) & 0xff] & 0x0000ff00);
+ | rk[12] |= (Te3[(temp >> 16) & 0xff] & 0x00ff0000);
+ | rk[12] |= (Te2[(temp >> 24) ] & 0xff000000);
+ | rk[12] ^= rk[ 4];
+
+ moveml %a5@(-7*4),%d5-%d7 | %d5..%d7 = rk[5..7]
+
+ moveq #0,%d0
+ moveb %d3,%d0 | temp >> 0
+ moveb %a1@(%d0:w:4),%d1 | d1=$xxxxxx00
+ rorl #8,%d1 | d1=$00xxxxxx
+ lsrl #8,%d3 | temp >> 8
+ moveb %d3,%d0
+ moveb %a0@(%d0:w:4),%d1 | d1=$00xxxx11
+ rorl #8,%d1 | d1=$1100xxxx
+ lsrl #8,%d3 | temp >> 16
+ moveb %d3,%d0
+ moveb %a3@(%d0:w:4),%d1 | d1=$1100xx22
+ rorl #8,%d1 | d1=$221100xx
+ lsrl #8,%d3 | temp >> 24
+ moveb %a2@(%d3:w:4),%d1 | d1=$22110033
+ rorl #8,%d1 | d1=$33221100
+
+ eorl %d1,%d4 | rk[12] ^= rk[4]
+ movel %d4,%a5@+ | %d4 is rk[12]
Home |
Main Index |
Thread Index |
Old Index