Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/crypto/chacha/arch/arm Adjust sp, not fp, to allocate a 32-byte temporary.
details: https://anonhg.NetBSD.org/src/rev/f56096fd61c7
branches: trunk
changeset: 1013253:f56096fd61c7
user: riastradh <riastradh%NetBSD.org@localhost>
date: Sun Aug 23 16:39:06 2020 +0000
description:
Adjust sp, not fp, to allocate a 32-byte temporary.
Costs another couple MOV instructions, but we can't skimp on this --
there's no red zone below sp for interrupts on arm, so we can't touch
anything there. So just use fp to save sp and then adjust sp itself,
rather than using fp as a temporary register to point just below sp.
Should fix PR port-arm/55598 -- previously the ChaCha self-test
failed 33/10000 trials when triggered by sysctl on a running system;
with the patch it has failed 0/10000 trials.
(Presumably it happened more often at boot time, leading to 5/26
failures in the test bed, because we have just enabled interrupts and
some devices are starting to deliver them.)
diffstat:
sys/crypto/chacha/arch/arm/chacha_neon_32.S | 36 ++++++++++++++++------------
1 files changed, 20 insertions(+), 16 deletions(-)
diffs (140 lines):
diff -r bc5e744c3730 -r f56096fd61c7 sys/crypto/chacha/arch/arm/chacha_neon_32.S
--- a/sys/crypto/chacha/arch/arm/chacha_neon_32.S Sun Aug 23 16:18:12 2020 +0000
+++ b/sys/crypto/chacha/arch/arm/chacha_neon_32.S Sun Aug 23 16:39:06 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: chacha_neon_32.S,v 1.3 2020/08/08 14:47:01 riastradh Exp $ */
+/* $NetBSD: chacha_neon_32.S,v 1.4 2020/08/23 16:39:06 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
#include <machine/asm.h>
-RCSID("$NetBSD: chacha_neon_32.S,v 1.3 2020/08/08 14:47:01 riastradh Exp $")
+RCSID("$NetBSD: chacha_neon_32.S,v 1.4 2020/08/23 16:39:06 riastradh Exp $")
.fpu neon
@@ -54,7 +54,7 @@
*/
.macro ROUNDLD a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3, d0,d1,d2,d3
- vld1.8 {\c2-\c3}, [fp, :256]
+ vld1.8 {\c2-\c3}, [sp, :256]
.endm
.macro ROUND a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3, d0,d1,d2,d3, c0l, d0l,d0h,d1l,d1h,d2l,d2h,d3l,d3h
@@ -80,7 +80,7 @@
vadd.u32 \c2, \c2, \d2
vadd.u32 \c3, \c3, \d3
- vst1.8 {\c0-\c1}, [fp, :256] /* free c0 and c1 as temps */
+ vst1.8 {\c0-\c1}, [sp, :256] /* free c0 and c1 as temps */
veor \c0, \b0, \c0
veor \c1, \b1, \c1
@@ -118,7 +118,7 @@
vtbl.8 \d3l, {\d3l}, \c0l
vtbl.8 \d3h, {\d3h}, \c0l
- vld1.8 {\c0-\c1}, [fp, :256] /* restore c0 and c1 */
+ vld1.8 {\c0-\c1}, [sp, :256] /* restore c0 and c1 */
/* c += d; b ^= c; b <<<= 7 */
vadd.u32 \c2, \c2, \d2
@@ -126,7 +126,7 @@
vadd.u32 \c0, \c0, \d0
vadd.u32 \c1, \c1, \d1
- vst1.8 {\c2-\c3}, [fp, :256] /* free c2 and c3 as temps */
+ vst1.8 {\c2-\c3}, [sp, :256] /* free c2 and c3 as temps */
veor \c2, \b2, \c2
veor \c3, \b3, \c3
@@ -160,17 +160,18 @@
/* save callee-saves registers */
push {r4, r5, r6, r7, r8, r10, fp, lr}
vpush {d8-d15}
+ mov fp, sp
/* r7 := .Lconstants - .Lconstants_addr, r6 := .Lconstants_addr */
ldr r7, .Lconstants_addr
adr r6, .Lconstants_addr
/* reserve space for two 128-bit/16-byte q registers */
- sub fp, sp, #0x20
- bic fp, fp, #0x1f /* align */
+ sub sp, sp, #0x20
+ bic sp, sp, #0x1f /* align */
/* get parameters */
- add ip, sp, #96
+ add ip, fp, #96
add r7, r7, r6 /* r7 := .Lconstants (= v0123) */
ldm ip, {r4, r5} /* r4 := const, r5 := nr */
ldm r2, {r6, r8, r10} /* (r6, r8, r10) := nonce[0:12) */
@@ -311,7 +312,7 @@
vadd.u32 q3, q3, q8
vadd.u32 q7, q7, q8
- vld1.8 {q8-q9}, [fp, :256] /* restore q8-q9 */
+ vld1.8 {q8-q9}, [sp, :256] /* restore q8-q9 */
vst1.8 {q0-q1}, [r0]!
vld1.8 {q0}, [r3] /* q0 := key[16:32) */
@@ -354,9 +355,10 @@
/* zero temporary space on the stack */
vmov.i32 q0, #0
vmov.i32 q1, #0
- vst1.8 {q0-q1}, [fp, :256]
+ vst1.8 {q0-q1}, [sp, :256]
/* restore callee-saves registers and stack */
+ mov sp, fp
vpop {d8-d15}
pop {r4, r5, r6, r7, r8, r10, fp, lr}
bx lr
@@ -374,17 +376,18 @@
/* save callee-saves registers */
push {r4, r5, r6, r7, r8, r10, fp, lr}
vpush {d8-d15}
+ mov fp, sp
/* r7 := .Lconstants - .Lconstants_addr, r6 := .Lconstants_addr */
ldr r7, .Lconstants_addr
adr r6, .Lconstants_addr
/* reserve space for two 128-bit/16-byte q registers */
- sub fp, sp, #0x20
- bic fp, fp, #0x1f /* align */
+ sub sp, sp, #0x20
+ bic sp, sp, #0x1f /* align */
/* get parameters */
- add ip, sp, #96
+ add ip, fp, #96
add r7, r7, r6 /* r7 := .Lconstants (= v0123) */
ldm ip, {r4, r5, ip} /* r4 := key, r5 := const, ip := nr */
ldm r3, {r6, r8, r10} /* (r6, r8, r10) := nonce[0:12) */
@@ -475,7 +478,7 @@
veor q0, q0, q8 /* compute ciphertext bytes [0:32) */
veor q1, q1, q9
- vld1.8 {q8-q9}, [fp, :256] /* restore q8-q9 */
+ vld1.8 {q8-q9}, [sp, :256] /* restore q8-q9 */
vst1.8 {q0-q1}, [r0]! /* store ciphertext bytes [0:32) */
vld1.8 {q0}, [r4] /* q0 := key[16:32) */
@@ -552,9 +555,10 @@
/* zero temporary space on the stack */
vmov.i32 q0, #0
vmov.i32 q1, #0
- vst1.8 {q0-q1}, [fp, :256]
+ vst1.8 {q0-q1}, [sp, :256]
/* restore callee-saves registers and stack */
+ mov sp, fp
vpop {d8-d15}
pop {r4, r5, r6, r7, r8, r10, fp, lr}
bx lr
Home |
Main Index |
Thread Index |
Old Index