Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/arm/arm Make this work. Various fixes and some further optimizations.
details: https://anonhg.NetBSD.org/src/rev/517b9616110b
branches: trunk
changeset: 783421:517b9616110b
user: matt <matt%NetBSD.org@localhost>
date: Thu Dec 20 07:18:33 2012 +0000
description:
Make this work. Various fixes and some further optimizations.
diffstat:
sys/arch/arm/arm/cpu_in_cksum_buffer.S | 85 ++++++++++++++++++++++++++--------
1 files changed, 65 insertions(+), 20 deletions(-)
diffs (155 lines):
diff -r 6aece91e6994 -r 517b9616110b sys/arch/arm/arm/cpu_in_cksum_buffer.S
--- a/sys/arch/arm/arm/cpu_in_cksum_buffer.S Thu Dec 20 07:16:00 2012 +0000
+++ b/sys/arch/arm/arm/cpu_in_cksum_buffer.S Thu Dec 20 07:18:33 2012 +0000
@@ -29,7 +29,7 @@
#include <machine/asm.h>
-RCSID("$NetBSD: cpu_in_cksum_buffer.S,v 1.1 2012/12/19 15:05:16 matt Exp $")
+RCSID("$NetBSD: cpu_in_cksum_buffer.S,v 1.2 2012/12/20 07:18:33 matt Exp $")
/*
* Special note:
@@ -66,7 +66,7 @@
#ifndef __OPTIMIZE_SIZE__
rsb r3, r3, #64 /* subtract from 64 */
#ifdef _ARM_ARCH_DWORD_OK
- add r3, r3, r1, lsr #1 /* multiply by 1.5 */
+ add r3, r3, r3, lsr #1 /* multiply by 1.5 */
add pc, pc, r3 /* and jump! */
#else
add pc, pc, r3, lsl #1 /* multiply by 2 and jump! */
@@ -97,6 +97,7 @@
LOAD_DWORD_INTO_R4(r0) /* 1 dword left */
.Ladd_one_dword:
adcs ip, ip, r4
+.Ladd_one_word:
adcs ip, ip, r5
teq r2, r0 /* nothing left? */
beq .Lfold /* yep, proceed to hold */
@@ -107,13 +108,14 @@
bne 4b /* yep, do 64 at time */
#endif
bics r3, r1, #7 /* at least 8 bytes left? */
- bge 3b /* yep, do them */
+ bne 3b /* yep, do them */
.Lfinal_dword:
- tst r1, #4 /* more than one word more left? */
+ sub r3, r1, #1 /* 0-3 = 1 word, 4-7 = 2 words */
+ tst r3, #4 /* more than one word more left? */
moveq r4, #0 /* no, just use zero */
- ldrne r4, [r0], #4 /* yes, load first word */
- ldr r5, [r0] /* load last word */
+ ldreq r5, [r0] /* no, load last word */
+ ldmneia r0, {r4-r5} /* yes, load last dword */
.Lfinal_dword_noload:
rsb r1, r1, #4 /* find out many bytes to discard */
#ifdef __ARMEL__
@@ -127,7 +129,9 @@
tst r1, #1 /* discard odd? */
bicne r5, r5, #0x000000ff /* yes, discard odd byte */
#endif
- adds ip, ip, r4 /* add 1st to accumulator */
+.Lfinal_add_one_dword:
+ adcs ip, ip, r4 /* add 1st to accumulator */
+.Lfinal_add_one_word:
adcs ip, ip, r5 /* add 2nd to accumulator */
/*
@@ -143,33 +147,58 @@
#include "cpu_in_cksum_fold.S"
.Ldword_misaligned:
+ tst r0, #3 /* are at least word aligned? */
+ bne .Lword_misaligned /* no, do it the hard way */
+ ldr r5, [r0], #4 /* load word here in case of partial */
+ sub r1, r1, #4 /* subtract length of one word */
+ teq r1, #0 /* what is length? */
+ beq .Lfinal_add_one_word /* = 0? just do the final add */
+ addgt r2, r1, r0 /* > 0? point r2 just past end */
+ bgt .Ladd_one_word /* > 0? accumulate it and loop */
+ mov r4, #0 /* < 0? zero this */
+ b .Lfinal_dword_noload /* < 0? handle final partial dword */
+
+.Lword_misaligned:
+ tst r0, #4 /* do we load 1 or 2 words? */
bic r0, r0, #3 /* force word alignment */
- add r1, r1, r2 /* add misalignment to length */
- tst r2, #4 /* first */
- ldr r4, [r0], #4 /* load first word */
+ add r1, r1, r2 /* add initial offset to length */
+ sub r1, r1, #8 /* subtract length of one dword */
+ ldmeqia r0!, {r4-r5} /* load first dword */
+ ldrne r4, [r0], #4 /* load first word */
movne r5, #0 /* no second word */
- ldreq r5, [r0], #4 /* load second word */
/*
* We are now dword aligned.
*/
#ifdef __ARMEL__
tst r2, #2 /* discard at least 2? */
movne r4, r4, lsr #16 /* yes, discard lower halfword */
- tst r2, #1 /* discard odd? */
- bicne r4, r4, #0x0000ff00 /* yes, discard odd byte */
+ tst r2, #1 /* start odd? */
+ bicne r4, r4, #0x000000ff /* yes, discard even byte */
#else
tst r2, #2 /* discard at least 2? */
movne r4, r4, lsl #16 /* yes, discard upper halfword */
- tst r2, #1 /* discard odd? */
- bicne r4, r4, #0x00ff0000 /* yes, discard odd byte */
+ tst r2, #1 /* start odd? */
+ bicne r4, r4, #0xff000000 /* yes, discard even byte */
#endif
/*
+ * Since we started on an odd boundary, set up our stack frame so we
+ * fixup the return value to be byteswapped.
+ */
+ ldrne r3, [sp, #4] /* pop r5 */
+ strne r3, [sp, #-4]! /* push it again */
+ ldrne r3, [sp, #4] /* pop r4 */
+ strne r3, [sp, #-4]! /* push it again */
+ strne lr, [sp, #8] /* save our return address */
+ adrne lr, .Lmisaligned_fixup /* use new to fixup the return value */
+ /*
* See if we have at least a full dword to process. If we do, jump
* into the main loop as if we had just loaded a single dword.
*/
- bics r3, r1, #7 /* at least one dword? */
- addne r2, r1, r0 /* yes, point r2 just past end */
- bne .Ladd_one_dword /* yes, accumulate it and loop */
+ teq r1, #0 /* what is length? */
+ beq .Lfinal_add_one_word /* = 0? just do the final add */
+ addgt r2, r1, r0 /* > 0? point r2 just past end */
+ bgt .Ladd_one_dword /* > 0? accumulate it and loop */
+
/*
* Not a full dword so do the final dword processing to find out
* bytes to discard. If we only loaded one word, move it to 2nd
@@ -177,7 +206,23 @@
* clear the 1st word.
*/
tst r2, #4 /* one or two words? */
- movne r5, r4 /* one, move 1st word to 2nd word */
- movne r4, #0 /* and clear 1st word */
+ moveq r5, r4 /* one, move 1st word to 2nd word */
+ moveq r4, #0 /* and clear 1st word */
b .Lfinal_dword_noload /* handle final dword */
+
+ /*
+ * If we had an odd address, we have to byte swap the return value.
+ * Instead of testing everywhere, we inserted a fake callframe and
+ * set LR to return here, do the fixup, and then return to the caller.
+ */
+.Lmisaligned_fixup:
+ ldr lr, [sp], #8 /* fetch saved LR, pop the 8-byte fake frame */
+#ifdef _ARM_ARCH_6
+ rev16 r0, r0 /* byte swap */
+#else
+ mov r0, r0, ror #8 /* move 0:7 to 24:31 and 8:15 to 0:7 */
+ orr r0, r0, r0, lsl #16 /* copy 0:7 to 16:23 */
+ mov r0, r0, lsr #16 /* move 16:31 down to 0:15, clearing 16:31 */
+#endif
+ RET
END(cpu_in_cksum_buffer)
Home |
Main Index |
Thread Index |
Old Index