Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/common/lib/libc/arch/arm/string Slightly improved (can deal w...
details: https://anonhg.NetBSD.org/src/rev/bfd0fcbe1e3f
branches: trunk
changeset: 783341:bfd0fcbe1e3f
user: matt <matt%NetBSD.org@localhost>
date: Sat Dec 15 22:23:31 2012 +0000
description:
Slightly improved (can deal with all 16 bytes being non-NUL and quickly
proceed to next qword).
diffstat:
common/lib/libc/arch/arm/string/strlen_neon.S | 36 +++++++++++++++------------
1 files changed, 20 insertions(+), 16 deletions(-)
diffs (61 lines):
diff -r a1aa781491fa -r bfd0fcbe1e3f common/lib/libc/arch/arm/string/strlen_neon.S
--- a/common/lib/libc/arch/arm/string/strlen_neon.S Sat Dec 15 21:50:43 2012 +0000
+++ b/common/lib/libc/arch/arm/string/strlen_neon.S Sat Dec 15 22:23:31 2012 +0000
@@ -29,7 +29,7 @@
#include <machine/asm.h>
-RCSID("$NetBSD: strlen_neon.S,v 1.1 2012/12/15 19:26:34 matt Exp $")
+RCSID("$NetBSD: strlen_neon.S,v 1.2 2012/12/15 22:23:31 matt Exp $")
.text
ENTRY(strlen)
@@ -39,6 +39,9 @@
veor q2, q2, q2 /* clear mask */
mov r3, #7 /* NBBY - 1 */
vdup.32 q3, r3 /* dup throughout q3 */
+ mov r3, #0x04 /* magic since there are 4 bytes per U32 */
+ orr r3, r3, lsl #8 /* copy to next 8 bits */
+ orr r3, r3, lsl #16 /* copy to upper 16 bits */
beq .Lmain_loop
veor q0, q0, q0 /* clear q0 */
vmvn q2, q2 /* set all 16 bytes of mask to all 1s */
@@ -64,22 +67,23 @@
vorr q0, q0, q2 /* or "in" leading byte mask */
veor q2, q2, q2 /* clear byte mask */
vceq.i8 q1, q0, #0 /* test each byte for 0 */
+ /* Why couldn't there be a 64-bit CLZ? */
vclz.i32 q1, q1 /* count leading zeroes to find the 0 byte */
vadd.i32 q1, q1, q3 /* round up to byte bounary */
vshr.u32 q1, q1, #3 /* convert to bytes */
- vmov r2, r3, d3 /* get lo & hi counts */
- add r0, r0, r3 /* add bytes to count */
- cmp r3, #4 /* less than 4 means a NUL encountered */
- bxlt lr /* return */
- add r0, r0, r2 /* add bytes to count */
- cmp r2, #4 /* less than 4 means a NUL encountered */
- bxlt lr /* return */
- vmov r2, r3, d2 /* get lo & hi counts */
- add r0, r0, r3 /* add bytes to count */
- cmp r3, #4 /* less than 4 means a NUL encountered */
- bxlt lr /* return */
- add r0, r0, r2 /* add bytes to count */
- cmp r2, #4 /* less than 4 means a NUL encountered */
- bxlt lr /* return */
- b .Lmain_loop
+ vmovn.i32 d0, q1 /* 4 I32 -> 4 I16 */
+ vmovn.i16 d0, q0 /* 4 I16 -> 4 I8 */
+ vmov r2, s0 /* get counts */
+ cmp r2, r3 /* count eq 4 in each byte? */
+ addeq r0, #16 /* no NULs */
+ beq .Lmain_loop /* get next qword */
+ /* r2[31:24] already has 1st word byte count */
+ tst r2, #(4 << 24) /* first word has 4 non-NUL? */
+ addne r2, r2, r2, lsl #8 /* add second word byte-count */
+ tstne r2, #(4 << 16) /* second word has 4 non-NUL? */
+ addne r2, r2, r2, lsl #16 /* add third word byte-count */
+ tstne r2, #(4 << 8) /* third word has 4 non-NUL? */
+ addne r2, r2, r2, lsl #24 /* add fourth word byte-count */
+ add r0, r0, r2, lsr #24 /* add accumulated byte-count to length */
+ RET /* and return. */
END(strlen)
Home |
Main Index |
Thread Index |
Old Index