Source-Changes-HG archive
[src/trunk]: src/lib/libc/arch/sparc64/string Improved version of bzero.
details: https://anonhg.NetBSD.org/src/rev/c2682a289e35
branches: trunk
changeset: 513486:c2682a289e35
user: eeh <eeh@NetBSD.org>
date: Thu Aug 02 01:17:28 2001 +0000
description:
Improved version of bzero.
diffstat:
lib/libc/arch/sparc64/string/memset.S | 177 +++++++++++----------------------
1 files changed, 60 insertions(+), 117 deletions(-)
diffs (260 lines):
diff -r 3e155412603d -r c2682a289e35 lib/libc/arch/sparc64/string/memset.S
--- a/lib/libc/arch/sparc64/string/memset.S Wed Aug 01 20:54:16 2001 +0000
+++ b/lib/libc/arch/sparc64/string/memset.S Thu Aug 02 01:17:28 2001 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: memset.S,v 1.3 2001/08/01 16:45:20 eeh Exp $ */
+/* $NetBSD: memset.S,v 1.4 2001/08/02 01:17:28 eeh Exp $ */
/*
* Copyright (c) 2001, Eduardo E. Horvath
@@ -47,27 +47,10 @@
#include <machine/psl.h>
#if defined(LIBC_SCCS) && !defined(lint)
- RCSID("$NetBSD: memset.S,v 1.3 2001/08/01 16:45:20 eeh Exp $")
+ RCSID("$NetBSD: memset.S,v 1.4 2001/08/02 01:17:28 eeh Exp $")
#endif /* LIBC_SCCS and not lint */
/*
- * memset(addr, c, len)
- *
- * Duplicate the pattern so it fills 64-bits, then swap around the
- * arguments and call bzero.
- */
-ENTRY(memset)
- and %o1, 0x0ff, %o3
- mov %o2, %o1
- sllx %o3, 8, %o2
- or %o2, %o3, %o2
- mov %o0, %o4 ! Save original pointer
- sllx %o2, 16, %o3
- or %o2, %o3, %o2
- sllx %o2, 32, %o3
- ba,pt %icc, Lbzero_internal
- or %o2, %o3, %o2
-/*
* bzero(addr, len)
*
* We want to use VIS instructions if we're clearing out more than
@@ -76,122 +59,86 @@
* to keep track of the current owner of the FPU, hence the different
* code.
*
+ * XXXXX To produce more efficient code, we do not allow lengths
+ * greater than 0x80000000000000000, which are negative numbers.
+ * This should not really be an issue since the VA hole should
+ * cause any such ranges to fail anyway.
*/
ENTRY(bzero)
! %o0 = addr, %o1 = len
- clr %o2 ! Initialize our pattern
+ mov %o1, %o2
+ clr %o1 ! Initialize our pattern
+/*
+ * memset(addr, c, len)
+ *
+ */
+ENTRY(memset)
+ ! %o0 = addr, %o1 = pattern, %o2 = len
+ mov %o0, %o4 ! Save original pointer
+
Lbzero_internal:
- brz,pn %o1, Lbzero_done ! No bytes to copy??
- cmp %o1, 16 ! <16 bytes? use byte ops.
- bge,pn %xcc, 1f
- nop
-0:
- stb %o2, [%o0] ! Small clear.
- inc %o0
- deccc %o1
- bg,pt %icc, 0b
- nop
- ba,pt %icc, Lbzero_done
- nop
-
-1:
- btst 7, %o0 ! 64-bit aligned? Optimization
- bz,pt %xcc, 2f
- nop
- btst 3, %o0 ! 32-bit aligned?
- bz,pt %xcc, 1f
- nop
- btst 1, %o0 ! 16-bit aligned?
- bz,pt %xcc, 0f
+ btst 7, %o0 ! Word aligned?
+ bz,pn %xcc, 0f
nop
-
- !! unaligned -- store 1 byte
- stb %o2, [%o0]
- dec 1, %o1 ! Record storing 1 byte
inc %o0
- cmp %o1, 2
- bl,a,pn %icc, 7f ! 1 or 0 left
- dec 8, %o1 ! Fixup count -8
-0:
- btst 3, %o0
- bz,pt %xcc, 1f
- btst 7, %o0 ! 64-bit aligned?
+ deccc %o2 ! Store up to 7 bytes
+ bge,a,pt %xcc, Lbzero_internal
+ stb %o1, [%o0 - 1]
- !! 16-bit aligned -- store half word
- sth %o2, [%o0]
- dec 2, %o1 ! Prepare to store 2 bytes
- inc 2, %o0
- cmp %o1, 4
- bl,a,pn %icc, 5f ! Less than 4 left
- dec 8, %o1 ! Fixup count -8
-1:
- btst 7, %o0 ! 64-bit aligned?
- bz,pt %xcc, 2f
- nop
- !! 32-bit aligned -- store word
- stw %o2, [%o0]
- dec 4, %o1
- inc 4, %o0
- cmp %o1, 8
- bl,a,pn %icc, Lbzero_cleanup ! Less than 8 left
- dec 8, %o1 ! Fixup count -8
-2:
+ retl ! Duplicate Lbzero_done
+ mov %o4, %o0
+0:
+ /*
+ * Duplicate the pattern so it fills 64-bits.
+ */
+ andcc %o1, 0x0ff, %o1 ! No need to extend zero
+ bz,pt %icc, 1f
+ sllx %o1, 8, %o3 ! sigh. all dependent insns.
+ or %o1, %o3, %o1
+ sllx %o1, 16, %o3
+ or %o1, %o3, %o1
+ sllx %o1, 32, %o3
+ or %o1, %o3, %o1
+1:
#if 1
!! Now we are 64-bit aligned
- cmp %o1, 256 ! Use block clear if len > 256
+ cmp %o2, 256 ! Use block clear if len > 256
bge,pt %xcc, Lbzero_block ! use block store insns
#endif
- deccc 8, %o1
+ deccc 8, %o2
Lbzero_longs:
bl,pn %xcc, Lbzero_cleanup ! Less than 8 bytes left
nop
3:
- stx %o2, [%o0] ! Do 1 longword at a time
- deccc 8, %o1
+ inc 8, %o0
+ deccc 8, %o2
bge,pt %xcc, 3b
- inc 8, %o0
+ stx %o1, [%o0 - 8] ! Do 1 longword at a time
/*
* Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
* -6 => two bytes, etc. Mop up this remainder, if any.
*/
Lbzero_cleanup:
- btst 4, %o1
- bz,pt %xcc, 6f ! if (len & 4) {
- btst 2, %o1
- stw %o2, [%o0] ! *(int *)addr = 0;
+ btst 4, %o2
+ bz,pt %xcc, 5f ! if (len & 4) {
+ nop
+ stw %o1, [%o0] ! *(int *)addr = 0;
inc 4, %o0 ! addr += 4;
5:
- btst 2, %o1
-6:
- bz,pt %xcc, 8f ! if (len & 2) {
- btst 1, %o1
- sth %o2, [%o0] ! *(short *)addr = 0;
+ btst 2, %o2
+ bz,pt %xcc, 7f ! if (len & 2) {
+ nop
+ sth %o1, [%o0] ! *(short *)addr = 0;
inc 2, %o0 ! addr += 2;
7:
- btst 1, %o1
-8:
+ btst 1, %o2
bnz,a %icc, Lbzero_done ! if (len & 1)
- stb %o2, [%o0] ! *addr = 0;
+ stb %o1, [%o0] ! *addr = 0;
Lbzero_done:
retl
mov %o4, %o0 ! Restore ponter for memset (ugh)
- /*
- * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
- * -6 => two bytes, etc. but we're potentially unaligned.
- * Do byte stores since it's easiest.
- */
-Lbzero_small:
- inccc 8, %o1
- bz,pn %icc, Lbzero_done
-1:
- deccc %o1
- stb %o2, [%o0]
- bge,pt %icc, 1b
- inc %o0
- ba,a,pt %icc, Lbzero_done
- nop ! XXX spitfire bug?
#if 1
Lbzero_block:
/*
@@ -209,17 +156,17 @@
bz,pt %xcc, 2f
nop
1:
- stx %o2, [%o0]
+ stx %o1, [%o0]
inc 8, %o0
btst 63, %o0
bnz,pt %xcc, 1b
- dec 8, %o1
+ dec 8, %o2
2:
- brz %o2, 3f ! Skip the memory op
+ brz %o1, 3f ! Skip the memory op
fzero %f0 ! for bzero
- stx %o2, [%o0] ! Flush this puppy to RAM
+ stx %o1, [%o0] ! Flush this puppy to RAM
membar #StoreLoad
ldd [%o0], %f0
3:
@@ -232,22 +179,18 @@
fmovd %f0, %f14
!! Remember: we were 8 bytes too far
- dec 56, %o1 ! Go one iteration too far
+ dec 56, %o2 ! Go one iteration too far
5:
stda %f0, [%o0] ASI_BLK_P ! Store 64 bytes
- deccc 64, %o1
- ble,pn %xcc, 6f
- inc 64, %o0
-
- stda %f0, [%o0] ASI_BLK_P ! Store 64 bytes
- deccc 64, %o1
+ deccc 64, %o2
bg,pn %xcc, 5b
inc 64, %o0
-6:
+
+ membar #Sync
/*
* Now we're done we need to load the FPU state from where
* we put it.
*/
ba,pt %xcc, Lbzero_longs ! Finish up the remainder
- addcc %o1, 56, %o1 ! Restore the count
+ inccc 56, %o2 ! Restore the count
#endif
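
For readers who would rather not trace the SPARC64 assembly, the shape of the rewritten routine is roughly as follows. This is only an illustrative C sketch of the control flow, not code from the commit: the name memset_sketch is made up, and the VIS block-store path taken for lengths of 256 bytes or more (together with the FPU save/restore around it) is omitted.

	/*
	 * Hypothetical C rendering of the new memset.S control flow.
	 * The real code is hand-written SPARC64 assembly.
	 */
	#include <stddef.h>
	#include <stdint.h>

	void *
	memset_sketch(void *addr, int c, size_t len)
	{
		unsigned char *p = addr;
		uint64_t pat = (unsigned char)c;

		/* Store single bytes until the pointer is 8-byte aligned. */
		while (((uintptr_t)p & 7) != 0) {
			if (len == 0)
				return addr;
			*p++ = (unsigned char)pat;
			len--;
		}

		/* Duplicate the pattern so it fills 64 bits (skipped for zero). */
		if (pat != 0) {
			pat |= pat << 8;
			pat |= pat << 16;
			pat |= pat << 32;
		}

		/* Store one longword (8 bytes) at a time. */
		while (len >= 8) {
			*(uint64_t *)(void *)p = pat;
			p += 8;
			len -= 8;
		}

		/* Mop up the 0..7 remaining bytes: 4, then 2, then 1. */
		if (len & 4) {
			*(uint32_t *)(void *)p = (uint32_t)pat;
			p += 4;
		}
		if (len & 2) {
			*(uint16_t *)(void *)p = (uint16_t)pat;
			p += 2;
		}
		if (len & 1)
			*p = (unsigned char)pat;

		return addr;	/* memset returns the original pointer */
	}

The main structural change relative to the old code is that memset no longer duplicates its fill byte up front and branches into bzero; instead bzero simply clears the pattern register and both entry points share one body, with the pattern duplication skipped entirely when the pattern is zero (the common bzero case) and deferred until the pointer is already 8-byte aligned.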
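The new XXXXX comment about lengths greater than 0x8000000000000000 follows from the loop tests: the remaining count is decremented and checked with signed 64-bit branches (bge/bl on %xcc), so a length with the top bit set already looks negative and the store loops are skipped almost immediately. A minimal, self-contained C illustration of that signed interpretation (hypothetical example, not from the commit):

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint64_t len = (uint64_t)1 << 63;	/* 0x8000000000000000 */
		int64_t slen = (int64_t)len;		/* what a signed branch sees */

		/* The longword loop is only entered while the count tests >= 0. */
		if (slen >= 8)
			printf("would enter the longword loop\n");
		else
			printf("count looks negative (%lld); loop is skipped\n",
			    (long long)slen);
		return 0;
	}

As the comment notes, this restriction is harmless in practice because a range that large would cross the VA hole and fault anyway.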