Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/common/lib/libc/arch/sparc64/string Use a single copy of the...
details: https://anonhg.NetBSD.org/src/rev/40594510ec17
branches: trunk
changeset: 785500:40594510ec17
user: christos <christos%NetBSD.org@localhost>
date: Sun Mar 17 00:42:31 2013 +0000
description:
Use a single copy of the source.
diffstat:
common/lib/libc/arch/sparc64/string/memcpy.S | 1624 +++++++++++++++++++++++
common/lib/libc/arch/sparc64/string/memset.S | 214 +++
common/lib/libc/arch/sparc64/string/strmacros.h | 119 +
3 files changed, 1957 insertions(+), 0 deletions(-)
diffs (truncated from 1969 to 300 lines):
diff -r f59b84f1659e -r 40594510ec17 common/lib/libc/arch/sparc64/string/memcpy.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/common/lib/libc/arch/sparc64/string/memcpy.S Sun Mar 17 00:42:31 2013 +0000
@@ -0,0 +1,1624 @@
+/* $NetBSD: memcpy.S,v 1.1 2013/03/17 00:42:31 christos Exp $ */
+
+/*
+ * Copyright (c) 1996-2002 Eduardo Horvath
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+#include "strmacros.h"
+
+/*
+ * kernel memcpy
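+ * Assumes regions do not overlap. bcopy callers get no useful return
+ * value; the short-copy path returns the destination pointer in %o0
+ * for memcpy callers.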
+ *
+ * Must not use %g7 (see copyin/copyout; note they are not defined above in this file).
+ */
+ENTRY(memcpy) /* dest, src, size */
+ /*
+ * Swap args for bcopy. Gcc generates calls to memcpy for
+ * structure assignments.
+ */
+ mov %o0, %o3
+ mov %o1, %o0
+ mov %o3, %o1
+#if !defined(_KERNEL) || defined(_RUMPKERNEL)
+ENTRY(bcopy) /* src, dest, size */
+#endif
+#ifdef DEBUG
+#if defined(_KERNEL) && !defined(_RUMPKERNEL)
+ set pmapdebug, %o4
+ ld [%o4], %o4
+ btst 0x80, %o4 ! PDB_COPY
+ bz,pt %icc, 3f
+ nop
+#endif
+ save %sp, -CC64FSZ, %sp
+ mov %i0, %o1
+ set 2f, %o0
+ mov %i1, %o2
+ call printf
+ mov %i2, %o3
+! ta 1; nop
+ restore
+ .data
+2: .asciz "memcpy(%p<-%p,%x)\n"
+ _ALIGN
+ .text
+3:
+#endif
+
+ cmp %o2, BCOPY_SMALL
+
+Lmemcpy_start:
+ bge,pt CCCR, 2f ! if >= this many, go be fancy.
+ cmp %o2, 256
+
+ mov %o1, %o5 ! Save memcpy return value
+ /*
+ * Not much to copy, just do it a byte at a time.
+ */
+ deccc %o2 ! while (--len >= 0)
+ bl 1f
+ .empty
+0:
+ inc %o0
+ ldsb [%o0 - 1], %o4 ! (++dst)[-1] = *src++;
+ stb %o4, [%o1]
+ deccc %o2
+ bge 0b
+ inc %o1
+1:
+ retl
+ mov %o5, %o0
+ NOTREACHED
+
+ /*
+ * Plenty of data to copy, so try to do it optimally.
+ */
+2:
+#ifdef USE_BLOCK_STORE_LOAD
+ ! If it is big enough, use VIS instructions
+ bge Lmemcpy_block
+ nop
+#endif /* USE_BLOCK_STORE_LOAD */
+Lmemcpy_fancy:
+
+ !!
+ !! First align the output to an 8-byte entity
+ !!
+
+ save %sp, -CC64FSZ, %sp
+
+ mov %i0, %l0
+ mov %i1, %l1
+
+ mov %i2, %l2
+ btst 1, %l1
+
+ bz,pt %icc, 4f
+ btst 2, %l1
+ ldub [%l0], %l4 ! Load 1st byte
+
+ deccc 1, %l2
+ ble,pn CCCR, Lmemcpy_finish ! XXXX
+ inc 1, %l0
+
+ stb %l4, [%l1] ! Store 1st byte
+ inc 1, %l1 ! Update address
+ btst 2, %l1
+4:
+ bz,pt %icc, 4f
+
+ btst 1, %l0
+ bz,a 1f
+ lduh [%l0], %l4 ! Load short
+
+ ldub [%l0], %l4 ! Load bytes
+
+ ldub [%l0+1], %l3
+ sllx %l4, 8, %l4
+ or %l3, %l4, %l4
+
+1:
+ deccc 2, %l2
+ ble,pn CCCR, Lmemcpy_finish ! XXXX
+ inc 2, %l0
+ sth %l4, [%l1] ! Store 1st short
+
+ inc 2, %l1
+4:
+ btst 4, %l1
+ bz,pt CCCR, 4f
+
+ btst 3, %l0
+ bz,a,pt CCCR, 1f
+ lduw [%l0], %l4 ! Load word -1
+
+ btst 1, %l0
+ bz,a,pt %icc, 2f
+ lduh [%l0], %l4
+
+ ldub [%l0], %l4
+
+ lduh [%l0+1], %l3
+ sllx %l4, 16, %l4
+ or %l4, %l3, %l4
+
+ ldub [%l0+3], %l3
+ sllx %l4, 8, %l4
+ ba,pt %icc, 1f
+ or %l4, %l3, %l4
+
+2:
+ lduh [%l0+2], %l3
+ sllx %l4, 16, %l4
+ or %l4, %l3, %l4
+
+1:
+ deccc 4, %l2
+ ble,pn CCCR, Lmemcpy_finish ! XXXX
+ inc 4, %l0
+
+ st %l4, [%l1] ! Store word
+ inc 4, %l1
+4:
+ !!
+ !! We are now 64-bit (8-byte) aligned in the dest.
+ !!
+Lmemcpy_common:
+
+ and %l0, 7, %l4 ! Shift amount
+ andn %l0, 7, %l0 ! Source addr
+
+ brz,pt %l4, Lmemcpy_noshift8 ! No shift version...
+
+ sllx %l4, 3, %l4 ! In bits
+ mov 8<<3, %l3
+
+ ldx [%l0], %o0 ! Load word -1
+ sub %l3, %l4, %l3 ! Reverse shift
+ deccc 12*8, %l2 ! Have enough room?
+
+ sllx %o0, %l4, %o0
+ bl,pn CCCR, 2f
+ and %l3, 0x38, %l3
+Lmemcpy_unrolled8:
+
+ /*
+ * This is about as close to optimal as you can get, since
+ * the shifts require EU0 and cannot be paired, and you have
+ * 3 dependent operations on the data.
+ */
+
+! ldx [%l0+0*8], %o0 ! Already done
+! sllx %o0, %l4, %o0 ! Already done
+ ldx [%l0+1*8], %o1
+ ldx [%l0+2*8], %o2
+ ldx [%l0+3*8], %o3
+ ldx [%l0+4*8], %o4
+ ba,pt %icc, 1f
+ ldx [%l0+5*8], %o5
+ .align 8
+1:
+ srlx %o1, %l3, %g1
+ inc 6*8, %l0
+
+ sllx %o1, %l4, %o1
+ or %g1, %o0, %g6
+ ldx [%l0+0*8], %o0
+
+ stx %g6, [%l1+0*8]
+ srlx %o2, %l3, %g1
+
+ sllx %o2, %l4, %o2
+ or %g1, %o1, %g6
+ ldx [%l0+1*8], %o1
+
+ stx %g6, [%l1+1*8]
+ srlx %o3, %l3, %g1
+
+ sllx %o3, %l4, %o3
+ or %g1, %o2, %g6
+ ldx [%l0+2*8], %o2
+
+ stx %g6, [%l1+2*8]
+ srlx %o4, %l3, %g1
+
+ sllx %o4, %l4, %o4
+ or %g1, %o3, %g6
+ ldx [%l0+3*8], %o3
+
+ stx %g6, [%l1+3*8]
+ srlx %o5, %l3, %g1
+
+ sllx %o5, %l4, %o5
+ or %g1, %o4, %g6
+ ldx [%l0+4*8], %o4
+
+ stx %g6, [%l1+4*8]
+ srlx %o0, %l3, %g1
+ deccc 6*8, %l2 ! Have enough room?
+
+ sllx %o0, %l4, %o0 ! Next loop
+ or %g1, %o5, %g6
+ ldx [%l0+5*8], %o5
+
+ stx %g6, [%l1+5*8]
+ bge,pt CCCR, 1b
+ inc 6*8, %l1
+
+Lmemcpy_unrolled8_cleanup:
+ !!
+ !! Finished 8 byte block, unload the regs.
+ !!
+ srlx %o1, %l3, %g1
+ inc 5*8, %l0
+
+ sllx %o1, %l4, %o1
+ or %g1, %o0, %g6
+
+ stx %g6, [%l1+0*8]
+ srlx %o2, %l3, %g1
+
+ sllx %o2, %l4, %o2
+ or %g1, %o1, %g6
+
+ stx %g6, [%l1+1*8]
+ srlx %o3, %l3, %g1
+
+ sllx %o3, %l4, %o3
+ or %g1, %o2, %g6
+
+ stx %g6, [%l1+2*8]
+ srlx %o4, %l3, %g1
+
+ sllx %o4, %l4, %o4
+ or %g1, %o3, %g6
+
+ stx %g6, [%l1+3*8]
Home |
Main Index |
Thread Index |
Old Index