Source-Changes-HG archive
[src/trunk]: src/common/lib/libc/arch/aarch64/string Working / new versions f...
details: https://anonhg.NetBSD.org/src/rev/df4e82b19c34
branches: trunk
changeset: 359216:df4e82b19c34
user: skrll <skrll%NetBSD.org@localhost>
date: Sun Feb 04 21:52:16 2018 +0000
description:
Working / new versions from Ryo Shimizu
diffstat:
common/lib/libc/arch/aarch64/string/bcopy.S | 990 ++++++++++++++++++++++++++
common/lib/libc/arch/aarch64/string/memcmp.S | 57 +-
common/lib/libc/arch/aarch64/string/memcpy.S | 128 +---
common/lib/libc/arch/aarch64/string/memmove.S | 4 +
4 files changed, 1028 insertions(+), 151 deletions(-)
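The commit builds all three copy entry points from a single source file. For
reference, the signatures it implements, quoted from the file's own comments
(note bcopy()'s swapped argument order, reflected in the SRC0/DST0 register
assignments in the diff below):

	void *memcpy(void * restrict dst, const void * restrict src, size_t len);
	void *memmove(void *dst, const void *src, size_t len);
	void bcopy(const void *src, void *dst, size_t len);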
diffs (truncated from 1269 to 300 lines):
diff -r 573c1718439b -r df4e82b19c34 common/lib/libc/arch/aarch64/string/bcopy.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/common/lib/libc/arch/aarch64/string/bcopy.S Sun Feb 04 21:52:16 2018 +0000
@@ -0,0 +1,990 @@
+/* $NetBSD: bcopy.S,v 1.1 2018/02/04 21:52:16 skrll Exp $ */
+
+/*
+ * Copyright (c) 2018 Ryo Shimizu <ryo%nerv.org@localhost>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+#if defined(LIBC_SCCS)
+RCSID("$NetBSD: bcopy.S,v 1.1 2018/02/04 21:52:16 skrll Exp $")
+#endif
+
+#if defined(MEMCOPY)
+
+/*
+ * void *memcpy(void * restrict dst, const void * restrict src, size_t len);
+ */
+#define FUNCTION memcpy
+#define NO_OVERLAP
+#define SRC0 x1
+#define DST0 x0
+#define LEN x2
+
+#elif defined(MEMMOVE)
+
+/*
+ * void *memmove(void *dst, const void *src, size_t len);
+ */
+#define FUNCTION memmove
+#undef NO_OVERLAP
+#define SRC0 x1
+#define DST0 x0
+#define LEN x2
+
+#else /* !MEMCOPY && !MEMMOVE */
+
+/*
+ * void bcopy(const void *src, void *dst, size_t len);
+ */
+#define FUNCTION bcopy
+#define NO_OVERLAP
+#define SRC0 x0
+#define DST0 x1
+#define LEN x2
+
+#endif /* MEMCOPY/MEMMOVE/BCOPY */
+
+/* caller-saved temporary registers; may be clobbered freely. */
+#define TMP_X x3
+#define TMP_Xw w3
+#define TMP_D x4
+#define TMP_S x5
+#define DST x6
+#define SRC x7
+#define DATA0 x8
+#define DATA0w w8
+#define DATA1 x9
+#define DATA1w w9
+#define DATA2 x10
+#define SRC_ALIGNBIT x11 /* (SRC & 7) * 8 */
+#define DST_ALIGNBIT x12 /* (DST & 7) * 8 */
+#define SRC_DST_ALIGNBIT x13 /* = SRC_ALIGNBIT - DST_ALIGNBIT */
+#define DST_SRC_ALIGNBIT x14 /* = -SRC_DST_ALIGNBIT */
+
+#define STP_ALIGN 16 /* align before stp/ldp. 8 or 16 */
+#define SMALLSIZE 32
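
A rough C analogue of the ALIGNBIT registers defined above (variable names
are this editor's, not the commit's): the low three address bits are scaled
to a bit count, presumably so the (truncated) unaligned-copy code can merge
doublewords with shifts rather than byte loads:

	/* sketch only; mirrors the comments next to x11-x14 above */
	uint64_t src_alignbit = (src & 7) * 8;	/* SRC misalignment, in bits */
	uint64_t dst_alignbit = (dst & 7) * 8;	/* DST misalignment, in bits */
	int64_t  src_dst_alignbit = src_alignbit - dst_alignbit;
	int64_t  dst_src_alignbit = -src_dst_alignbit;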
+
+ .text
+ .align 5
+
+#ifndef NO_OVERLAP
+#ifndef STRICT_ALIGNMENT
+backward_ignore_align:
+ prfm PLDL1KEEP, [SRC0]
+ add SRC0, SRC0, LEN
+ add DST, DST0, LEN
+ cmp LEN, #SMALLSIZE
+ bcs copy_backward
+copy_backward_small:
+ cmp LEN, #8
+ bcs 9f
+
+ /* 0 <= len < 8 */
+ /* if (len & 4) { *--(uint32_t *)dst = *--(uint32_t *)src; } */
+ tbz LEN, #2, 1f
+ ldr TMP_Xw, [SRC0, #-4]!
+ str TMP_Xw, [DST, #-4]!
+1:
+ /* if (len & 2) { *--(uint16_t *)dst = *--(uint16_t *)src; } */
+ tbz LEN, #1, 1f
+ ldrh TMP_Xw, [SRC0, #-2]!
+ strh TMP_Xw, [DST, #-2]!
+1:
+ /* if (len & 1) { *--(uint8_t *)dst = *--(uint8_t *)src; } */
+ tbz LEN, #0, 1f
+ ldrb TMP_Xw, [SRC0, #-1]!
+ strb TMP_Xw, [DST, #-1]!
+1:
+ ret
+9:
+
+ cmp LEN, #16
+ bcs 9f
+
+ /* 8 <= len < 16 */
+ /* *--(uint64_t *)dst = *--(uint64_t *)src; */
+ ldr TMP_X, [SRC0, #-8]!
+ str TMP_X, [DST, #-8]!
+ /* if (len & 4) { *--(uint32_t *)dst = *--(uint32_t *)src; } */
+ tbz LEN, #2, 1f
+ ldr TMP_Xw, [SRC0, #-4]!
+ str TMP_Xw, [DST, #-4]!
+1:
+ /* if (len & 2) { *--(uint16_t *)dst = *--(uint16_t *)src; } */
+ tbz LEN, #1, 1f
+ ldrh TMP_Xw, [SRC0, #-2]!
+ strh TMP_Xw, [DST, #-2]!
+1:
+ /* if (len & 1) { *--(uint8_t *)dst = *--(uint8_t *)src; } */
+ tbz LEN, #0, 1f
+ ldrb TMP_Xw, [SRC0, #-1]!
+ strb TMP_Xw, [DST, #-1]!
+1:
+ ret
+9:
+
+ /* 16 <= len < 32 */
+ ldp DATA0, DATA1, [SRC0, #-16]!
+ stp DATA0, DATA1, [DST, #-16]!
+ /* if (len & 8) { *--(uint64_t *)dst = *--(uint64_t *)src; } */
+ tbz LEN, #3, 1f
+ ldr TMP_X, [SRC0, #-8]!
+ str TMP_X, [DST, #-8]!
+1:
+ /* if (len & 4) { *--(uint32_t *)dst = *--(uint32_t *)src; } */
+ tbz LEN, #2, 1f
+ ldr TMP_Xw, [SRC0, #-4]!
+ str TMP_Xw, [DST, #-4]!
+1:
+ /* if (len & 2) { *--(uint16_t *)dst = *--(uint16_t *)src; } */
+ tbz LEN, #1, 1f
+ ldrh TMP_Xw, [SRC0, #-2]!
+ strh TMP_Xw, [DST, #-2]!
+1:
+ /* if (len & 1) { *--(uint8_t *)dst = *--(uint8_t *)src; } */
+ tbz LEN, #0, 1f
+ ldrb TMP_Xw, [SRC0, #-1]!
+ strb TMP_Xw, [DST, #-1]!
+1:
+ ret
+#endif /* !STRICT_ALIGNMENT */
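
Each tbz above tests a single bit of LEN and skips the copy when that bit is
clear, while the pre-indexed addressing mode ([SRC0, #-4]!) performs the C
*-- in the same instruction; so each tbz LEN, #2, 1f / ldr / str triple is
exactly the commented if (len & 4) { *--(uint32_t *)dst = *--(uint32_t *)src; }.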
+
+ .align 4
+copy_backward:
+	/* DST may be unaligned at this point */
+#ifndef STRICT_ALIGNMENT
+	cmp	LEN, #512	/* pre-alignment costs more than it saves for small copies */
+ bcc 9f
+#endif
+ /* if (DST & 1) { *--(uint8_t *)dst = *--(uint8_t *)src; } */
+ tbz DST, #0, 1f
+ ldrb TMP_Xw, [SRC0, #-1]!
+ strb TMP_Xw, [DST, #-1]!
+ sub LEN, LEN, #1
+1:
+ /* if (DST & 2) { *--(uint16_t *)dst = *--(uint16_t *)src; } */
+ tbz DST, #1, 1f
+ ldrh TMP_Xw, [SRC0, #-2]!
+ strh TMP_Xw, [DST, #-2]!
+ sub LEN, LEN, #2
+1:
+ /* if (DST & 4) { *--(uint32_t *)dst = *--(uint32_t *)src; } */
+ tbz DST, #2, 1f
+ ldr TMP_Xw, [SRC0, #-4]!
+ str TMP_Xw, [DST, #-4]!
+ sub LEN, LEN, #4
+1:
+#if (STP_ALIGN > 8)
+ /* if (DST & 8) { *--(uint64_t *)dst = *--(uint64_t *)src; } */
+ tbz DST, #3, 1f
+ ldr TMP_X, [SRC0, #-8]!
+ str TMP_X, [DST, #-8]!
+ sub LEN, LEN, #8
+1:
+#endif /* (STP_ALIGN > 8) */
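
Taken together, the chain above peels 1, 2, 4 and (when STP_ALIGN is 16) 8
trailing bytes until DST is STP_ALIGN-aligned, so the bulk loop below can use
aligned stp stores. The file's own per-step comments aggregate to this C
shape (pseudocode, in the source's own style):

	if (dst & 1) { *--(uint8_t *)dst  = *--(uint8_t *)src;  len -= 1; }
	if (dst & 2) { *--(uint16_t *)dst = *--(uint16_t *)src; len -= 2; }
	if (dst & 4) { *--(uint32_t *)dst = *--(uint32_t *)src; len -= 4; }
	if (dst & 8) { *--(uint64_t *)dst = *--(uint64_t *)src; len -= 8; }	/* STP_ALIGN > 8 only */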
+9:
+
+ cmp LEN, #1024
+ bhs backward_copy1k
+backward_less1k:
+ /* copy 16*n bytes */
+ and TMP_D, LEN, #(1023-15) /* len &= 1023; len &= ~15; */
+ adr TMP_X, 8f
+ sub LEN, LEN, TMP_D
+ sub TMP_X, TMP_X, TMP_D, lsr #1 /* jump to (8f - len/2) */
+ br TMP_X
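
The adr/sub/br triple is a computed branch into the unrolled block that
follows (a Duff's-device-style entry): each ldp/stp pair is two 4-byte
instructions (8 bytes of code) moving 16 bytes of data, so backing up
TMP_D/2 bytes from label 8 leaves exactly TMP_D/16 pairs to run. Worked
example: a 48-byte remainder gives TMP_D = 48; the branch lands 24 bytes
(three pairs) before 8:, and those three pairs copy the 48 bytes.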
+backward_copy1k: /* copy 16*64 bytes */
+ sub LEN, LEN, #1024
+ .rept (1024 / 16)
+ ldp DATA0, DATA1, [SRC0, #-16]! /* *--dst = *--src; */
+ stp DATA0, DATA1, [DST, #-16]!
+ .endr
+8:
+ cbz LEN, done
+ cmp LEN, #1024
+ bhs backward_copy1k
+ cmp LEN, #16
+ bhs backward_less1k
+
+ /* if (len & 16) { *--(uint128_t *)dst = *--(uint128_t *)src; } */
+ tbz LEN, #4, 1f
+ ldp DATA0, DATA1, [SRC0, #-16]!
+ stp DATA0, DATA1, [DST, #-16]!
+1:
+ /* if (len & 8) { *--(uint64_t *)dst = *--(uint64_t *)src; } */
+ tbz LEN, #3, 1f
+ ldr TMP_X, [SRC0, #-8]!
+ str TMP_X, [DST, #-8]!
+1:
+ /* if (len & 4) { *--(uint32_t *)dst = *--(uint32_t *)src; } */
+ tbz LEN, #2, 1f
+ ldr TMP_Xw, [SRC0, #-4]!
+ str TMP_Xw, [DST, #-4]!
+1:
+ /* if (len & 2) { *--(uint16_t *)dst = *--(uint16_t *)src; } */
+ tbz LEN, #1, 1f
+ ldrh TMP_Xw, [SRC0, #-2]!
+ strh TMP_Xw, [DST, #-2]!
+1:
+ /* if (len & 1) { *--(uint8_t *)dst = *--(uint8_t *)src; } */
+ tbz LEN, #0, 1f
+ ldrb TMP_Xw, [SRC0, #-1]!
+ strb TMP_Xw, [DST, #-1]!
+1:
+ ret
+#endif /* !NO_OVERLAP */
+
+
+#if defined(STRICT_ALIGNMENT) && !defined(NO_OVERLAP)
+ .align 5
+backward_copy:
+ prfm PLDL1KEEP, [SRC0]
+ add DST, DST0, LEN
+ add SRC0, SRC0, LEN
+ cmp LEN, #SMALLSIZE
+ bcs strict_backward
+
+ cmp LEN, #10
+ bcs 9f
+backward_tiny:
+ /* copy 1-10 bytes */
+ adr TMP_X, 8f
+ sub TMP_X, TMP_X, LEN, lsl #3 /* jump to (8f - len*8) */
+ br TMP_X
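
Same computed-branch trick as backward_less1k, at byte granularity: each
ldrb/strb pair below is 8 bytes of code, so LEN, lsl #3 (len * 8) backs up
exactly len pairs from label 8. For len = 3 the branch lands 24 bytes before
8:, executing only the last three byte copies.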
+ .rept 10
+ ldrb TMP_Xw, [SRC0, #-1]!
+ strb TMP_Xw, [DST, #-1]!
+ .endr
+8:
+ ret
+9:
+ /* length is small (<32), and src or dst may be unaligned */
+ eor TMP_X, SRC0, DST0
+ ands TMP_X, TMP_X, #7
+ bne notaligned_backward_small
+
+samealign_backward_small:
+ /* if (dst & 1) { *--(uint8_t *)dst = *--(uint8_t *)src; } */
+ tbz DST, #0, 1f
+ ldrb TMP_Xw, [SRC0, #-1]!
+ strb TMP_Xw, [DST, #-1]!
+ sub LEN, LEN, #1
+1:
+ /* if (dst & 2) { *--(uint16_t *)dst = *--(uint16_t *)src; } */