Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/common/lib/libc/arch/aarch64/string Working / new versions f...



details:   https://anonhg.NetBSD.org/src/rev/df4e82b19c34
branches:  trunk
changeset: 359216:df4e82b19c34
user:      skrll <skrll%NetBSD.org@localhost>
date:      Sun Feb 04 21:52:16 2018 +0000

description:
Working / new versions from Ryo Shimizu

diffstat:

 common/lib/libc/arch/aarch64/string/bcopy.S   |  990 ++++++++++++++++++++++++++
 common/lib/libc/arch/aarch64/string/memcmp.S  |   57 +-
 common/lib/libc/arch/aarch64/string/memcpy.S  |  128 +---
 common/lib/libc/arch/aarch64/string/memmove.S |    4 +
 4 files changed, 1028 insertions(+), 151 deletions(-)

diffs (truncated from 1269 to 300 lines):

diff -r 573c1718439b -r df4e82b19c34 common/lib/libc/arch/aarch64/string/bcopy.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/common/lib/libc/arch/aarch64/string/bcopy.S       Sun Feb 04 21:52:16 2018 +0000
@@ -0,0 +1,990 @@
+/* $NetBSD: bcopy.S,v 1.1 2018/02/04 21:52:16 skrll Exp $ */
+
+/*
+ * Copyright (c) 2018 Ryo Shimizu <ryo%nerv.org@localhost>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+#if defined(LIBC_SCCS)
+RCSID("$NetBSD: bcopy.S,v 1.1 2018/02/04 21:52:16 skrll Exp $")
+#endif
+
+#if defined(MEMCOPY)
+
+/*
+ * void *memcpy(void * restrict dst, const void * restrict src, size_t len);
+ *
+ * The buffers may not overlap, so NO_OVERLAP is defined and the
+ * backward-copy routines guarded by "#ifndef NO_OVERLAP" are not assembled.
+ */
+#define FUNCTION               memcpy
+#define NO_OVERLAP
+#define SRC0                   x1
+#define DST0                   x0
+#define LEN                    x2
+
+#elif defined(MEMMOVE)
+
+/*
+ * void *memmove(void *dst, const void *src, size_t len);
+ *
+ * The buffers may overlap, so NO_OVERLAP is left undefined and the
+ * backward-copy routines are assembled.
+ */
+#define FUNCTION               memmove
+#undef NO_OVERLAP
+#define SRC0                   x1
+#define DST0                   x0
+#define LEN                    x2
+
+#else /* !MEMCOPY && !MEMMOVE */
+
+/*
+ * void bcopy(const void *src, void *dst, size_t len);
+ *
+ * bcopy(3) allows the two buffers to overlap, exactly like memmove, so
+ * NO_OVERLAP must stay undefined here; defining it would drop the
+ * backward-copy routines and corrupt overlapping copies.
+ * Note the argument order: src arrives in x0 and dst in x1.
+ */
+#define FUNCTION               bcopy
+#undef NO_OVERLAP
+#define SRC0                   x0
+#define DST0                   x1
+#define LEN                    x2
+
+#endif /* MEMCOPY/MEMMOVE/BCOPY */
+
+/*
+ * Scratch registers.  All of x3-x14 are caller-saved, so they may be
+ * clobbered freely without saving them.
+ */
+#define TMP_X                  x3
+#define TMP_Xw                 w3      /* 32-bit view of TMP_X */
+#define TMP_D                  x4
+#define TMP_S                  x5
+#define DST                    x6      /* working destination pointer; DST0 is kept intact */
+#define SRC                    x7
+#define DATA0                  x8
+#define DATA0w                 w8      /* 32-bit view of DATA0 */
+#define DATA1                  x9
+#define DATA1w                 w9      /* 32-bit view of DATA1 */
+#define DATA2                  x10
+#define SRC_ALIGNBIT           x11     /* (SRC & 7) * 8 */
+#define DST_ALIGNBIT           x12     /* (DST & 7) * 8 */
+#define SRC_DST_ALIGNBIT       x13     /* = SRC_ALIGNBIT - DST_ALIGNBIT */
+#define DST_SRC_ALIGNBIT       x14     /* = -SRC_DST_ALIGNBIT */
+
+/* STP_ALIGN > 8 enables the extra 8-byte pre-alignment step before ldp/stp. */
+#define STP_ALIGN              16      /* align before stp/ldp. 8 or 16 */
+/* Lengths below SMALLSIZE take the small-copy paths instead of the bulk loop. */
+#define SMALLSIZE              32
+
+       .text
+       .align  5
+
+#ifndef NO_OVERLAP
+#ifndef STRICT_ALIGNMENT
+/*
+ * backward_ignore_align: copy LEN bytes from high addresses towards low
+ * ones without regard to pointer alignment (only assembled when
+ * STRICT_ALIGNMENT is not defined).
+ *
+ * SRC0 is advanced in place to one past the end of the source and DST is
+ * set to one past the end of the destination; DST0 is not modified.
+ * Lengths of SMALLSIZE or more are handed to copy_backward.  Shorter
+ * lengths are split into three size classes below, each of which uses the
+ * low bits of LEN to select the 8/4/2/1-byte pieces to copy with
+ * pre-decrementing loads and stores.
+ */
+backward_ignore_align:
+       prfm    PLDL1KEEP, [SRC0]
+       add     SRC0, SRC0, LEN
+       add     DST, DST0, LEN
+       cmp     LEN, #SMALLSIZE
+       bcs     copy_backward
+copy_backward_small:
+       /* here LEN < SMALLSIZE */
+       cmp     LEN, #8
+       bcs     9f
+
+       /* 0 <= len < 8 */
+       /* if (len & 4) { *--(uint32_t *)dst = *--(uint32_t *)src; } */
+       tbz     LEN, #2, 1f
+       ldr     TMP_Xw, [SRC0, #-4]!
+       str     TMP_Xw, [DST, #-4]!
+1:
+       /* if (len & 2) { *--(uint16_t *)dst = *--(uint16_t *)src; } */
+       tbz     LEN, #1, 1f
+       ldrh    TMP_Xw, [SRC0, #-2]!
+       strh    TMP_Xw, [DST, #-2]!
+1:
+       /* if (len & 1) { *--(uint8_t *)dst = *--(uint8_t *)src; } */
+       tbz     LEN, #0, 1f
+       ldrb    TMP_Xw, [SRC0, #-1]!
+       strb    TMP_Xw, [DST, #-1]!
+1:
+       ret
+9:
+
+       cmp     LEN, #16
+       bcs     9f
+
+       /* 8 <= len < 16 */
+       /* bit 3 of len is known to be set, so the 8-byte copy is unconditional */
+       /* *--(uint64_t *)dst = *--(uint64_t *)src; */
+       ldr     TMP_X, [SRC0, #-8]!
+       str     TMP_X, [DST, #-8]!
+       /* if (len & 4) { *--(uint32_t *)dst = *--(uint32_t *)src; } */
+       tbz     LEN, #2, 1f
+       ldr     TMP_Xw, [SRC0, #-4]!
+       str     TMP_Xw, [DST, #-4]!
+1:
+       /* if (len & 2) { *--(uint16_t *)dst = *--(uint16_t *)src; } */
+       tbz     LEN, #1, 1f
+       ldrh    TMP_Xw, [SRC0, #-2]!
+       strh    TMP_Xw, [DST, #-2]!
+1:
+       /* if (len & 1) { *--(uint8_t *)dst = *--(uint8_t *)src; } */
+       tbz     LEN, #0, 1f
+       ldrb    TMP_Xw, [SRC0, #-1]!
+       strb    TMP_Xw, [DST, #-1]!
+1:
+       ret
+9:
+
+       /* 16 <= len < 32 */
+       /* bit 4 of len is known to be set, so the 16-byte copy is unconditional */
+       ldp     DATA0, DATA1, [SRC0, #-16]!
+       stp     DATA0, DATA1, [DST, #-16]!
+       /* if (len & 8) { *--(uint64_t *)dst = *--(uint64_t *)src; } */
+       tbz     LEN, #3, 1f
+       ldr     TMP_X, [SRC0, #-8]!
+       str     TMP_X, [DST, #-8]!
+1:
+       /* if (len & 4) { *--(uint32_t *)dst = *--(uint32_t *)src; } */
+       tbz     LEN, #2, 1f
+       ldr     TMP_Xw, [SRC0, #-4]!
+       str     TMP_Xw, [DST, #-4]!
+1:
+       /* if (len & 2) { *--(uint16_t *)dst = *--(uint16_t *)src; } */
+       tbz     LEN, #1, 1f
+       ldrh    TMP_Xw, [SRC0, #-2]!
+       strh    TMP_Xw, [DST, #-2]!
+1:
+       /* if (len & 1) { *--(uint8_t *)dst = *--(uint8_t *)src; } */
+       tbz     LEN, #0, 1f
+       ldrb    TMP_Xw, [SRC0, #-1]!
+       strb    TMP_Xw, [DST, #-1]!
+1:
+       ret
+#endif /* !STRICT_ALIGNMENT */
+
+       .align  4
+/*
+ * copy_backward: bulk backward copy (high addresses towards low ones).
+ *
+ * On entry SRC0 and DST point one past the end of the source and
+ * destination regions and LEN holds the number of bytes still to copy.
+ * Steps:
+ *   1. optionally copy 1/2/4(/8) bytes until DST is STP_ALIGN-aligned;
+ *   2. copy 1024-byte chunks with a fully unrolled ldp/stp sequence;
+ *   3. copy the remaining multiple of 16 bytes by jumping into the middle
+ *      of that same unrolled sequence;
+ *   4. copy the final 0-15 bytes piece by piece.
+ * Clobbers TMP_X, TMP_D, DATA0, DATA1, SRC0, DST, LEN and the flags.
+ */
+copy_backward:
+       /* DST is not aligned at this point */
+#ifndef STRICT_ALIGNMENT
+       cmp     LEN, #512       /* pre-alignment can be overhead when small */
+       bcc     9f
+#endif
+       /* if (DST & 1) { *--(uint8_t *)dst = *--(uint8_t *)src; } */
+       tbz     DST, #0, 1f
+       ldrb    TMP_Xw, [SRC0, #-1]!
+       strb    TMP_Xw, [DST, #-1]!
+       sub     LEN, LEN, #1
+1:
+       /* if (DST & 2) { *--(uint16_t *)dst = *--(uint16_t *)src; } */
+       tbz     DST, #1, 1f
+       ldrh    TMP_Xw, [SRC0, #-2]!
+       strh    TMP_Xw, [DST, #-2]!
+       sub     LEN, LEN, #2
+1:
+       /* if (DST & 4) { *--(uint32_t *)dst = *--(uint32_t *)src; } */
+       tbz     DST, #2, 1f
+       ldr     TMP_Xw, [SRC0, #-4]!
+       str     TMP_Xw, [DST, #-4]!
+       sub     LEN, LEN, #4
+1:
+#if (STP_ALIGN > 8)
+       /* if (DST & 8) { *--(uint64_t *)dst = *--(uint64_t *)src; } */
+       tbz     DST, #3, 1f
+       ldr     TMP_X, [SRC0, #-8]!
+       str     TMP_X, [DST, #-8]!
+       sub     LEN, LEN, #8
+1:
+#endif /* (STP_ALIGN > 8) */
+9:
+
+       cmp     LEN, #1024
+       bhs     backward_copy1k
+backward_less1k:
+       /* copy 16*n bytes */
+       /*
+        * TMP_D = bytes to copy here (multiple of 16, below 1024).  Each
+        * ldp/stp pair copies 16 bytes and occupies 8 bytes of code, so
+        * entering the unrolled sequence at (8f - TMP_D/2) executes exactly
+        * TMP_D/16 pairs.  Do not change the size of the .rept body without
+        * updating this computation.
+        */
+       and     TMP_D, LEN, #(1023-15)          /* len &= 1023; len &= ~15; */
+       adr     TMP_X, 8f
+       sub     LEN, LEN, TMP_D
+       sub     TMP_X, TMP_X, TMP_D, lsr #1     /* jump to (8f - len/2) */
+       br      TMP_X
+backward_copy1k:       /* copy 16*64 bytes */
+       sub     LEN, LEN, #1024
+       .rept   (1024 / 16)
+       ldp     DATA0, DATA1, [SRC0, #-16]!     /* *--dst = *--src; */
+       stp     DATA0, DATA1, [DST, #-16]!
+       .endr
+8:
+       /* "done" is defined outside this part of the file */
+       cbz     LEN, done
+       cmp     LEN, #1024
+       bhs     backward_copy1k
+       cmp     LEN, #16
+       bhs     backward_less1k
+
+       /*
+        * 1 <= LEN < 16 on this path, so bit 4 is not expected to be set.
+        * The 16-byte step is kept as a guard; it must store to DST (stp),
+        * not load from it.
+        */
+       /* if (len & 16) { *--(uint128_t *)dst = *--(uint128_t *)src; } */
+       tbz     LEN, #4, 1f
+       ldp     DATA0, DATA1, [SRC0, #-16]!
+       stp     DATA0, DATA1, [DST, #-16]!
+1:
+       /* if (len & 8) { *--(uint64_t *)dst = *--(uint64_t *)src; } */
+       tbz     LEN, #3, 1f
+       ldr     TMP_X, [SRC0, #-8]!
+       str     TMP_X, [DST, #-8]!
+1:
+       /* if (len & 4) { *--(uint32_t *)dst = *--(uint32_t *)src; } */
+       tbz     LEN, #2, 1f
+       ldr     TMP_Xw, [SRC0, #-4]!
+       str     TMP_Xw, [DST, #-4]!
+1:
+       /* if (len & 2) { *--(uint16_t *)dst = *--(uint16_t *)src; } */
+       tbz     LEN, #1, 1f
+       ldrh    TMP_Xw, [SRC0, #-2]!
+       strh    TMP_Xw, [DST, #-2]!
+1:
+       /* if (len & 1) { *--(uint8_t *)dst = *--(uint8_t *)src; } */
+       tbz     LEN, #0, 1f
+       ldrb    TMP_Xw, [SRC0, #-1]!
+       strb    TMP_Xw, [DST, #-1]!
+1:
+       ret
+#endif /* !NO_OVERLAP */
+
+
+#if defined(STRICT_ALIGNMENT) && !defined(NO_OVERLAP)
+       .align  5
+backward_copy:
+       prfm    PLDL1KEEP, [SRC0]
+       add     DST, DST0, LEN
+       add     SRC0, SRC0, LEN
+       cmp     LEN, #SMALLSIZE
+       bcs     strict_backward
+
+       cmp     LEN, #10
+       bcs     9f
+backward_tiny:
+       /* copy 1-10 bytes */
+       adr     TMP_X, 8f
+       sub     TMP_X, TMP_X, LEN, lsl #3       /* jump to (8f - len*2) */
+       br      TMP_X
+       .rept   10
+       ldrb    TMP_Xw, [SRC0, #-1]!
+       strb    TMP_Xw, [DST, #-1]!
+       .endr
+8:
+       ret
+9:
+       /* length is small(<32), and src or dst may be unaligned */
+       eor     TMP_X, SRC0, DST0
+       ands    TMP_X, TMP_X, #7
+       bne     notaligned_backward_small
+
+samealign_backward_small:
+       /* if (dst & 1) { *--(uint8_t *)dst = *--(uint8_t *)src; } */
+       tbz     DST, #0, 1f
+       ldrb    TMP_Xw, [SRC0, #-1]!
+       strb    TMP_Xw, [DST, #-1]!
+       sub     LEN, LEN, #1
+1:
+       /* if (dst & 2) { *--(uint16_t *)dst = *--(uint16_t *)src; } */



Home | Main Index | Thread Index | Old Index