Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch New, improved version of copyin(), copyout(), and kcopy() by Allen Briggs.
details: https://anonhg.NetBSD.org/src/rev/794917671dfc
branches: trunk
changeset: 535195:794917671dfc
user: bjh21 <bjh21%NetBSD.org@localhost>
date: Sun Aug 11 21:19:12 2002 +0000
description:
New, improved version of copyin(), copyout(), and kcopy() by Allen Briggs.
This version works on both 26-bit and 32-bit machines. For large copies,
it's up to three times as fast as the old arm32 version and five times as
fast as the old arm26 version. For small copies it seems to be even faster
(getrusage() is apparently over ten times faster on an ARM610).
Hooray for Allen!
diffstat:
sys/arch/acorn26/acorn26/copyinout.S | 103 +-----
sys/arch/arm/arm/bcopyinout.S | 673 +++++++++++++++++++++++++++++++++++
sys/arch/arm/arm32/bcopyinout.S | 245 ------------
sys/arch/arm/conf/files.arm | 4 +-
4 files changed, 677 insertions(+), 348 deletions(-)
diffs (truncated from 1072 to 300 lines):
diff -r 49e6533f517c -r 794917671dfc sys/arch/acorn26/acorn26/copyinout.S
--- a/sys/arch/acorn26/acorn26/copyinout.S Sun Aug 11 20:50:39 2002 +0000
+++ b/sys/arch/acorn26/acorn26/copyinout.S Sun Aug 11 21:19:12 2002 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: copyinout.S,v 1.2 2002/03/24 23:37:42 bjh21 Exp $ */
+/* $NetBSD: copyinout.S,v 1.3 2002/08/11 21:19:15 bjh21 Exp $ */
/*-
* Copyright (c) 2000 Ben Harris
@@ -32,110 +32,11 @@
#include <machine/asm.h>
-RCSID("$NetBSD: copyinout.S,v 1.2 2002/03/24 23:37:42 bjh21 Exp $")
+RCSID("$NetBSD: copyinout.S,v 1.3 2002/08/11 21:19:15 bjh21 Exp $")
#include <sys/errno.h>
#include "assym.h"
-/*
- * int copyin(const void *ua, void *ka, size_t len);
- * int copyout(const void *ka, void *ua, size_t len);
- * int kcopy(const void *src, void *dst, size_t len);
- */
-
-/*
- * memcpy isn't currently data-abort-safe (it uses R14). This is much
- * slower, but safer.
- */
-
-/* LINTSTUB: Func: int copyin(const void *uaddr, void *kaddr, size_t len) */
-ENTRY(copyin)
- mov ip, sp
- stmfd sp!, {r4, fp, ip, lr, pc}
- sub fp, ip, #4
- adr r3, Lcopyfault
- ldr r4, Lcurproc
- ldr r4, [r4]
- ldr r4, [r4, #P_ADDR]
- str r3, [r4, #(U_PCB + PCB_ONFAULT)]
- teq r2, #0
- beq Lcopyinskip
-Lcopyinloop:
- ldrbt r3, [r0], #1
- strb r3, [r1], #1
- subs r2, r2, #1
- bne Lcopyinloop
-Lcopyinskip:
- mov r0, #0
- str r0, [r4, #(U_PCB + PCB_ONFAULT)]
-#ifdef __APCS_26__
- ldmdb fp, {r4, fp, sp, pc}^
-#else
- ldmdb fp, {r4, fp, sp, pc}
-#endif
-
-/* LINTSTUB: Func: int copyout(const void *kaddr, void *uaddr, size_t len) */
-ENTRY(copyout)
- mov ip, sp
- stmfd sp!, {r4, fp, ip, lr, pc}
- sub fp, ip, #4
- adr r3, Lcopyfault
- ldr r4, Lcurproc
- ldr r4, [r4]
- ldr r4, [r4, #P_ADDR]
- str r3, [r4, #(U_PCB + PCB_ONFAULT)]
- teq r2, #0
- beq Lcopyoutskip
-Lcopyoutloop:
- ldrb r3, [r0], #1
- strbt r3, [r1], #1
- subs r2, r2, #1
- bne Lcopyoutloop
-Lcopyoutskip:
- mov r0, #0
- str r0, [r4, #(U_PCB + PCB_ONFAULT)]
-#ifdef __APCS_26__
- ldmdb fp, {r4, fp, sp, pc}^
-#else
- ldmdb fp, {r4, fp, sp, pc}
-#endif
-
-/* LINTSTUB: Func: int kcopy(const void *kfaddr, void *kdaddr, size_t len) */
-ENTRY(kcopy)
- mov ip, sp
- stmfd sp!, {r4, fp, ip, lr, pc}
- sub fp, ip, #4
- adr r3, Lcopyfault
- ldr r4, Lcurproc
- ldr r4, [r4]
- ldr r4, [r4, #P_ADDR]
- str r3, [r4, #(U_PCB + PCB_ONFAULT)]
- teq r2, #0
- beq Lkcopyskip
-Lkcopyloop:
- ldrb r3, [r0], #1
- strb r3, [r1], #1
- subs r2, r2, #1
- bne Lkcopyloop
-Lkcopyskip:
- mov r0, #0
- str r0, [r4, #(U_PCB + PCB_ONFAULT)]
-#ifdef __APCS_26__
- ldmdb fp, {r4, fp, sp, pc}^
-#else
- ldmdb fp, {r4, fp, sp, pc}
-#endif
-
-Lcopyfault:
- mov r1, #0
- str r1, [r4, #(U_PCB + PCB_ONFAULT)]
- /* Return value is provided by fault handler. */
-#ifdef __APCS_26__
- ldmdb fp, {r4, fp, sp, pc}^
-#else
- ldmdb fp, {r4, fp, sp, pc}
-#endif
-
/* LINTSTUB: Func: int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done) */
ENTRY(copyinstr)
mov ip, sp
diff -r 49e6533f517c -r 794917671dfc sys/arch/arm/arm/bcopyinout.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/arm/arm/bcopyinout.S Sun Aug 11 21:19:12 2002 +0000
@@ -0,0 +1,673 @@
+/* $NetBSD: bcopyinout.S,v 1.1 2002/08/11 21:19:12 bjh21 Exp $ */
+
+/*
+ * Copyright (c) 2002 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Allen Briggs for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed for the NetBSD Project by
+ * Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ * or promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "assym.h"
+
+#include <machine/asm.h>
+#include <sys/errno.h>
+
+ .text
+ .align 0
+
+Lcurpcb:
+ .word _C_LABEL(curpcb)
+
+#ifdef __PROG32
+#define SAVE_REGS stmfd sp!, {r4-r11}
+#define RESTORE_REGS ldmfd sp!, {r4-r11}
+#else
+/* Need to save R14_svc because it'll get trampled if we take a page fault. */
+#define SAVE_REGS stmfd sp!, {r4-r11, r14}
+#define RESTORE_REGS ldmfd sp!, {r4-r11, r14}
+#endif
+
+#if 0 && defined(__XSCALE__)
+#define HELLOCPP #
+#define PREFETCH(rx,o) pld [ rx , HELLOCPP (o) ]
+#else
+#define PREFETCH(rx,o)
+#endif
+
+/*
+ * r0 = user space address
+ * r1 = kernel space address
+ * r2 = length
+ *
+ * Copies bytes from user space to kernel space
+ *
+ * We save/restore r4-r11 (and also r14 when __PROG32 is not defined):
+ * r4-r11 are used as scratch registers
+ */
+ENTRY(copyin)
+ /* Quick exit if length is zero */
+ teq r2, #0
+ moveq r0, #0
+ moveq pc, lr
+
+ SAVE_REGS
+ ldr r4, Lcurpcb
+ ldr r4, [r4]
+
+ ldr r5, [r4, #PCB_ONFAULT]
+ add r3, pc, #Lcopyfault - . - 8
+ str r3, [r4, #PCB_ONFAULT]
+
+ PREFETCH(r0, 0)
+ PREFETCH(r1, 0)
+
+ /*
+ * If not too many bytes, take the slow path.
+ */
+ cmp r2, #0x08
+ blt Licleanup
+
+ /*
+ * Align destination to word boundary.
+ */
+ and r6, r1, #0x3
+ ldr pc, [pc, r6, lsl #2]
+ b Lialend
+ .word Lialend
+ .word Lial1
+ .word Lial2
+ .word Lial3
+Lial3: ldrbt r6, [r0], #1
+ sub r2, r2, #1
+ strb r6, [r1], #1
+Lial2: ldrbt r7, [r0], #1
+ sub r2, r2, #1
+ strb r7, [r1], #1
+Lial1: ldrbt r6, [r0], #1
+ sub r2, r2, #1
+ strb r6, [r1], #1
+Lialend:
+
+ /*
+ * If few bytes left, finish slow.
+ */
+ cmp r2, #0x08
+ blt Licleanup
+
+ /*
+ * If source is not aligned, finish slow.
+ */
+ ands r3, r0, #0x03
+ bne Licleanup
+
+ cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */
+ blt Licleanup8
+
+ /*
+ * Align destination to cacheline boundary.
+ * If source and destination are nicely aligned, this can be a big
+ * win. If not, it's still cheaper to copy in groups of 32 even if
+ * we don't get the nice cacheline alignment.
+ */
+ and r6, r1, #0x1f
+ ldr pc, [pc, r6]
+ b Licaligned
+ .word Licaligned
+ .word Lical4
+ .word Lical8
+ .word Lical12
+ .word Lical16
+ .word Lical20
+ .word Lical24
+ .word Lical28
+Lical28:ldrt r6, [r0], #4
+ sub r2, r2, #4
+ str r6, [r1], #4
+Lical24:ldrt r7, [r0], #4
+ sub r2, r2, #4
+ str r7, [r1], #4
+Lical20:ldrt r6, [r0], #4
+ sub r2, r2, #4
+ str r6, [r1], #4
+Lical16:ldrt r7, [r0], #4
+ sub r2, r2, #4
+ str r7, [r1], #4
+Lical12:ldrt r6, [r0], #4
+ sub r2, r2, #4
+ str r6, [r1], #4
+Lical8: ldrt r7, [r0], #4
+ sub r2, r2, #4
+ str r7, [r1], #4
+Lical4: ldrt r6, [r0], #4
+ sub r2, r2, #4
+ str r6, [r1], #4
+
+ /*
+ * We start with > 0x40 bytes to copy (>= 0x60 got us into this
+ * part of the code, and we may have knocked that down by as much
Home |
Main Index |
Thread Index |
Old Index