Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/netbsd-1-6]: src/sys/arch/arm/arm Pull up revision 1.1 (new, requested b...



details:   https://anonhg.NetBSD.org/src/rev/527d03693bf0
branches:  netbsd-1-6
changeset: 529406:527d03693bf0
user:      he <he%NetBSD.org@localhost>
date:      Mon Nov 18 02:36:47 2002 +0000

description:
Pull up revision 1.1 (new, requested by bjh21 in ticket #689):
  New and much improved version of copyin(), copyout(), and
  kcopy() which works on both 26-bit and 32-bit machines.

diffstat:

 sys/arch/arm/arm/bcopyinout.S |  673 ++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 673 insertions(+), 0 deletions(-)

diffs (truncated from 677 to 300 lines):

diff -r 15c43e0d62e5 -r 527d03693bf0 sys/arch/arm/arm/bcopyinout.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/arm/arm/bcopyinout.S     Mon Nov 18 02:36:47 2002 +0000
@@ -0,0 +1,673 @@
+/*     $NetBSD: bcopyinout.S,v 1.9.4.2 2002/11/18 02:36:47 he Exp $    */
+
+/*
+ * Copyright (c) 2002 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Allen Briggs for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed for the NetBSD Project by
+ *      Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ *    or promote products derived from this software without specific prior
+ *    written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "assym.h"
+
+#include <machine/asm.h>
+#include <sys/errno.h>
+
+       .text
+       .align  0
+
+/*
+ * Literal word holding the address of the curpcb variable.  It is placed
+ * in .text so that copyin() can fetch it with a PC-relative
+ * "ldr r4, Lcurpcb" and then dereference it to get the pointer stored in
+ * curpcb.
+ */
+Lcurpcb:
+       .word _C_LABEL(curpcb)
+
+/*
+ * SAVE_REGS / RESTORE_REGS push and pop the registers that the copy
+ * routines use as scratch (r4-r11) on the stack addressed by sp.  The two
+ * macros must always list the same registers so the stack stays balanced.
+ */
+#ifdef __PROG32
+#define SAVE_REGS      stmfd   sp!, {r4-r11}
+#define RESTORE_REGS   ldmfd   sp!, {r4-r11}
+#else
+/* Need to save R14_svc because it'll get trampled if we take a page fault. */
+#define SAVE_REGS      stmfd   sp!, {r4-r11, r14}
+#define RESTORE_REGS   ldmfd   sp!, {r4-r11, r14}
+#endif
+               
+/*
+ * PREFETCH(rx, o): written to emit "pld [rx, #o]" on XScale.  The "0 &&"
+ * in the condition disables that branch unconditionally, so as it stands
+ * PREFETCH always expands to nothing.
+ *
+ * HELLOCPP exists only to get a literal '#' into the expansion: a bare '#'
+ * inside a function-like macro body would be taken by the C preprocessor
+ * as the stringizing operator.
+ */
+#if 0 && defined(__XSCALE__)
+#define HELLOCPP #
+#define PREFETCH(rx,o) pld     [ rx , HELLOCPP (o) ]
+#else
+#define PREFETCH(rx,o)
+#endif
+
+/*
+ * copyin -- copy bytes from user space to kernel space
+ * (C prototype per copyin(9): int copyin(const void *, void *, size_t))
+ *
+ * In:
+ *     r0 = user space address (source)
+ *     r1 = kernel space address (destination)
+ *     r2 = length in bytes
+ * Out:
+ *     r0 = 0 on success, EFAULT if the Lcopyfault exit was taken
+ *
+ * Register use inside the routine:
+ *     r0  = running source pointer (post-incremented by every load)
+ *     r1  = running destination pointer (post-incremented by every store)
+ *     r2  = bytes still to copy
+ *     r3  = scratch (fault hook address, source alignment test)
+ *     r4  = pointer loaded from curpcb
+ *     r5  = previous PCB_ONFAULT value, put back on both exit paths
+ *     r6-r11 = data scratch
+ *
+ * r4-r11 are saved on entry and restored on exit by SAVE_REGS /
+ * RESTORE_REGS.  All user-side loads use ldrt/ldrbt so that they are
+ * performed with user-mode access permissions.
+ *
+ * While the copy runs, PCB_ONFAULT holds the address of Lcopyfault; the
+ * fault handling code is expected to resume there if a load faults
+ * (that code is not part of this file section).
+ */
+ENTRY(copyin)
+       /* Quick exit if length is zero */
+       teq     r2, #0
+       moveq   r0, #0
+       moveq   pc, lr
+
+       SAVE_REGS
+       ldr     r4, Lcurpcb
+       ldr     r4, [r4]                /* r4 = *curpcb */
+
+       /* Keep the old fault hook in r5 and install Lcopyfault. */
+       ldr     r5, [r4, #PCB_ONFAULT]
+       add     r3, pc, #Lcopyfault - . - 8
+       str     r3, [r4, #PCB_ONFAULT]
+
+       PREFETCH(r0, 0)
+       PREFETCH(r1, 0)
+
+       /*
+        * If not too many bytes, take the slow path.
+        */
+       cmp     r2, #0x08
+       blt     Licleanup
+
+       /*
+        * Align destination to word boundary.
+        *
+        * r6 = destination offset within its word.  pc reads as the
+        * address of the ldr plus 8, i.e. the first .word below, so r6
+        * selects a table entry directly; the "b" is never executed and
+        * only fills the slot between the ldr and the table.
+        *
+        * An offset of k needs (4 - k) byte copies to reach the next word
+        * boundary, hence offset 1 -> Lial3, 2 -> Lial2, 3 -> Lial1.
+        * r2 >= 8 here, so copying up to 3 bytes cannot underflow it.
+        */
+       and     r6, r1, #0x3
+       ldr     pc, [pc, r6, lsl #2]
+       b       Lialend
+       .word   Lialend
+       .word   Lial3
+       .word   Lial2
+       .word   Lial1
+Lial3: ldrbt   r6, [r0], #1
+       sub     r2, r2, #1
+       strb    r6, [r1], #1
+Lial2: ldrbt   r7, [r0], #1
+       sub     r2, r2, #1
+       strb    r7, [r1], #1
+Lial1: ldrbt   r6, [r0], #1
+       sub     r2, r2, #1
+       strb    r6, [r1], #1
+Lialend:
+
+       /*
+        * If few bytes left, finish slow.
+        */
+       cmp     r2, #0x08
+       blt     Licleanup
+
+       /*
+        * If source is not aligned, finish slow.
+        * (The destination is word aligned from here on, so word stores
+        * are safe; only the source can still be misaligned.)
+        */
+       ands    r3, r0, #0x03
+       bne     Licleanup
+
+       cmp     r2, #0x60       /* Must be > 0x5f for unrolled cacheline */
+       blt     Licleanup8
+
+       /*
+        * Align destination to cacheline boundary.
+        * If source and destination are nicely aligned, this can be a big
+        * win.  If not, it's still cheaper to copy in groups of 32 even if
+        * we don't get the nice cacheline alignment.
+        *
+        * r6 = destination offset within a 32-byte line.  It is already a
+        * multiple of 4 (the destination is word aligned), so it is used
+        * unscaled as the byte offset into the table.  An offset of k
+        * needs (32 - k) bytes to reach the next line, hence offset
+        * 4 -> Lical28, 8 -> Lical24, ... 28 -> Lical4.
+        */
+       and     r6, r1, #0x1f
+       ldr     pc, [pc, r6]
+       b       Licaligned
+       .word   Licaligned
+       .word   Lical28
+       .word   Lical24
+       .word   Lical20
+       .word   Lical16
+       .word   Lical12
+       .word   Lical8
+       .word   Lical4
+Lical28:ldrt   r6, [r0], #4
+       sub     r2, r2, #4
+       str     r6, [r1], #4
+Lical24:ldrt   r7, [r0], #4
+       sub     r2, r2, #4
+       str     r7, [r1], #4
+Lical20:ldrt   r6, [r0], #4
+       sub     r2, r2, #4
+       str     r6, [r1], #4
+Lical16:ldrt   r7, [r0], #4
+       sub     r2, r2, #4
+       str     r7, [r1], #4
+Lical12:ldrt   r6, [r0], #4
+       sub     r2, r2, #4
+       str     r6, [r1], #4
+Lical8: ldrt   r7, [r0], #4
+       sub     r2, r2, #4
+       str     r7, [r1], #4
+Lical4: ldrt   r6, [r0], #4
+       sub     r2, r2, #4
+       str     r6, [r1], #4
+
+       /*
+        * We start with > 0x40 bytes to copy (>= 0x60 got us into this
+        * part of the code, and we may have knocked that down by as much
+        * as 0x1c getting aligned).
+        *
+        * This loop basically works out to:
+        * do {
+        *      prefetch-next-cacheline(s)
+        *      bytes -= 0x20;
+        *      copy cacheline
+        * } while (bytes >= 0x40);
+        * bytes -= 0x20;
+        * copy cacheline
+        */
+Licaligned:
+       PREFETCH(r0, 32)
+       PREFETCH(r1, 32)
+
+       sub     r2, r2, #0x20
+
+       /*
+        * Copy a cacheline: words 0-1 go out through r10/r11 first so
+        * that those two registers can be reused for words 6-7.
+        */
+       ldrt    r10, [r0], #4
+       ldrt    r11, [r0], #4
+       ldrt    r6, [r0], #4
+       ldrt    r7, [r0], #4
+       ldrt    r8, [r0], #4
+       ldrt    r9, [r0], #4
+       stmia   r1!, {r10-r11}
+       ldrt    r10, [r0], #4
+       ldrt    r11, [r0], #4
+       stmia   r1!, {r6-r11}
+
+       cmp     r2, #0x40
+       bge     Licaligned
+
+       sub     r2, r2, #0x20
+
+       /* Copy the final cacheline (same register scheme as above) */
+       ldrt    r10, [r0], #4
+       ldrt    r11, [r0], #4
+       ldrt    r6, [r0], #4
+       ldrt    r7, [r0], #4
+       ldrt    r8, [r0], #4
+       ldrt    r9, [r0], #4
+       stmia   r1!, {r10-r11}
+       ldrt    r10, [r0], #4
+       ldrt    r11, [r0], #4
+       stmia   r1!, {r6-r11}
+
+       cmp     r2, #0x08
+       blt     Liprecleanup
+
+       /* Copy 8 bytes per iteration while at least 8 remain. */
+Licleanup8:
+       ldrt    r8, [r0], #4
+       ldrt    r9, [r0], #4
+       sub     r2, r2, #8
+       stmia   r1!, {r8, r9}
+       cmp     r2, #8
+       bge     Licleanup8
+
+Liprecleanup:
+       /*
+        * If we're done, bail (through this routine's own success exit).
+        */
+       cmp     r2, #0
+       beq     Liout
+
+       /*
+        * Byte-at-a-time tail.  r2 is nonzero on every entry.  Dispatch on
+        * r2 & 3: a remainder of 1, 2 or 3 copies that many bytes, and a
+        * remainder of 0 copies 4 (Lic4), so after the first pass r2 is a
+        * multiple of 4.  The subs at Lic1 sets the flags tested by the
+        * bne at Licend; as above, the "b" only pads the slot before the
+        * table.
+        */
+Licleanup:
+       and     r6, r2, #0x3
+       ldr     pc, [pc, r6, lsl #2]
+       b       Licend
+       .word   Lic4
+       .word   Lic1
+       .word   Lic2
+       .word   Lic3
+Lic4:  ldrbt   r6, [r0], #1
+       sub     r2, r2, #1
+       strb    r6, [r1], #1
+Lic3:  ldrbt   r7, [r0], #1
+       sub     r2, r2, #1
+       strb    r7, [r1], #1
+Lic2:  ldrbt   r6, [r0], #1
+       sub     r2, r2, #1
+       strb    r6, [r1], #1
+Lic1:  ldrbt   r7, [r0], #1
+       subs    r2, r2, #1
+       strb    r7, [r1], #1
+Licend:
+       bne     Licleanup
+
+       /* Success: return 0 with the previous fault hook reinstated. */
+Liout:
+       mov     r0, #0
+
+       str     r5, [r4, #PCB_ONFAULT]
+       RESTORE_REGS
+
+       mov     pc, lr
+
+       /*
+        * Fault exit: reached through the address stored in PCB_ONFAULT.
+        * Relies on r4 (pcb pointer) and r5 (old hook) still being intact.
+        */
+Lcopyfault:
+       mov     r0, #EFAULT
+
+       str     r5, [r4, #PCB_ONFAULT]
+       RESTORE_REGS
+
+       mov     pc, lr
+
+/*
+ * r0 = kernel space address
+ * r1 = user space address
+ * r2 = length
+ *
+ * Copies bytes from kernel space to user space
+ *
+ * We save/restore r4-r11:
+ * r4-r11 are scratch
+ */
+
+ENTRY(copyout)
+       /* Quick exit if length is zero */      
+       teq     r2, #0
+       moveq   r0, #0
+       moveq   pc, lr
+
+       SAVE_REGS



Home | Main Index | Thread Index | Old Index