[src/trunk]: src/sys/arch/arm32/arm32 Slightly smaller code and tune for StrongARM.

To: source-changes-hg%NetBSD.org@localhost
Subject: [src/trunk]: src/sys/arch/arm32/arm32 Slightly smaller code and tune for Stro...
From: rearnsha <rearnsha%NetBSD.org@localhost>
Date: Fri, 24 Jan 2020 08:56:45 +0000
details:   https://anonhg.NetBSD.org/src/rev/796ead72df89
branches:  trunk
changeset: 507313:796ead72df89
user:      rearnsha <rearnsha%NetBSD.org@localhost>
date:      Mon Mar 19 22:51:51 2001 +0000

description:
Slightly smaller code and tune for StrongARM.

diffstat:

 sys/arch/arm32/arm32/blockio.S |  250 ++++++++++++++++++++--------------------
 1 files changed, 122 insertions(+), 128 deletions(-)

diffs (truncated from 477 to 300 lines):

diff -r 9787cce59d55 -r 796ead72df89 sys/arch/arm32/arm32/blockio.S
--- a/sys/arch/arm32/arm32/blockio.S    Mon Mar 19 22:33:35 2001 +0000
+++ b/sys/arch/arm32/arm32/blockio.S    Mon Mar 19 22:51:51 2001 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: blockio.S,v 1.9 1999/10/26 06:53:41 cgd Exp $  */
+/*     $NetBSD: blockio.S,v 1.10 2001/03/19 22:51:51 rearnsha Exp $    */
 
 /*
  * Copyright (c) 1994 Mark Brinicombe.
@@ -41,6 +41,8 @@
  * optimised block read/write from/to IO routines.
  *
  * Created      : 08/10/94
+ * Modified    : 22/01/99  -- R.Earnshaw
+ *                            Faster, and small tweaks for StrongARM   
  */
 
 #include <machine/asm.h>
@@ -68,10 +70,10 @@
 
 inswloop:
        ldr     r3, [r0]
+       subs    r2, r2, #0x00000001     /* Loop test in load delay slot */
        strb    r3, [r1], #0x0001
        mov     r3, r3, lsr #8
        strb    r3, [r1], #0x0001
-       subs    r2, r2, #0x00000001
        bgt     inswloop
 
        mov     pc, lr
@@ -79,20 +81,17 @@
 /* Word aligned insw */
 
 fastinsw:
-       stmfd   sp!, {r4}
 
 fastinswloop:
        ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
                                         * word accesses */
-       ldr     r4, [r0]
+       ldr     ip, [r0]
        mov     r3, r3, lsr #16         /* Put the two shorts together */
-       orr     r3, r3, r4, lsl #16
+       orr     r3, r3, ip, lsl #16
        str     r3, [r1], #0x0004       /* Store */
        subs    r2, r2, #0x00000002     /* Next */
        bgt     fastinswloop
 
-       ldmfd   sp!, {r4}
-
        mov     pc, lr
 
 
@@ -117,42 +116,43 @@
 
 /* Non aligned outsw */
 
-       stmfd   sp!, {r4}
-
 outswloop:
        ldrb    r3, [r1], #0x0001
-       ldrb    r4, [r1], #0x0001
-       orr     r3, r3, r4, lsl #8
+       ldrb    ip, [r1], #0x0001
+       subs    r2, r2, #0x00000001     /* Loop test in load delay slot */
+       orr     r3, r3, ip, lsl #8
        orr     r3, r3, r3, lsl #16
        str     r3, [r0]
-       subs    r2, r2, #0x00000001
        bgt     outswloop
 
-       ldmfd   sp!, {r4}
-
        mov     pc, lr
 
 /* Word aligned outsw */
 
 fastoutsw:
-       stmfd   sp!, {r4}
 
 fastoutswloop:
-       ldr     r3, [r1], #0x0004
+       ldr     r3, [r1], #0x0004       /* r3 = (H)(L) */
+       subs    r2, r2, #0x00000002     /* Loop test in load delay slot */
 
-       mov     r4, r3, lsl #16
-       orr     r4, r4, r4, lsr #16
-       str     r4, [r0]
+       eor     ip, r3, r3, lsr #16     /* ip = (H)(H^L) */
+       eor     r3, r3, ip, lsl #16     /* r3 = (H^H^L)(L) = (L)(L) */
+       eor     ip, ip, r3, lsr #16     /* ip = (H)(H^L^L) = (H)(H) */
 
-       mov     r4, r3, lsr #16
-       orr     r4, r4, r4, lsl #16
-       str     r4, [r0]
+       str     r3, [r0]
+       str     ip, [r0]
+       
+/*     mov     ip, r3, lsl #16
+ *     orr     ip, ip, ip, lsr #16
+ *     str     ip, [r0]
+ *
+ *     mov     ip, r3, lsr #16
+ *     orr     ip, ip, ip, lsl #16
+ *     str     ip, [r0]
+ */
 
-       subs    r2, r2, #0x00000002
        bgt     fastoutswloop
 
-       ldmfd   sp!, {r4}
-
        mov     pc, lr
 
 /*
@@ -170,7 +170,8 @@
        cmp     r2, #0x00000000
        movle   pc, lr
 
-/* If the destination address is word aligned and the size suitably aligned, do it fast */
+/* If the destination address is word aligned and the size suitably
+   aligned, do it fast */
 
        tst     r2, #0x00000007
        tsteq   r1, #0x00000003
@@ -179,40 +180,38 @@
 
 /* Word aligned insw */
 
-       stmfd   sp!, {r4-r7}
+       stmfd   sp!, {r4,r5,lr}
 
 insw16loop:
        ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
                                         * word accesses */
-       ldr     r7, [r0]
+       ldr     lr, [r0]
        mov     r3, r3, lsr #16         /* Put the two shorts together */
-       orr     r3, r3, r7, lsl #16
+       orr     r3, r3, lr, lsl #16
 
        ldr     r4, [r0, #0x0002]       /* take advantage of nonaligned
                                         * word accesses */
-       ldr     r7, [r0]
+       ldr     lr, [r0]
        mov     r4, r4, lsr #16         /* Put the two shorts together */
-       orr     r4, r4, r7, lsl #16
+       orr     r4, r4, lr, lsl #16
 
        ldr     r5, [r0, #0x0002]       /* take advantage of nonaligned
                                         * word accesses */
-       ldr     r7, [r0]
+       ldr     lr, [r0]
        mov     r5, r5, lsr #16         /* Put the two shorts together */
-       orr     r5, r5, r7, lsl #16
+       orr     r5, r5, lr, lsl #16
 
-       ldr     r6, [r0, #0x0002]       /* take advantage of nonaligned
+       ldr     ip, [r0, #0x0002]       /* take advantage of nonaligned
                                         * word accesses */
-       ldr     r7, [r0]
-       mov     r6, r6, lsr #16         /* Put the two shorts together */
-       orr     r6, r6, r7, lsl #16
+       ldr     lr, [r0]
+       mov     ip, ip, lsr #16         /* Put the two shorts together */
+       orr     ip, ip, lr, lsl #16
 
-       stmia   r1!, {r3-r6}
+       stmia   r1!, {r3-r5,ip}
        subs    r2, r2, #0x00000008     /* Next */
        bgt     insw16loop
 
-       ldmfd   sp!, {r4-r7}
-
-       mov     pc, lr
+       ldmfd   sp!, {r4,r5,pc}         /* Restore regs and go home */
 
 
 /*
@@ -228,7 +227,8 @@
        cmp     r2, #0x00000000
        movle   pc, lr
 
-/* If the destination address is word aligned and the size suitably aligned, do it fast */
+/* If the destination address is word aligned and the size suitably
+   aligned, do it fast */
 
        tst     r2, #0x00000007
        tsteq   r1, #0x00000003
@@ -237,49 +237,48 @@
 
 /* Word aligned outsw */
 
-       stmfd   sp!, {r4-r7}
+       stmfd   sp!, {r4,r5,lr}
 
 outsw16loop:
-       ldmia   r1!, {r4-r7}
-
-       mov     r3, r4, lsl #16
-       orr     r3, r3, r3, lsr #16
-       str     r3, [r0]
+       ldmia   r1!, {r4,r5,ip,lr}
 
-       mov     r3, r4, lsr #16
-       orr     r3, r3, r3, lsl #16
-       str     r3, [r0]
-
-       mov     r3, r5, lsl #16
-       orr     r3, r3, r3, lsr #16
-       str     r3, [r0]
-
-       mov     r3, r5, lsr #16
-       orr     r3, r3, r3, lsl #16
+       eor     r3, r4, r4, lsl #16     /* r3 = (A^B)(B) */
+       eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
+       eor     r3, r3, r4, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
        str     r3, [r0]
+       str     r4, [r0]
+       
+/*     mov     r3, r4, lsl #16
+ *     orr     r3, r3, r3, lsr #16
+ *     str     r3, [r0]
+ *
+ *     mov     r3, r4, lsr #16
+ *     orr     r3, r3, r3, lsl #16
+ *     str     r3, [r0]
+ */
 
-       mov     r3, r6, lsl #16
-       orr     r3, r3, r3, lsr #16
-       str     r3, [r0]
-
-       mov     r3, r6, lsr #16
-       orr     r3, r3, r3, lsl #16
+       eor     r3, r5, r5, lsl #16     /* r3 = (A^B)(B) */
+       eor     r5, r5, r3, lsr #16     /* r5 = (A)(B^A^B) = (A)(A) */
+       eor     r3, r3, r5, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
        str     r3, [r0]
+       str     r5, [r0]
 
-       mov     r3, r7, lsl #16
-       orr     r3, r3, r3, lsr #16
+       eor     r3, ip, ip, lsl #16     /* r3 = (A^B)(B) */
+       eor     ip, ip, r3, lsr #16     /* ip = (A)(B^A^B) = (A)(A) */
+       eor     r3, r3, ip, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
        str     r3, [r0]
+       str     ip, [r0]
 
-       mov     r3, r7, lsr #16
-       orr     r3, r3, r3, lsl #16
+       eor     r3, lr, lr, lsl #16     /* r3 = (A^B)(B) */
+       eor     lr, lr, r3, lsr #16     /* lr = (A)(B^A^B) = (A)(A) */
+       eor     r3, r3, lr, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
        str     r3, [r0]
+       str     lr, [r0]
 
        subs    r2, r2, #0x00000008
        bgt     outsw16loop
 
-       ldmfd   sp!, {r4-r7}
-
-       mov     pc, lr
+       ldmfd   sp!, {r4,r5,pc}         /* and go home */
 
 /*
  * reads short ints (16 bits) from an I/O address into a block of memory
@@ -297,7 +296,8 @@
        cmp     r2, #0x00000000
        movle   pc, lr
 
-/* If the destination address is word aligned and the size suitably aligned, do it fast */
+/* If the destination address is word aligned and the size suitably
+   aligned, do it fast */
 
        tst     r1, #0x00000003
 
@@ -305,25 +305,25 @@
 
 /* Word aligned insw */
 
-       stmfd   sp!, {r4-r11}
+       stmfd   sp!, {r4-r9,lr}
 
-       mov     r11, #0xff000000
-       orr     r11, r11, #0x00ff0000
+       mov     lr, #0xff000000
+       orr     lr, lr, #0x00ff0000
 
 inswm8_loop8:
        cmp     r2, #8
        bcc     inswm8_l8
 
-       ldmia   r0, {r3-r10}
+       ldmia   r0, {r3-r9,ip}
 
-       bic     r3, r3, r11
+       bic     r3, r3, lr
        orr     r3, r3, r4, lsl #16
-       bic     r5, r5, r11
Prev by Date: [src/trunk]: src/sys/arch/arm26/podulebus These have moved to sys/dev/podulebus.
Next by Date: [src/trunk]: src/sys Move machine-dependent podulebus headers to <machine/pod...
Previous by Thread: [src/trunk]: src/sys/arch/arm26/podulebus These have moved to sys/dev/podulebus.
Next by Thread: [src/trunk]: src/sys Move machine-dependent podulebus headers to <machine/pod...
Indexes:
Home | Main Index | Thread Index | Old Index