Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/arm32/arm32 Slightly smaller code and tune for StrongARM.
details: https://anonhg.NetBSD.org/src/rev/796ead72df89
branches: trunk
changeset: 507313:796ead72df89
user: rearnsha <rearnsha%NetBSD.org@localhost>
date: Mon Mar 19 22:51:51 2001 +0000
description:
Slightly smaller code and tune for StrongARM.
diffstat:
sys/arch/arm32/arm32/blockio.S | 250 ++++++++++++++++++++--------------------
1 files changed, 122 insertions(+), 128 deletions(-)
diffs (truncated from 477 to 300 lines):
diff -r 9787cce59d55 -r 796ead72df89 sys/arch/arm32/arm32/blockio.S
--- a/sys/arch/arm32/arm32/blockio.S Mon Mar 19 22:33:35 2001 +0000
+++ b/sys/arch/arm32/arm32/blockio.S Mon Mar 19 22:51:51 2001 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: blockio.S,v 1.9 1999/10/26 06:53:41 cgd Exp $ */
+/* $NetBSD: blockio.S,v 1.10 2001/03/19 22:51:51 rearnsha Exp $ */
/*
* Copyright (c) 1994 Mark Brinicombe.
@@ -41,6 +41,8 @@
* optimised block read/write from/to IO routines.
*
* Created : 08/10/94
+ * Modified : 22/01/99 -- R.Earnshaw
+ * Faster, and small tweaks for StrongARM
*/
#include <machine/asm.h>
@@ -68,10 +70,10 @@
inswloop:
ldr r3, [r0]
+ subs r2, r2, #0x00000001 /* Loop test in load delay slot */
strb r3, [r1], #0x0001
mov r3, r3, lsr #8
strb r3, [r1], #0x0001
- subs r2, r2, #0x00000001
bgt inswloop
mov pc, lr
@@ -79,20 +81,17 @@
/* Word aligned insw */
fastinsw:
- stmfd sp!, {r4}
fastinswloop:
ldr r3, [r0, #0x0002] /* take advantage of nonaligned
* word accesses */
- ldr r4, [r0]
+ ldr ip, [r0]
mov r3, r3, lsr #16 /* Put the two shorts together */
- orr r3, r3, r4, lsl #16
+ orr r3, r3, ip, lsl #16
str r3, [r1], #0x0004 /* Store */
subs r2, r2, #0x00000002 /* Next */
bgt fastinswloop
- ldmfd sp!, {r4}
-
mov pc, lr
@@ -117,42 +116,43 @@
/* Non aligned outsw */
- stmfd sp!, {r4}
-
outswloop:
ldrb r3, [r1], #0x0001
- ldrb r4, [r1], #0x0001
- orr r3, r3, r4, lsl #8
+ ldrb ip, [r1], #0x0001
+ subs r2, r2, #0x00000001 /* Loop test in load delay slot */
+ orr r3, r3, ip, lsl #8
orr r3, r3, r3, lsl #16
str r3, [r0]
- subs r2, r2, #0x00000001
bgt outswloop
- ldmfd sp!, {r4}
-
mov pc, lr
/* Word aligned outsw */
fastoutsw:
- stmfd sp!, {r4}
fastoutswloop:
- ldr r3, [r1], #0x0004
+ ldr r3, [r1], #0x0004 /* r3 = (H)(L) */
+ subs r2, r2, #0x00000002 /* Loop test in load delay slot */
- mov r4, r3, lsl #16
- orr r4, r4, r4, lsr #16
- str r4, [r0]
+ eor ip, r3, r3, lsr #16 /* ip = (H)(H^L) */
+ eor r3, r3, ip, lsl #16 /* r3 = (H^H^L)(L) = (L)(L) */
+ eor ip, ip, r3, lsr #16 /* ip = (H)(H^L^L) = (H)(H) */
- mov r4, r3, lsr #16
- orr r4, r4, r4, lsl #16
- str r4, [r0]
+ str r3, [r0]
+ str ip, [r0]
+
+/* mov ip, r3, lsl #16
+ * orr ip, ip, ip, lsr #16
+ * str ip, [r0]
+ *
+ * mov ip, r3, lsr #16
+ * orr ip, ip, ip, lsl #16
+ * str ip, [r0]
+ */
- subs r2, r2, #0x00000002
bgt fastoutswloop
- ldmfd sp!, {r4}
-
mov pc, lr
/*
@@ -170,7 +170,8 @@
cmp r2, #0x00000000
movle pc, lr
-/* If the destination address is word aligned and the size suitably aligned, do it fast */
+/* If the destination address is word aligned and the size suitably
+ aligned, do it fast */
tst r2, #0x00000007
tsteq r1, #0x00000003
@@ -179,40 +180,38 @@
/* Word aligned insw */
- stmfd sp!, {r4-r7}
+ stmfd sp!, {r4,r5,lr}
insw16loop:
ldr r3, [r0, #0x0002] /* take advantage of nonaligned
* word accesses */
- ldr r7, [r0]
+ ldr lr, [r0]
mov r3, r3, lsr #16 /* Put the two shorts together */
- orr r3, r3, r7, lsl #16
+ orr r3, r3, lr, lsl #16
ldr r4, [r0, #0x0002] /* take advantage of nonaligned
* word accesses */
- ldr r7, [r0]
+ ldr lr, [r0]
mov r4, r4, lsr #16 /* Put the two shorts together */
- orr r4, r4, r7, lsl #16
+ orr r4, r4, lr, lsl #16
ldr r5, [r0, #0x0002] /* take advantage of nonaligned
* word accesses */
- ldr r7, [r0]
+ ldr lr, [r0]
mov r5, r5, lsr #16 /* Put the two shorts together */
- orr r5, r5, r7, lsl #16
+ orr r5, r5, lr, lsl #16
- ldr r6, [r0, #0x0002] /* take advantage of nonaligned
+ ldr ip, [r0, #0x0002] /* take advantage of nonaligned
* word accesses */
- ldr r7, [r0]
- mov r6, r6, lsr #16 /* Put the two shorts together */
- orr r6, r6, r7, lsl #16
+ ldr lr, [r0]
+ mov ip, ip, lsr #16 /* Put the two shorts together */
+ orr ip, ip, lr, lsl #16
- stmia r1!, {r3-r6}
+ stmia r1!, {r3-r5,ip}
subs r2, r2, #0x00000008 /* Next */
bgt insw16loop
- ldmfd sp!, {r4-r7}
-
- mov pc, lr
+ ldmfd sp!, {r4,r5,pc} /* Restore regs and go home */
/*
@@ -228,7 +227,8 @@
cmp r2, #0x00000000
movle pc, lr
-/* If the destination address is word aligned and the size suitably aligned, do it fast */
+/* If the destination address is word aligned and the size suitably
+ aligned, do it fast */
tst r2, #0x00000007
tsteq r1, #0x00000003
@@ -237,49 +237,48 @@
/* Word aligned outsw */
- stmfd sp!, {r4-r7}
+ stmfd sp!, {r4,r5,lr}
outsw16loop:
- ldmia r1!, {r4-r7}
-
- mov r3, r4, lsl #16
- orr r3, r3, r3, lsr #16
- str r3, [r0]
+ ldmia r1!, {r4,r5,ip,lr}
- mov r3, r4, lsr #16
- orr r3, r3, r3, lsl #16
- str r3, [r0]
-
- mov r3, r5, lsl #16
- orr r3, r3, r3, lsr #16
- str r3, [r0]
-
- mov r3, r5, lsr #16
- orr r3, r3, r3, lsl #16
+ eor r3, r4, r4, lsl #16 /* r3 = (A^B)(B) */
+ eor r4, r4, r3, lsr #16 /* r4 = (A)(B^A^B) = (A)(A) */
+ eor r3, r3, r4, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
str r3, [r0]
+ str r4, [r0]
+
+/* mov r3, r4, lsl #16
+ * orr r3, r3, r3, lsr #16
+ * str r3, [r0]
+ *
+ * mov r3, r4, lsr #16
+ * orr r3, r3, r3, lsl #16
+ * str r3, [r0]
+ */
- mov r3, r6, lsl #16
- orr r3, r3, r3, lsr #16
- str r3, [r0]
-
- mov r3, r6, lsr #16
- orr r3, r3, r3, lsl #16
+ eor r3, r5, r5, lsl #16 /* r3 = (A^B)(B) */
+ eor r5, r5, r3, lsr #16 /* r5 = (A)(B^A^B) = (A)(A) */
+ eor r3, r3, r5, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
str r3, [r0]
+ str r5, [r0]
- mov r3, r7, lsl #16
- orr r3, r3, r3, lsr #16
+ eor r3, ip, ip, lsl #16 /* r3 = (A^B)(B) */
+ eor ip, ip, r3, lsr #16 /* ip = (A)(B^A^B) = (A)(A) */
+ eor r3, r3, ip, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
str r3, [r0]
+ str ip, [r0]
- mov r3, r7, lsr #16
- orr r3, r3, r3, lsl #16
+ eor r3, lr, lr, lsl #16 /* r3 = (A^B)(B) */
+ eor lr, lr, r3, lsr #16 /* lr = (A)(B^A^B) = (A)(A) */
+ eor r3, r3, lr, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
str r3, [r0]
+ str lr, [r0]
subs r2, r2, #0x00000008
bgt outsw16loop
- ldmfd sp!, {r4-r7}
-
- mov pc, lr
+ ldmfd sp!, {r4,r5,pc} /* and go home */
/*
* reads short ints (16 bits) from an I/O address into a block of memory
@@ -297,7 +296,8 @@
cmp r2, #0x00000000
movle pc, lr
-/* If the destination address is word aligned and the size suitably aligned, do it fast */
+/* If the destination address is word aligned and the size suitably
+ aligned, do it fast */
tst r1, #0x00000003
@@ -305,25 +305,25 @@
/* Word aligned insw */
- stmfd sp!, {r4-r11}
+ stmfd sp!, {r4-r9,lr}
- mov r11, #0xff000000
- orr r11, r11, #0x00ff0000
+ mov lr, #0xff000000
+ orr lr, lr, #0x00ff0000
inswm8_loop8:
cmp r2, #8
bcc inswm8_l8
- ldmia r0, {r3-r10}
+ ldmia r0, {r3-r9,ip}
- bic r3, r3, r11
+ bic r3, r3, lr
orr r3, r3, r4, lsl #16
- bic r5, r5, r11
Home |
Main Index |
Thread Index |
Old Index