Subject: port-arm/23028: memcpy and friends broken on ARMEB
To: None <gnats-bugs@gnats.netbsd.org>
From: Shoichi Miyake <smi@sm.sony.co.jp>
List: netbsd-bugs
Date: 10/01/2003 22:32:07
>Number: 23028
>Category: port-arm
>Synopsis: memcpy and friends broken on ARMEB
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: port-arm-maintainer
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Wed Oct 01 13:33:00 UTC 2003
>Closed-Date:
>Last-Modified:
>Originator: Shoichi Miyake
>Release: NetBSD 1.6ZC
>Organization:
Sony Corporation
>Environment:
System: NetBSD zao1 1.6ZC NetBSD 1.6ZC (ZAO425) #35: Wed Oct 1 21:23:31 JST 2003 smi@mandolin:/work/nb/arch/evbarm/obj/sys/arch/evbarm/compile/ZAO425 evbarm
Architecture: armeb
Machine: evbarm
>Description:
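memcpy and friends are broken on the ARMEB (big-endian ARM) platform.
The unaligned-source copy loops merge adjacent words with shift
directions that are only correct for little-endian byte order.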
>How-To-Repeat:
For example, build an mdroot kernel and boot it to see how the
single-user shell behaves. Both input and output are garbage.
>Fix:
I had to change the following files for my evbarm ZAO425 to work,
and there seem to be no problems in diskless multiuser mode so far.
- lib/libc/arch/arm/string/_memcpy.S
- sys/lib/libkern/arch/arm/memcpy.S
I tested aligned and unaligned{1,2,3}{src,dst} copies, in both the
forward and backward directions.
Best Regards,
Shoichi Miyake
Index: lib/libc/arch/arm/string/_memcpy.S
===================================================================
RCS file: /cvsroot/src/lib/libc/arch/arm/string/_memcpy.S,v
retrieving revision 1.4
diff -u -r1.4 _memcpy.S
--- lib/libc/arch/arm/string/_memcpy.S 2003/04/05 23:08:52 1.4
+++ lib/libc/arch/arm/string/_memcpy.S 2003/10/01 12:30:35
@@ -177,6 +177,17 @@
stmdb sp!, {r4, r5}
.Lmemcpy_fsrcul1loop16:
+#ifdef __ARMEB__
+ mov r3, lr, lsl #8
+ ldmia r1!, {r4, r5, r12, lr}
+ orr r3, r3, r4, lsr #24
+ mov r4, r4, lsl #8
+ orr r4, r4, r5, lsr #24
+ mov r5, r5, lsl #8
+ orr r5, r5, r12, lsr #24
+ mov r12, r12, lsl #8
+ orr r12, r12, lr, lsr #24
+#else
mov r3, lr, lsr #8
ldmia r1!, {r4, r5, r12, lr}
orr r3, r3, r4, lsl #24
@@ -186,6 +197,7 @@
orr r5, r5, r12, lsl #24
mov r12, r12, lsr #8
orr r12, r12, lr, lsl #24
+#endif
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
bge .Lmemcpy_fsrcul1loop16
@@ -194,9 +206,15 @@
blt .Lmemcpy_fsrcul1l4
.Lmemcpy_fsrcul1loop4:
+#ifdef __ARMEB__
+ mov r12, lr, lsl #8
+ ldr lr, [r1], #4
+ orr r12, r12, lr, lsr #24
+#else
mov r12, lr, lsr #8
ldr lr, [r1], #4
orr r12, r12, lr, lsl #24
+#endif
str r12, [r0], #4
subs r2, r2, #4
bge .Lmemcpy_fsrcul1loop4
@@ -212,6 +230,17 @@
stmdb sp!, {r4, r5}
.Lmemcpy_fsrcul2loop16:
+#ifdef __ARMEB__
+ mov r3, lr, lsl #16
+ ldmia r1!, {r4, r5, r12, lr}
+ orr r3, r3, r4, lsr #16
+ mov r4, r4, lsl #16
+ orr r4, r4, r5, lsr #16
+ mov r5, r5, lsl #16
+ orr r5, r5, r12, lsr #16
+ mov r12, r12, lsl #16
+ orr r12, r12, lr, lsr #16
+#else
mov r3, lr, lsr #16
ldmia r1!, {r4, r5, r12, lr}
orr r3, r3, r4, lsl #16
@@ -221,6 +250,7 @@
orr r5, r5, r12, lsl #16
mov r12, r12, lsr #16
orr r12, r12, lr, lsl #16
+#endif
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
bge .Lmemcpy_fsrcul2loop16
@@ -229,9 +259,15 @@
blt .Lmemcpy_fsrcul2l4
.Lmemcpy_fsrcul2loop4:
+#ifdef __ARMEB__
+ mov r12, lr, lsl #16
+ ldr lr, [r1], #4
+ orr r12, r12, lr, lsr #16
+#else
mov r12, lr, lsr #16
ldr lr, [r1], #4
orr r12, r12, lr, lsl #16
+#endif
str r12, [r0], #4
subs r2, r2, #4
bge .Lmemcpy_fsrcul2loop4
@@ -247,6 +283,17 @@
stmdb sp!, {r4, r5}
.Lmemcpy_fsrcul3loop16:
+#ifdef __ARMEB__
+ mov r3, lr, lsl #24
+ ldmia r1!, {r4, r5, r12, lr}
+ orr r3, r3, r4, lsr #8
+ mov r4, r4, lsl #24
+ orr r4, r4, r5, lsr #8
+ mov r5, r5, lsl #24
+ orr r5, r5, r12, lsr #8
+ mov r12, r12, lsl #24
+ orr r12, r12, lr, lsr #8
+#else
mov r3, lr, lsr #24
ldmia r1!, {r4, r5, r12, lr}
orr r3, r3, r4, lsl #8
@@ -256,6 +303,7 @@
orr r5, r5, r12, lsl #8
mov r12, r12, lsr #24
orr r12, r12, lr, lsl #8
+#endif
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
bge .Lmemcpy_fsrcul3loop16
@@ -264,9 +312,15 @@
blt .Lmemcpy_fsrcul3l4
.Lmemcpy_fsrcul3loop4:
+#ifdef __ARMEB__
+ mov r12, lr, lsl #24
+ ldr lr, [r1], #4
+ orr r12, r12, lr, lsr #8
+#else
mov r12, lr, lsr #24
ldr lr, [r1], #4
orr r12, r12, lr, lsl #8
+#endif
str r12, [r0], #4
subs r2, r2, #4
bge .Lmemcpy_fsrcul3loop4
@@ -369,6 +423,17 @@
stmdb sp!, {r4, r5, lr}
.Lmemcpy_bsrcul3loop16:
+#ifdef __ARMEB__
+ mov lr, r3, lsr #8
+ ldmdb r1!, {r3-r5, r12}
+ orr lr, lr, r12, lsl #24
+ mov r12, r12, lsr #8
+ orr r12, r12, r5, lsl #24
+ mov r5, r5, lsr #8
+ orr r5, r5, r4, lsl #24
+ mov r4, r4, lsr #8
+ orr r4, r4, r3, lsl #24
+#else
mov lr, r3, lsl #8
ldmdb r1!, {r3-r5, r12}
orr lr, lr, r12, lsr #24
@@ -378,6 +443,7 @@
orr r5, r5, r4, lsr #24
mov r4, r4, lsl #8
orr r4, r4, r3, lsr #24
+#endif
stmdb r0!, {r4, r5, r12, lr}
subs r2, r2, #0x10
bge .Lmemcpy_bsrcul3loop16
@@ -386,9 +452,15 @@
blt .Lmemcpy_bsrcul3l4
.Lmemcpy_bsrcul3loop4:
+#ifdef __ARMEB__
+ mov r12, r3, lsr #8
+ ldr r3, [r1, #-4]!
+ orr r12, r12, r3, lsl #24
+#else
mov r12, r3, lsl #8
ldr r3, [r1, #-4]!
orr r12, r12, r3, lsr #24
+#endif
str r12, [r0, #-4]!
subs r2, r2, #4
bge .Lmemcpy_bsrcul3loop4
@@ -404,6 +476,17 @@
stmdb sp!, {r4, r5, lr}
.Lmemcpy_bsrcul2loop16:
+#ifdef __ARMEB__
+ mov lr, r3, lsr #16
+ ldmdb r1!, {r3-r5, r12}
+ orr lr, lr, r12, lsl #16
+ mov r12, r12, lsr #16
+ orr r12, r12, r5, lsl #16
+ mov r5, r5, lsr #16
+ orr r5, r5, r4, lsl #16
+ mov r4, r4, lsr #16
+ orr r4, r4, r3, lsl #16
+#else
mov lr, r3, lsl #16
ldmdb r1!, {r3-r5, r12}
orr lr, lr, r12, lsr #16
@@ -413,6 +496,7 @@
orr r5, r5, r4, lsr #16
mov r4, r4, lsl #16
orr r4, r4, r3, lsr #16
+#endif
stmdb r0!, {r4, r5, r12, lr}
subs r2, r2, #0x10
bge .Lmemcpy_bsrcul2loop16
@@ -421,9 +505,15 @@
blt .Lmemcpy_bsrcul2l4
.Lmemcpy_bsrcul2loop4:
+#ifdef __ARMEB__
+ mov r12, r3, lsr #16
+ ldr r3, [r1, #-4]!
+ orr r12, r12, r3, lsl #16
+#else
mov r12, r3, lsl #16
ldr r3, [r1, #-4]!
orr r12, r12, r3, lsr #16
+#endif
str r12, [r0, #-4]!
subs r2, r2, #4
bge .Lmemcpy_bsrcul2loop4
@@ -439,6 +529,17 @@
stmdb sp!, {r4, r5, lr}
.Lmemcpy_bsrcul1loop32:
+#ifdef __ARMEB__
+ mov lr, r3, lsr #24
+ ldmdb r1!, {r3-r5, r12}
+ orr lr, lr, r12, lsl #8
+ mov r12, r12, lsr #24
+ orr r12, r12, r5, lsl #8
+ mov r5, r5, lsr #24
+ orr r5, r5, r4, lsl #8
+ mov r4, r4, lsr #24
+ orr r4, r4, r3, lsl #8
+#else
mov lr, r3, lsl #24
ldmdb r1!, {r3-r5, r12}
orr lr, lr, r12, lsr #8
@@ -448,6 +549,7 @@
orr r5, r5, r4, lsr #8
mov r4, r4, lsl #24
orr r4, r4, r3, lsr #8
+#endif
stmdb r0!, {r4, r5, r12, lr}
subs r2, r2, #0x10
bge .Lmemcpy_bsrcul1loop32
@@ -456,9 +558,15 @@
blt .Lmemcpy_bsrcul1l4
.Lmemcpy_bsrcul1loop4:
+#ifdef __ARMEB__
+ mov r12, r3, lsr #24
+ ldr r3, [r1, #-4]!
+ orr r12, r12, r3, lsl #8
+#else
mov r12, r3, lsl #24
ldr r3, [r1, #-4]!
orr r12, r12, r3, lsr #8
+#endif
str r12, [r0, #-4]!
subs r2, r2, #4
bge .Lmemcpy_bsrcul1loop4
Index: sys/lib/libkern/arch/arm/memcpy.S
===================================================================
RCS file: /cvsroot/src/sys/lib/libkern/arch/arm/memcpy.S,v
retrieving revision 1.4
diff -u -r1.4 memcpy.S
--- sys/lib/libkern/arch/arm/memcpy.S 2003/04/05 23:27:15 1.4
+++ sys/lib/libkern/arch/arm/memcpy.S 2003/10/01 12:12:05
@@ -185,6 +185,17 @@
stmdb sp!, {r4, r5}
.Lmemcpy_fsrcul1loop16:
+#ifdef __ARMEB__
+ mov r3, lr, lsl #8
+ ldmia r1!, {r4, r5, r12, lr}
+ orr r3, r3, r4, lsr #24
+ mov r4, r4, lsl #8
+ orr r4, r4, r5, lsr #24
+ mov r5, r5, lsl #8
+ orr r5, r5, r12, lsr #24
+ mov r12, r12, lsl #8
+ orr r12, r12, lr, lsr #24
+#else
mov r3, lr, lsr #8
ldmia r1!, {r4, r5, r12, lr}
orr r3, r3, r4, lsl #24
@@ -194,6 +205,7 @@
orr r5, r5, r12, lsl #24
mov r12, r12, lsr #8
orr r12, r12, lr, lsl #24
+#endif
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
bge .Lmemcpy_fsrcul1loop16
@@ -202,9 +214,15 @@
blt .Lmemcpy_fsrcul1l4
.Lmemcpy_fsrcul1loop4:
+#ifdef __ARMEB__
+ mov r12, lr, lsl #8
+ ldr lr, [r1], #4
+ orr r12, r12, lr, lsr #24
+#else
mov r12, lr, lsr #8
ldr lr, [r1], #4
orr r12, r12, lr, lsl #24
+#endif
str r12, [r0], #4
subs r2, r2, #4
bge .Lmemcpy_fsrcul1loop4
@@ -220,6 +238,17 @@
stmdb sp!, {r4, r5}
.Lmemcpy_fsrcul2loop16:
+#ifdef __ARMEB__
+ mov r3, lr, lsl #16
+ ldmia r1!, {r4, r5, r12, lr}
+ orr r3, r3, r4, lsr #16
+ mov r4, r4, lsl #16
+ orr r4, r4, r5, lsr #16
+ mov r5, r5, lsl #16
+ orr r5, r5, r12, lsr #16
+ mov r12, r12, lsl #16
+ orr r12, r12, lr, lsr #16
+#else
mov r3, lr, lsr #16
ldmia r1!, {r4, r5, r12, lr}
orr r3, r3, r4, lsl #16
@@ -229,6 +258,7 @@
orr r5, r5, r12, lsl #16
mov r12, r12, lsr #16
orr r12, r12, lr, lsl #16
+#endif
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
bge .Lmemcpy_fsrcul2loop16
@@ -237,9 +267,15 @@
blt .Lmemcpy_fsrcul2l4
.Lmemcpy_fsrcul2loop4:
+#ifdef __ARMEB__
+ mov r12, lr, lsl #16
+ ldr lr, [r1], #4
+ orr r12, r12, lr, lsr #16
+#else
mov r12, lr, lsr #16
ldr lr, [r1], #4
orr r12, r12, lr, lsl #16
+#endif
str r12, [r0], #4
subs r2, r2, #4
bge .Lmemcpy_fsrcul2loop4
@@ -255,6 +291,17 @@
stmdb sp!, {r4, r5}
.Lmemcpy_fsrcul3loop16:
+#ifdef __ARMEB__
+ mov r3, lr, lsl #24
+ ldmia r1!, {r4, r5, r12, lr}
+ orr r3, r3, r4, lsr #8
+ mov r4, r4, lsl #24
+ orr r4, r4, r5, lsr #8
+ mov r5, r5, lsl #24
+ orr r5, r5, r12, lsr #8
+ mov r12, r12, lsl #24
+ orr r12, r12, lr, lsr #8
+#else
mov r3, lr, lsr #24
ldmia r1!, {r4, r5, r12, lr}
orr r3, r3, r4, lsl #8
@@ -264,6 +311,7 @@
orr r5, r5, r12, lsl #8
mov r12, r12, lsr #24
orr r12, r12, lr, lsl #8
+#endif
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
bge .Lmemcpy_fsrcul3loop16
@@ -272,9 +320,15 @@
blt .Lmemcpy_fsrcul3l4
.Lmemcpy_fsrcul3loop4:
+#ifdef __ARMEB__
+ mov r12, lr, lsl #24
+ ldr lr, [r1], #4
+ orr r12, r12, lr, lsr #8
+#else
mov r12, lr, lsr #24
ldr lr, [r1], #4
orr r12, r12, lr, lsl #8
+#endif
str r12, [r0], #4
subs r2, r2, #4
bge .Lmemcpy_fsrcul3loop4
@@ -377,6 +431,17 @@
stmdb sp!, {r4, r5}
.Lmemcpy_bsrcul3loop16:
+#ifdef __ARMEB__
+ mov lr, r3, lsr #8
+ ldmdb r1!, {r3-r5, r12}
+ orr lr, lr, r12, lsl #24
+ mov r12, r12, lsr #8
+ orr r12, r12, r5, lsl #24
+ mov r5, r5, lsr #8
+ orr r5, r5, r4, lsl #24
+ mov r4, r4, lsr #8
+ orr r4, r4, r3, lsl #24
+#else
mov lr, r3, lsl #8
ldmdb r1!, {r3-r5, r12}
orr lr, lr, r12, lsr #24
@@ -386,6 +451,7 @@
orr r5, r5, r4, lsr #24
mov r4, r4, lsl #8
orr r4, r4, r3, lsr #24
+#endif
stmdb r0!, {r4, r5, r12, lr}
subs r2, r2, #0x10
bge .Lmemcpy_bsrcul3loop16
@@ -394,9 +460,15 @@
blt .Lmemcpy_bsrcul3l4
.Lmemcpy_bsrcul3loop4:
+#ifdef __ARMEB__
+ mov r12, r3, lsr #8
+ ldr r3, [r1, #-4]!
+ orr r12, r12, r3, lsl #24
+#else
mov r12, r3, lsl #8
ldr r3, [r1, #-4]!
orr r12, r12, r3, lsr #24
+#endif
str r12, [r0, #-4]!
subs r2, r2, #4
bge .Lmemcpy_bsrcul3loop4
@@ -412,6 +484,17 @@
stmdb sp!, {r4, r5}
.Lmemcpy_bsrcul2loop16:
+#ifdef __ARMEB__
+ mov lr, r3, lsr #16
+ ldmdb r1!, {r3-r5, r12}
+ orr lr, lr, r12, lsl #16
+ mov r12, r12, lsr #16
+ orr r12, r12, r5, lsl #16
+ mov r5, r5, lsr #16
+ orr r5, r5, r4, lsl #16
+ mov r4, r4, lsr #16
+ orr r4, r4, r3, lsl #16
+#else
mov lr, r3, lsl #16
ldmdb r1!, {r3-r5, r12}
orr lr, lr, r12, lsr #16
@@ -421,6 +504,7 @@
orr r5, r5, r4, lsr #16
mov r4, r4, lsl #16
orr r4, r4, r3, lsr #16
+#endif
stmdb r0!, {r4, r5, r12, lr}
subs r2, r2, #0x10
bge .Lmemcpy_bsrcul2loop16
@@ -429,9 +513,15 @@
blt .Lmemcpy_bsrcul2l4
.Lmemcpy_bsrcul2loop4:
+#ifdef __ARMEB__
+ mov r12, r3, lsr #16
+ ldr r3, [r1, #-4]!
+ orr r12, r12, r3, lsl #16
+#else
mov r12, r3, lsl #16
ldr r3, [r1, #-4]!
orr r12, r12, r3, lsr #16
+#endif
str r12, [r0, #-4]!
subs r2, r2, #4
bge .Lmemcpy_bsrcul2loop4
@@ -447,6 +537,17 @@
stmdb sp!, {r4, r5}
.Lmemcpy_bsrcul1loop32:
+#ifdef __ARMEB__
+ mov lr, r3, lsr #24
+ ldmdb r1!, {r3-r5, r12}
+ orr lr, lr, r12, lsl #8
+ mov r12, r12, lsr #24
+ orr r12, r12, r5, lsl #8
+ mov r5, r5, lsr #24
+ orr r5, r5, r4, lsl #8
+ mov r4, r4, lsr #24
+ orr r4, r4, r3, lsl #8
+#else
mov lr, r3, lsl #24
ldmdb r1!, {r3-r5, r12}
orr lr, lr, r12, lsr #8
@@ -456,6 +557,7 @@
orr r5, r5, r4, lsr #8
mov r4, r4, lsl #24
orr r4, r4, r3, lsr #8
+#endif
stmdb r0!, {r4, r5, r12, lr}
subs r2, r2, #0x10
bge .Lmemcpy_bsrcul1loop32
@@ -464,9 +566,15 @@
blt .Lmemcpy_bsrcul1l4
.Lmemcpy_bsrcul1loop4:
+#ifdef __ARMEB__
+ mov r12, r3, lsr #24
+ ldr r3, [r1, #-4]!
+ orr r12, r12, r3, lsl #8
+#else
mov r12, r3, lsl #24
ldr r3, [r1, #-4]!
orr r12, r12, r3, lsr #8
+#endif
str r12, [r0, #-4]!
subs r2, r2, #4
bge .Lmemcpy_bsrcul1loop4
>Release-Note:
>Audit-Trail:
>Unformatted: