Subject: port-arm/23028: memcpy and friends broken on ARMEB
To: None <gnats-bugs@gnats.netbsd.org>
From: Shoichi Miyake <smi@sm.sony.co.jp>
List: netbsd-bugs
Date: 10/01/2003 22:32:07
>Number:         23028
>Category:       port-arm
>Synopsis:       memcpy and friends broken on ARMEB
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    port-arm-maintainer
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Wed Oct 01 13:33:00 UTC 2003
>Closed-Date:
>Last-Modified:
>Originator:     Shoichi Miyake
>Release:        NetBSD 1.6ZC
>Organization:
Sony Corporation
>Environment:
System: NetBSD zao1 1.6ZC NetBSD 1.6ZC (ZAO425) #35: Wed Oct  1 21:23:31 JST 2003  smi@mandolin:/work/nb/arch/evbarm/obj/sys/arch/evbarm/compile/ZAO425 evbarm
Architecture: armeb
Machine: evbarm
>Description:
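	memcpy and friends are broken on the ARMEB (big-endian ARM) platform.
Judging from the fix below, the unaligned-source copy loops combine
adjacent words with shift directions that are only correct on
little-endian.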
>How-To-Repeat:
	For example, build an mdroot kernel and boot it to the single-user
shell. Both the input and the output of the shell are garbage.
>Fix:
	I had to change the following files to get my evbarm ZAO425 to work;
with these changes there seem to be no problems in diskless multiuser mode so far.
	- lib/libc/arch/arm/string/_memcpy.S
	- sys/lib/libkern/arch/arm/memcpy.S
I tested aligned and unaligned{1,2,3}{src,dst} copies, in both the forward
and backward directions.

Best Regards,
Shoichi Miyake

Index: lib/libc/arch/arm/string/_memcpy.S
===================================================================
RCS file: /cvsroot/src/lib/libc/arch/arm/string/_memcpy.S,v
retrieving revision 1.4
diff -u -r1.4 _memcpy.S
--- lib/libc/arch/arm/string/_memcpy.S	2003/04/05 23:08:52	1.4
+++ lib/libc/arch/arm/string/_memcpy.S	2003/10/01 12:30:35
@@ -177,6 +177,17 @@
 	stmdb	sp!, {r4, r5}
 
 .Lmemcpy_fsrcul1loop16:
+#ifdef __ARMEB__
+	mov	r3, lr, lsl #8
+	ldmia	r1!, {r4, r5, r12, lr}
+	orr	r3, r3, r4, lsr #24
+	mov	r4, r4, lsl #8
+	orr	r4, r4, r5, lsr #24
+	mov	r5, r5, lsl #8
+	orr	r5, r5, r12, lsr #24
+	mov	r12, r12, lsl #8
+	orr	r12, r12, lr, lsr #24
+#else
 	mov	r3, lr, lsr #8
 	ldmia	r1!, {r4, r5, r12, lr}
 	orr	r3, r3, r4, lsl #24
@@ -186,6 +197,7 @@
 	orr	r5, r5, r12, lsl #24
 	mov	r12, r12, lsr #8
 	orr	r12, r12, lr, lsl #24
+#endif
 	stmia	r0!, {r3-r5, r12}
 	subs	r2, r2, #0x10         
 	bge	.Lmemcpy_fsrcul1loop16
@@ -194,9 +206,15 @@
 	blt	.Lmemcpy_fsrcul1l4
 
 .Lmemcpy_fsrcul1loop4:
+#ifdef __ARMEB__
+	mov	r12, lr, lsl #8
+	ldr	lr, [r1], #4
+	orr	r12, r12, lr, lsr #24
+#else
 	mov	r12, lr, lsr #8
 	ldr	lr, [r1], #4
 	orr	r12, r12, lr, lsl #24
+#endif
 	str	r12, [r0], #4
 	subs	r2, r2, #4
 	bge	.Lmemcpy_fsrcul1loop4
@@ -212,6 +230,17 @@
 	stmdb	sp!, {r4, r5}
 
 .Lmemcpy_fsrcul2loop16:
+#ifdef __ARMEB__
+	mov	r3, lr, lsl #16
+	ldmia	r1!, {r4, r5, r12, lr}
+	orr	r3, r3, r4, lsr #16
+	mov	r4, r4, lsl #16
+	orr	r4, r4, r5, lsr #16
+	mov	r5, r5, lsl #16
+	orr	r5, r5, r12, lsr #16
+	mov	r12, r12, lsl #16
+	orr	r12, r12, lr, lsr #16
+#else
 	mov	r3, lr, lsr #16
 	ldmia	r1!, {r4, r5, r12, lr}
 	orr	r3, r3, r4, lsl #16
@@ -221,6 +250,7 @@
 	orr	r5, r5, r12, lsl #16
 	mov	r12, r12, lsr #16
 	orr	r12, r12, lr, lsl #16
+#endif
 	stmia	r0!, {r3-r5, r12}
 	subs	r2, r2, #0x10         
 	bge	.Lmemcpy_fsrcul2loop16
@@ -229,9 +259,15 @@
 	blt	.Lmemcpy_fsrcul2l4
 
 .Lmemcpy_fsrcul2loop4:
+#ifdef __ARMEB__
+	mov	r12, lr, lsl #16
+	ldr	lr, [r1], #4
+	orr	r12, r12, lr, lsr #16
+#else
 	mov	r12, lr, lsr #16
 	ldr	lr, [r1], #4
 	orr	r12, r12, lr, lsl #16
+#endif
 	str	r12, [r0], #4
 	subs	r2, r2, #4
 	bge	.Lmemcpy_fsrcul2loop4
@@ -247,6 +283,17 @@
 	stmdb	sp!, {r4, r5}
 
 .Lmemcpy_fsrcul3loop16:
+#ifdef __ARMEB__
+	mov	r3, lr, lsl #24
+	ldmia	r1!, {r4, r5, r12, lr}
+	orr	r3, r3, r4, lsr #8
+	mov	r4, r4, lsl #24
+	orr	r4, r4, r5, lsr #8
+	mov	r5, r5, lsl #24
+	orr	r5, r5, r12, lsr #8
+	mov	r12, r12, lsl #24
+	orr	r12, r12, lr, lsr #8
+#else
 	mov	r3, lr, lsr #24
 	ldmia	r1!, {r4, r5, r12, lr}
 	orr	r3, r3, r4, lsl #8
@@ -256,6 +303,7 @@
 	orr	r5, r5, r12, lsl #8
 	mov	r12, r12, lsr #24
 	orr	r12, r12, lr, lsl #8
+#endif
 	stmia	r0!, {r3-r5, r12}
 	subs	r2, r2, #0x10         
 	bge	.Lmemcpy_fsrcul3loop16
@@ -264,9 +312,15 @@
 	blt	.Lmemcpy_fsrcul3l4
 
 .Lmemcpy_fsrcul3loop4:
+#ifdef __ARMEB__
+	mov	r12, lr, lsl #24
+	ldr	lr, [r1], #4
+	orr	r12, r12, lr, lsr #8
+#else
 	mov	r12, lr, lsr #24
 	ldr	lr, [r1], #4
 	orr	r12, r12, lr, lsl #8
+#endif
 	str	r12, [r0], #4
 	subs	r2, r2, #4
 	bge	.Lmemcpy_fsrcul3loop4
@@ -369,6 +423,17 @@
 	stmdb	sp!, {r4, r5, lr}
 
 .Lmemcpy_bsrcul3loop16:
+#ifdef __ARMEB__
+	mov	lr, r3, lsr #8
+	ldmdb	r1!, {r3-r5, r12}
+	orr	lr, lr, r12, lsl #24
+	mov	r12, r12, lsr #8
+	orr	r12, r12, r5, lsl #24
+	mov	r5, r5, lsr #8
+	orr	r5, r5, r4, lsl #24
+	mov	r4, r4, lsr #8
+	orr	r4, r4, r3, lsl #24
+#else
 	mov	lr, r3, lsl #8
 	ldmdb	r1!, {r3-r5, r12}
 	orr	lr, lr, r12, lsr #24
@@ -378,6 +443,7 @@
 	orr	r5, r5, r4, lsr #24
 	mov	r4, r4, lsl #8
 	orr	r4, r4, r3, lsr #24
+#endif
 	stmdb	r0!, {r4, r5, r12, lr}
 	subs	r2, r2, #0x10         
 	bge	.Lmemcpy_bsrcul3loop16
@@ -386,9 +452,15 @@
 	blt	.Lmemcpy_bsrcul3l4
 
 .Lmemcpy_bsrcul3loop4:
+#ifdef __ARMEB__
+	mov	r12, r3, lsr #8
+	ldr	r3, [r1, #-4]!
+	orr	r12, r12, r3, lsl #24
+#else
 	mov	r12, r3, lsl #8
 	ldr	r3, [r1, #-4]!
 	orr	r12, r12, r3, lsr #24
+#endif
 	str	r12, [r0, #-4]!
 	subs	r2, r2, #4
 	bge	.Lmemcpy_bsrcul3loop4
@@ -404,6 +476,17 @@
 	stmdb	sp!, {r4, r5, lr}
 
 .Lmemcpy_bsrcul2loop16:
+#ifdef __ARMEB__
+	mov	lr, r3, lsr #16
+	ldmdb	r1!, {r3-r5, r12}
+	orr	lr, lr, r12, lsl #16
+	mov	r12, r12, lsr #16
+	orr	r12, r12, r5, lsl #16
+	mov	r5, r5, lsr #16
+	orr	r5, r5, r4, lsl #16
+	mov	r4, r4, lsr #16
+	orr	r4, r4, r3, lsl #16
+#else
 	mov	lr, r3, lsl #16
 	ldmdb	r1!, {r3-r5, r12}
 	orr	lr, lr, r12, lsr #16
@@ -413,6 +496,7 @@
 	orr	r5, r5, r4, lsr #16
 	mov	r4, r4, lsl #16
 	orr	r4, r4, r3, lsr #16
+#endif
 	stmdb	r0!, {r4, r5, r12, lr}
 	subs	r2, r2, #0x10         
 	bge	.Lmemcpy_bsrcul2loop16
@@ -421,9 +505,15 @@
 	blt	.Lmemcpy_bsrcul2l4
 
 .Lmemcpy_bsrcul2loop4:
+#ifdef __ARMEB__
+	mov	r12, r3, lsr #16
+	ldr	r3, [r1, #-4]!
+	orr	r12, r12, r3, lsl #16
+#else
 	mov	r12, r3, lsl #16
 	ldr	r3, [r1, #-4]!
 	orr	r12, r12, r3, lsr #16
+#endif
 	str	r12, [r0, #-4]!
 	subs	r2, r2, #4
 	bge	.Lmemcpy_bsrcul2loop4
@@ -439,6 +529,17 @@
 	stmdb	sp!, {r4, r5, lr}
 
 .Lmemcpy_bsrcul1loop32:
+#ifdef __ARMEB__
+	mov	lr, r3, lsr #24
+	ldmdb	r1!, {r3-r5, r12}
+	orr	lr, lr, r12, lsl #8
+	mov	r12, r12, lsr #24
+	orr	r12, r12, r5, lsl #8
+	mov	r5, r5, lsr #24
+	orr	r5, r5, r4, lsl #8
+	mov	r4, r4, lsr #24
+	orr	r4, r4, r3, lsl #8
+#else
 	mov	lr, r3, lsl #24
 	ldmdb	r1!, {r3-r5, r12}
 	orr	lr, lr, r12, lsr #8
@@ -448,6 +549,7 @@
 	orr	r5, r5, r4, lsr #8
 	mov	r4, r4, lsl #24
 	orr	r4, r4, r3, lsr #8
+#endif
 	stmdb	r0!, {r4, r5, r12, lr}
 	subs	r2, r2, #0x10         
 	bge	.Lmemcpy_bsrcul1loop32
@@ -456,9 +558,15 @@
 	blt	.Lmemcpy_bsrcul1l4
 
 .Lmemcpy_bsrcul1loop4:
+#ifdef __ARMEB__
+	mov	r12, r3, lsr #24
+	ldr	r3, [r1, #-4]!
+	orr	r12, r12, r3, lsl #8
+#else
 	mov	r12, r3, lsl #24
 	ldr	r3, [r1, #-4]!
 	orr	r12, r12, r3, lsr #8
+#endif
 	str	r12, [r0, #-4]!
 	subs	r2, r2, #4
 	bge	.Lmemcpy_bsrcul1loop4
Index: sys/lib/libkern/arch/arm/memcpy.S
===================================================================
RCS file: /cvsroot/src/sys/lib/libkern/arch/arm/memcpy.S,v
retrieving revision 1.4
diff -u -r1.4 memcpy.S
--- sys/lib/libkern/arch/arm/memcpy.S	2003/04/05 23:27:15	1.4
+++ sys/lib/libkern/arch/arm/memcpy.S	2003/10/01 12:12:05
@@ -185,6 +185,17 @@
 	stmdb	sp!, {r4, r5}
 
 .Lmemcpy_fsrcul1loop16:
+#ifdef __ARMEB__
+	mov	r3, lr, lsl #8
+	ldmia	r1!, {r4, r5, r12, lr}
+	orr	r3, r3, r4, lsr #24
+	mov	r4, r4, lsl #8
+	orr	r4, r4, r5, lsr #24
+	mov	r5, r5, lsl #8
+	orr	r5, r5, r12, lsr #24
+	mov	r12, r12, lsl #8
+	orr	r12, r12, lr, lsr #24
+#else
 	mov	r3, lr, lsr #8
 	ldmia	r1!, {r4, r5, r12, lr}
 	orr	r3, r3, r4, lsl #24
@@ -194,6 +205,7 @@
 	orr	r5, r5, r12, lsl #24
 	mov	r12, r12, lsr #8
 	orr	r12, r12, lr, lsl #24
+#endif
 	stmia	r0!, {r3-r5, r12}
 	subs	r2, r2, #0x10         
 	bge	.Lmemcpy_fsrcul1loop16
@@ -202,9 +214,15 @@
 	blt	.Lmemcpy_fsrcul1l4
 
 .Lmemcpy_fsrcul1loop4:
+#ifdef __ARMEB__
+	mov	r12, lr, lsl #8
+	ldr	lr, [r1], #4
+	orr	r12, r12, lr, lsr #24
+#else
 	mov	r12, lr, lsr #8
 	ldr	lr, [r1], #4
 	orr	r12, r12, lr, lsl #24
+#endif
 	str	r12, [r0], #4
 	subs	r2, r2, #4
 	bge	.Lmemcpy_fsrcul1loop4
@@ -220,6 +238,17 @@
 	stmdb	sp!, {r4, r5}
 
 .Lmemcpy_fsrcul2loop16:
+#ifdef __ARMEB__
+	mov	r3, lr, lsl #16
+	ldmia	r1!, {r4, r5, r12, lr}
+	orr	r3, r3, r4, lsr #16
+	mov	r4, r4, lsl #16
+	orr	r4, r4, r5, lsr #16
+	mov	r5, r5, lsl #16
+	orr	r5, r5, r12, lsr #16
+	mov	r12, r12, lsl #16
+	orr	r12, r12, lr, lsr #16
+#else
 	mov	r3, lr, lsr #16
 	ldmia	r1!, {r4, r5, r12, lr}
 	orr	r3, r3, r4, lsl #16
@@ -229,6 +258,7 @@
 	orr	r5, r5, r12, lsl #16
 	mov	r12, r12, lsr #16
 	orr	r12, r12, lr, lsl #16
+#endif
 	stmia	r0!, {r3-r5, r12}
 	subs	r2, r2, #0x10         
 	bge	.Lmemcpy_fsrcul2loop16
@@ -237,9 +267,15 @@
 	blt	.Lmemcpy_fsrcul2l4
 
 .Lmemcpy_fsrcul2loop4:
+#ifdef __ARMEB__
+	mov	r12, lr, lsl #16
+	ldr	lr, [r1], #4
+	orr	r12, r12, lr, lsr #16
+#else
 	mov	r12, lr, lsr #16
 	ldr	lr, [r1], #4
 	orr	r12, r12, lr, lsl #16
+#endif
 	str	r12, [r0], #4
 	subs	r2, r2, #4
 	bge	.Lmemcpy_fsrcul2loop4
@@ -255,6 +291,17 @@
 	stmdb	sp!, {r4, r5}
 
 .Lmemcpy_fsrcul3loop16:
+#ifdef __ARMEB__
+	mov	r3, lr, lsl #24
+	ldmia	r1!, {r4, r5, r12, lr}
+	orr	r3, r3, r4, lsr #8
+	mov	r4, r4, lsl #24
+	orr	r4, r4, r5, lsr #8
+	mov	r5, r5, lsl #24
+	orr	r5, r5, r12, lsr #8
+	mov	r12, r12, lsl #24
+	orr	r12, r12, lr, lsr #8
+#else
 	mov	r3, lr, lsr #24
 	ldmia	r1!, {r4, r5, r12, lr}
 	orr	r3, r3, r4, lsl #8
@@ -264,6 +311,7 @@
 	orr	r5, r5, r12, lsl #8
 	mov	r12, r12, lsr #24
 	orr	r12, r12, lr, lsl #8
+#endif
 	stmia	r0!, {r3-r5, r12}
 	subs	r2, r2, #0x10         
 	bge	.Lmemcpy_fsrcul3loop16
@@ -272,9 +320,15 @@
 	blt	.Lmemcpy_fsrcul3l4
 
 .Lmemcpy_fsrcul3loop4:
+#ifdef __ARMEB__
+	mov	r12, lr, lsl #24
+	ldr	lr, [r1], #4
+	orr	r12, r12, lr, lsr #8
+#else
 	mov	r12, lr, lsr #24
 	ldr	lr, [r1], #4
 	orr	r12, r12, lr, lsl #8
+#endif
 	str	r12, [r0], #4
 	subs	r2, r2, #4
 	bge	.Lmemcpy_fsrcul3loop4
@@ -377,6 +431,17 @@
 	stmdb	sp!, {r4, r5}
 
 .Lmemcpy_bsrcul3loop16:
+#ifdef __ARMEB__
+	mov	lr, r3, lsr #8
+	ldmdb	r1!, {r3-r5, r12}
+	orr	lr, lr, r12, lsl #24
+	mov	r12, r12, lsr #8
+	orr	r12, r12, r5, lsl #24
+	mov	r5, r5, lsr #8
+	orr	r5, r5, r4, lsl #24
+	mov	r4, r4, lsr #8
+	orr	r4, r4, r3, lsl #24
+#else
 	mov	lr, r3, lsl #8
 	ldmdb	r1!, {r3-r5, r12}
 	orr	lr, lr, r12, lsr #24
@@ -386,6 +451,7 @@
 	orr	r5, r5, r4, lsr #24
 	mov	r4, r4, lsl #8
 	orr	r4, r4, r3, lsr #24
+#endif
 	stmdb	r0!, {r4, r5, r12, lr}
 	subs	r2, r2, #0x10         
 	bge	.Lmemcpy_bsrcul3loop16
@@ -394,9 +460,15 @@
 	blt	.Lmemcpy_bsrcul3l4
 
 .Lmemcpy_bsrcul3loop4:
+#ifdef __ARMEB__
+	mov	r12, r3, lsr #8
+	ldr	r3, [r1, #-4]!
+	orr	r12, r12, r3, lsl #24
+#else
 	mov	r12, r3, lsl #8
 	ldr	r3, [r1, #-4]!
 	orr	r12, r12, r3, lsr #24
+#endif
 	str	r12, [r0, #-4]!
 	subs	r2, r2, #4
 	bge	.Lmemcpy_bsrcul3loop4
@@ -412,6 +484,17 @@
 	stmdb	sp!, {r4, r5}
 
 .Lmemcpy_bsrcul2loop16:
+#ifdef __ARMEB__
+	mov	lr, r3, lsr #16
+	ldmdb	r1!, {r3-r5, r12}
+	orr	lr, lr, r12, lsl #16
+	mov	r12, r12, lsr #16
+	orr	r12, r12, r5, lsl #16
+	mov	r5, r5, lsr #16
+	orr	r5, r5, r4, lsl #16
+	mov	r4, r4, lsr #16
+	orr	r4, r4, r3, lsl #16
+#else
 	mov	lr, r3, lsl #16
 	ldmdb	r1!, {r3-r5, r12}
 	orr	lr, lr, r12, lsr #16
@@ -421,6 +504,7 @@
 	orr	r5, r5, r4, lsr #16
 	mov	r4, r4, lsl #16
 	orr	r4, r4, r3, lsr #16
+#endif
 	stmdb	r0!, {r4, r5, r12, lr}
 	subs	r2, r2, #0x10         
 	bge	.Lmemcpy_bsrcul2loop16
@@ -429,9 +513,15 @@
 	blt	.Lmemcpy_bsrcul2l4
 
 .Lmemcpy_bsrcul2loop4:
+#ifdef __ARMEB__
+	mov	r12, r3, lsr #16
+	ldr	r3, [r1, #-4]!
+	orr	r12, r12, r3, lsl #16
+#else
 	mov	r12, r3, lsl #16
 	ldr	r3, [r1, #-4]!
 	orr	r12, r12, r3, lsr #16
+#endif
 	str	r12, [r0, #-4]!
 	subs	r2, r2, #4
 	bge	.Lmemcpy_bsrcul2loop4
@@ -447,6 +537,17 @@
 	stmdb	sp!, {r4, r5}
 
 .Lmemcpy_bsrcul1loop32:
+#ifdef __ARMEB__
+	mov	lr, r3, lsr #24
+	ldmdb	r1!, {r3-r5, r12}
+	orr	lr, lr, r12, lsl #8
+	mov	r12, r12, lsr #24
+	orr	r12, r12, r5, lsl #8
+	mov	r5, r5, lsr #24
+	orr	r5, r5, r4, lsl #8
+	mov	r4, r4, lsr #24
+	orr	r4, r4, r3, lsl #8
+#else
 	mov	lr, r3, lsl #24
 	ldmdb	r1!, {r3-r5, r12}
 	orr	lr, lr, r12, lsr #8
@@ -456,6 +557,7 @@
 	orr	r5, r5, r4, lsr #8
 	mov	r4, r4, lsl #24
 	orr	r4, r4, r3, lsr #8
+#endif
 	stmdb	r0!, {r4, r5, r12, lr}
 	subs	r2, r2, #0x10         
 	bge	.Lmemcpy_bsrcul1loop32
@@ -464,9 +566,15 @@
 	blt	.Lmemcpy_bsrcul1l4
 
 .Lmemcpy_bsrcul1loop4:
+#ifdef __ARMEB__
+	mov	r12, r3, lsr #24
+	ldr	r3, [r1, #-4]!
+	orr	r12, r12, r3, lsl #8
+#else
 	mov	r12, r3, lsl #24
 	ldr	r3, [r1, #-4]!
 	orr	r12, r12, r3, lsr #8
+#endif
 	str	r12, [r0, #-4]!
 	subs	r2, r2, #4
 	bge	.Lmemcpy_bsrcul1loop4

>Release-Note:
>Audit-Trail:
>Unformatted: