Subject: Re: port-powerpc/25941: build problem with 1.6.2-STABLE
To: Nick Hudson <skrll@netbsd.org>
From: Miles Nordin <carton@Ivy.NET>
List: netbsd-bugs
Date: 06/17/2004 11:50:28
>>>>> "nh" == Nick Hudson <skrll@netbsd.org> writes:

    nh> PPC gas got changed shortly after 1.6 was branched to
    nh> understand %r prefixes.  Please test the attached patch.

If 1.6.2-STABLE builds for powerpc on releng, then the bug is gone.

The gas that 1.6.2-STABLE built _did_ understand registers written as
'%r1'; that part seemed to work fine.  There were two problems:

ld.elf_so's rtld_start.S wrote registers as '%r1'.
  asm.h has '#define r1 1', which turns %r1 into %1 (sketched below)
  gas does not accept register names of the form '%1'
  so I added #ifdef _KERNEL to asm.h, as in 2.0_BETA, to disable
   those #defines
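
A minimal sketch of the clash (the instruction and operands are made
up for illustration): with the old asm.h #defines visible to userland
code, cpp rewrites the register names before gas ever sees them:

	/* rtld_start.S source */	/* after '#define r1 1' */
	addi	%r1, %r1, -16		/* addi	%1, %1, -16  <- gas rejects '%1' */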

libc's bzero.S, strlen.S, and ffs.S wrote registers as 'r1'
  while the old asm.h would translate this to '1', it no longer does
   once those #defines are disabled (second sketch below)
  gas does not accept register names of the form 'r1'
  so I pulled those files up to 2.0_BETA
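
The same sketch for the other direction (the 'mr' line is lifted from
the bzero.S diff below): with those #defines disabled for userland, a
bare 'r1' spelling reaches gas unchanged:

	/* old bzero.S source */	/* what gas now sees */
	mr	r4, r5			/* mr	r4, r5  <- gas rejects 'r4'/'r5' */

hence the pull-up to the '%r4'/'%r5' spellings.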

So AFAICT, 1.6.2-STABLE gas
  will accept:   '%r1' or '1'
  won't accept:  'r1' or '%1'
(a quick way to check this is sketched below)
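
If you want to double-check against your own tooldir, piping one-line
programs at the assembler should do it (the cross-as name here is a
guess; substitute whatever your TOOLDIR calls it):

	$ echo '	li %r3, 0' | powerpc--netbsd-as -o /dev/null	# accepted
	$ echo '	li 3, 0'   | powerpc--netbsd-as -o /dev/null	# accepted
	$ echo '	li r3, 0'  | powerpc--netbsd-as -o /dev/null	# rejected
	$ echo '	li %3, 0'  | powerpc--netbsd-as -o /dev/null	# rejected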

These are the exact changes I used.  Sorry for the long bug report;
I'm still a little clumsy with this.

--- sys/arch/powerpc/include/asm.h.orig	Thu Jun 17 11:44:40 2004
+++ sys/arch/powerpc/include/asm.h	Wed Jun 16 12:47:06 2004
@@ -92,6 +92,8 @@
 
 /* Condition Register Bit Fields */
 
+#if !defined(_NOREGNAMES)
+#if defined(_KERNEL) || defined(_STANDALONE)
 #define cr0     0
 #define cr1     1
 #define cr2     2
@@ -100,9 +102,11 @@
 #define cr5     5
 #define cr6     6
 #define cr7     7
+#endif
 
 /* General Purpose Registers (GPRs) */
 
+#if defined(_KERNEL) || defined(_STANDALONE)
 #define r0      0
 #define r1      1
 #define r2      2
@@ -135,9 +139,11 @@
 #define r29     29
 #define r30     30
 #define r31     31
+#endif
 
 /* Floating Point Registers (FPRs) */
 
+#if defined(_KERNEL) || defined(_STANDALONE)
 #define fr0     0
 #define fr1     1
 #define fr2     2
@@ -170,5 +176,7 @@
 #define fr29    29
 #define fr30    30
 #define fr31    31
+#endif
+#endif /* !_NOREGNAMES */
 
 #endif /* !_PPC_ASM_H_ */
--- lib/libc/arch/powerpc/string/bzero.S.orig	Wed Jun 16 12:48:02 2004
+++ lib/libc/arch/powerpc/string/bzero.S	Wed Jun 16 12:57:54 2004
@@ -1,7 +1,7 @@
-/*	$NetBSD: bzero.S,v 1.4 2002/03/12 22:08:22 eeh Exp $ */
+/*	$NetBSD: bzero.S,v 1.7 2003/07/26 19:24:37 salo Exp $ */
 
 /*-
- * Copyright (C) 2001	Martin J. Laubach <mjl@netbsd.org>
+ * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -37,14 +37,14 @@
 
 /*----------------------------------------------------------------------*/
 /*
-     void bzero(void *b r3, size_t len r4);
-     void * memset(void *b r3, int c r4, size_t len r5);
+     void bzero(void *b %r3, size_t len %r4);
+     void * memset(void *b %r3, int c %r4, size_t len %r5);
 */
 /*----------------------------------------------------------------------*/
 
-#define r_dst	r3
-#define r_len	r4
-#define r_val	r0
+#define r_dst	%r3
+#define r_len	%r4
+#define r_val	%r0
 
 		.text
 		.align 4
@@ -53,14 +53,14 @@
 		b	cb_memset
 
 ENTRY(memset)
-		cmplwi	cr1, r5, 0
-		mr.	r0, r4
-		mr	r8, r3
+		cmplwi	cr1, %r5, 0
+		mr.	%r0, %r4
+		mr	%r8, %r3
 		beqlr-	cr1			/* Nothing to do */
 
-		rlwimi	r0, r4, 8, 16, 23	/* word extend fill value */
-		rlwimi	r0, r0, 16, 0, 15
-		mr	r4, r5
+		rlwimi	%r0, %r4, 8, 16, 23	/* word extend fill value */
+		rlwimi	%r0, %r0, 16, 0, 15
+		mr	%r4, %r5
 		bne-	simple_fill		/* =! 0, use trivial fill */
 cb_memset:
 
@@ -68,17 +68,17 @@
 #ifndef _KERNEL
 		/* First find out cache line size */
 #ifdef PIC
-		mflr	r9
+		mflr	%r9
 		bl	_GLOBAL_OFFSET_TABLE_@local-4
-		mflr	r10
-		mtlr	r9
-		lwz	r5,cache_info@got(r10)
+		mflr	%r10
+		mtlr	%r9
+		lwz	%r5,cache_info@got(%r10)
 #else
-		lis	r5,cache_info@h
-		ori	r5,r5,cache_info@l
+		lis	%r5,cache_info@h
+		ori	%r5,%r5,cache_info@l
 #endif
-		lwz	r6, 4(r5)
-		cmpwi	r6, -1
+		lwz	%r6, 4(%r5)
+		cmpwi	%r6, -1
 		bne+	cb_cacheline_known
 
 /*----------------------------------------------------------------------*/
@@ -94,142 +94,142 @@
 #define R0_SAVE		28
 #define R8_SAVE		32
 
-		mflr	r6
-		stw	r6, 4(r1)
-		stwu	r1, -STKFRAME_SZ(r1)
-
-		stw	r8, R8_SAVE(r1)
-		stw	r3, R3_SAVE(r1)
-		stw	r4, R4_SAVE(r1)
-		stw	r0, R0_SAVE(r1)
+		mflr	%r6
+		stw	%r6, 4(%r1)
+		stwu	%r1, -STKFRAME_SZ(%r1)
+
+		stw	%r8, R8_SAVE(%r1)
+		stw	%r3, R3_SAVE(%r1)
+		stw	%r4, R4_SAVE(%r1)
+		stw	%r0, R0_SAVE(%r1)
 
 	
 
-		li	r0, CTL_MACHDEP		/* Construct MIB */
-		stw	r0, MIB(r1)
-		li	r0, CPU_CACHEINFO
-		stw	r0, MIB+4(r1)
-
-		li	r0, 4*4			/* Oldlenp := 4*4 */
-		stw	r0, OLDPLEN(r1)
-
-		addi	r3, r1, MIB
-		li	r4, 2			/* namelen */
-		/* r5 already contains &cache_info */
-		addi	r6, r1, OLDPLEN
-		li	r7, 0
-		li	r8, 0
+		li	%r0, CTL_MACHDEP		/* Construct MIB */
+		stw	%r0, MIB(%r1)
+		li	%r0, CPU_CACHEINFO
+		stw	%r0, MIB+4(%r1)
+
+		li	%r0, 4*4			/* Oldlenp := 4*4 */
+		stw	%r0, OLDPLEN(%r1)
+
+		addi	%r3, %r1, MIB
+		li	%r4, 2			/* namelen */
+		/* %r5 already contains &cache_info */
+		addi	%r6, %r1, OLDPLEN
+		li	%r7, 0
+		li	%r8, 0
 		bl	PIC_PLT(_C_LABEL(sysctl))
 	
-		cmpwi	r3, 0			/* Check result */
+		cmpwi	%r3, 0			/* Check result */
 		beq	1f
 
 		/* Failure, try older sysctl */
 	
-		li	r0, CTL_MACHDEP		/* Construct MIB */
-		stw	r0, MIB(r1)
-		li	r0, CPU_CACHELINE
-		stw	r0, MIB+4(r1)
+		li	%r0, CTL_MACHDEP		/* Construct MIB */
+		stw	%r0, MIB(%r1)
+		li	%r0, CPU_CACHELINE
+		stw	%r0, MIB+4(%r1)
 
-		li	r0, 4			/* Oldlenp := 4 */
-		stw	r0, OLDPLEN(r1)
+		li	%r0, 4			/* Oldlenp := 4 */
+		stw	%r0, OLDPLEN(%r1)
 
-		addi	r3, r1, MIB
-		li	r4, 2			/* namelen */
+		addi	%r3, %r1, MIB
+		li	%r4, 2			/* namelen */
 #ifdef PIC
-		mflr	r9
+		mflr	%r9
 		bl	_GLOBAL_OFFSET_TABLE_@local-4
-		mflr	r10
-		mtlr	r9
-		lwz	r5,cache_info@got(r10)
-		addi	r5, r5, 4
-#else
-		lis	r5,cache_info+4@h
-		ori	r5,r5,cache_info+4@l
-#endif
-		addi	r6, r1, OLDPLEN
-		li	r7, 0
-		li	r8, 0
+		mflr	%r10
+		mtlr	%r9
+		lwz	%r5,cache_info@got(%r10)
+		addi	%r5, %r5, 4
+#else
+		lis	%r5,cache_info+4@h
+		ori	%r5,%r5,cache_info+4@l
+#endif
+		addi	%r6, %r1, OLDPLEN
+		li	%r7, 0
+		li	%r8, 0
 		bl	PIC_PLT(_C_LABEL(sysctl))
 1:
-		lwz	r8, R8_SAVE(r1)
-		lwz	r3, R3_SAVE(r1)
-		lwz	r4, R4_SAVE(r1)
-		lwz	r0, R0_SAVE(r1)
+		lwz	%r8, R8_SAVE(%r1)
+		lwz	%r3, R3_SAVE(%r1)
+		lwz	%r4, R4_SAVE(%r1)
+		lwz	%r0, R0_SAVE(%r1)
 
 #ifdef PIC
 		bl	_GLOBAL_OFFSET_TABLE_@local-4
-		mflr	r10
-		lwz	r9, cache_info@got(r10)
-		lwz	r9, 4(r9)
-#else
-		lis	r5, cache_info+4@ha
-		lwz	r9, cache_info+4@l(r5)
-#endif
-		la	r1, STKFRAME_SZ(r1)
-		lwz	r5, 4(r1)
-		mtlr	r5
-
-		cntlzw	r6, r9			/* compute shift value */
-		li	r5, 31
-		subf	r5, r6, r5
+		mflr	%r10
+		lwz	%r9, cache_info@got(%r10)
+		lwz	%r9, 4(%r9)
+#else
+		lis	%r5, cache_info+4@ha
+		lwz	%r9, cache_info+4@l(%r5)
+#endif
+		la	%r1, STKFRAME_SZ(%r1)
+		lwz	%r5, 4(%r1)
+		mtlr	%r5
+
+		cntlzw	%r6, %r9			/* compute shift value */
+		li	%r5, 31
+		subf	%r5, %r6, %r5
 
 #ifdef PIC
-		lwz	r6, cache_sh@got(r10)
-		stw	r5, 0(r6)
+		lwz	%r6, cache_sh@got(%r10)
+		stw	%r5, 0(%r6)
 #else
-		lis	r6, cache_sh@ha
-		stw	r5, cache_sh@l(r6)
+		lis	%r6, cache_sh@ha
+		stw	%r5, cache_sh@l(%r6)
 #endif
 /*----------------------------------------------------------------------*/
-/* Okay, we know the cache line size (r9) and shift value (r10) */
+/* Okay, we know the cache line size (%r9) and shift value (%r10) */
 cb_cacheline_known:
 #ifdef PIC
-		lwz	r5, cache_info@got(r10)
-		lwz	r9, 4(r5)
-		lwz	r5, cache_sh@got(r10)
-		lwz	r10, 0(r5)
-#else
-		lis	r9, cache_info+4@ha
-		lwz	r9, cache_info+4@l(r9)
-		lis	r10, cache_sh@ha
-		lwz	r10, cache_sh@l(r10)
+		lwz	%r5, cache_info@got(%r10)
+		lwz	%r9, 4(%r5)
+		lwz	%r5, cache_sh@got(%r10)
+		lwz	%r10, 0(%r5)
+#else
+		lis	%r9, cache_info+4@ha
+		lwz	%r9, cache_info+4@l(%r9)
+		lis	%r10, cache_sh@ha
+		lwz	%r10, cache_sh@l(%r10)
 #endif
 
 #else /* _KERNEL */
 #ifdef	MULTIPROCESSOR
-		mfspr	r10, 0			/* Get cpu_info pointer */
+		mfsprg	%r10, 0			/* Get cpu_info pointer */
 #else
-		lis	r10, cpu_info_store@ha
-		addi	r10, r10, cpu_info_store@l
+		lis	%r10, cpu_info_store@ha
+		addi	%r10, %r10, cpu_info_store@l
 #endif
-		lwz	r9, CPU_CI+4(r10)	/* Load D$ line size */
-		cntlzw	r10, r9			/* Calculate shift.. */
-		li	r6, 31
-		subf	r10, r10, r6
+		lwz	%r9, CPU_CI+4(%r10)	/* Load D$ line size */
+		cntlzw	%r10, %r9			/* Calculate shift.. */
+		li	%r6, 31
+		subf	%r10, %r10, %r6
 #endif /* _KERNEL */
 		/* Back in memory filling business */
 		
 		cmplwi	cr1, r_len, 0		/* Nothing to do? */
-		add	r5, r9, r9
-		cmplw	r_len, r5		/* <= 2*CL bytes to move? */
+		add	%r5, %r9, %r9
+		cmplw	r_len, %r5		/* <= 2*CL bytes to move? */
 		beqlr-	cr1			/* then do nothing */
 
 		blt+	simple_fill		/* a trivial fill routine */
 
 		/* Word align the block, fill bytewise until dst even*/
 		
-		andi.	r5, r_dst, 0x03	
-		li	r6, 4
+		andi.	%r5, r_dst, 0x03	
+		li	%r6, 4
 		beq+	cb_aligned_w		/* already aligned to word? */
 
-		subf	r5, r5, r6		/* bytes to fill to align4 */
+		subf	%r5, %r5, %r6		/* bytes to fill to align4 */
 #if USE_STSWX
-		mtxer	r5
-		stswx	r0, 0, r_dst
-		add	r_dst, r5, r_dst
+		mtxer	%r5
+		stswx	%r0, 0, r_dst
+		add	r_dst, %r5, r_dst
 #else
-		mtctr	r5
+		mtctr	%r5
 
 		subi	r_dst, r_dst, 1
 1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
@@ -237,21 +237,23 @@
 
 		addi	r_dst, r_dst, 1
 #endif
-		subf	r_len, r5, r_len
+		subf	r_len, %r5, r_len
 
 cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */
 
 		/* I know I have something to do since we had > 2*CL initially */
 		/* so no need to check for r_len = 0 */
 
-		rlwinm.	r5, r_dst, 30, 29, 31
-		srwi	r6, r9, 2
+		subi	%r6, %r9, 1		/* CL mask */
+		and.	%r5, r_dst, %r6
+		srwi	%r5, %r5, 2
+		srwi	%r6, %r9, 2
 		beq	cb_aligned_cb		/* already on CL boundary? */
 
-		subf	r5, r5, r6		/* words to fill to alignment */
-		mtctr	r5
-		slwi	r5, r5, 2
-		subf	r_len, r5, r_len
+		subf	%r5, %r5, %r6		/* words to fill to alignment */
+		mtctr	%r5
+		slwi	%r5, %r5, 2
+		subf	r_len, %r5, r_len
 
 		subi	r_dst, r_dst, 4
 1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
@@ -260,15 +262,15 @@
 
 cb_aligned_cb:	/* no need to check r_len, see above */
 		
-		srw.	r5, r_len, r10		/* Number of cache blocks */
-		mtctr	r5
+		srw.	%r5, r_len, %r10		/* Number of cache blocks */
+		mtctr	%r5
 		beq	cblocks_done
 
-		slw	r5, r5, r10
-		subf	r_len, r5, r_len
+		slw	%r5, %r5, %r10
+		subf	r_len, %r5, r_len
 
 1:		dcbz	0, r_dst		/* Clear blockwise */
-		add	r_dst, r_dst, r9
+		add	r_dst, r_dst, %r9
 		bdnz	1b
 
 cblocks_done:	/* still CL aligned, but less than CL bytes left */
@@ -291,47 +293,47 @@
 #else
 		cmplwi	cr1, r_len, 8		/* < 8 bytes to move? */
 #endif
-		andi.	r5, r_dst, 0x03		/* bytes to fill to align4 */
+		andi.	%r5, r_dst, 0x03		/* bytes to fill to align4 */
 		blt	cr1, sf_bytewise	/* trivial byte mover */
 
-		li	r6, 4
-		subf	r5, r5, r6
+		li	%r6, 4
+		subf	%r5, %r5, %r6
 		beq+	sf_aligned_w		/* dest is word aligned */
 
 #if USE_STSWX
-		mtxer	r5
-		stswx	r0, 0, r_dst
-		add	r_dst, r5, r_dst
+		mtxer	%r5
+		stswx	%r0, 0, r_dst
+		add	r_dst, %r5, r_dst
 #else
-		mtctr	r5			/* nope, then fill bytewise */
+		mtctr	%r5			/* nope, then fill bytewise */
 		subi	r_dst, r_dst, 1		/* until it is */
 1:		stbu	r_val, 1(r_dst)		
 		bdnz	1b
 
 		addi	r_dst, r_dst, 1
 #endif
-		subf	r_len, r5, r_len
+		subf	r_len, %r5, r_len
 
 sf_aligned_w:	/* no need to check r_len since it were >= 8 bytes initially */
 #if USE_STSWX
-		mr	r6, r0
-		mr	r7, r0
+		mr	%r6, %r0
+		mr	%r7, %r0
 
-		srwi	r5, r_len, 3
-		mtctr	r5
+		srwi	%r5, r_len, 3
+		mtctr	%r5
 		
-		slwi	r5, r5, 3		/* adjust len */
-		subf.	r_len, r5, r_len
+		slwi	%r5, %r5, 3		/* adjust len */
+		subf.	r_len, %r5, r_len
 		
-1:		stswi	r6, r_dst, 8
+1:		stswi	%r6, r_dst, 8
 		addi	r_dst, r_dst, 8
 		bdnz	1b
 #else
-		srwi	r5, r_len, 2		/* words to fill */
-		mtctr	r5
+		srwi	%r5, r_len, 2		/* words to fill */
+		mtctr	%r5
 
-		slwi	r5, r5, 2
-		subf.	r_len, r5, r_len	/* adjust len for fill */
+		slwi	%r5, %r5, 2
+		subf.	r_len, %r5, r_len	/* adjust len for fill */
 
 		subi	r_dst, r_dst, 4
 1:		stwu	r_val, 4(r_dst)
@@ -341,17 +343,17 @@
 
 sf_word_done:	bne-	sf_bytewise
 
-sf_return:	mr	r3, r8			/* restore orig ptr */
+sf_return:	mr	%r3, %r8			/* restore orig ptr */
 		blr				/* for memset functionality */
 
 sf_bytewise:
 #if USE_STSWX
-		mr	r5, r0
-		mr	r6, r0
-		mr	r7, r0
+		mr	%r5, %r0
+		mr	%r6, %r0
+		mr	%r7, %r0
 		
 		mtxer	r_len
-		stswx	r5, 0, r_dst
+		stswx	%r5, 0, r_dst
 #else
 		mtctr	r_len
 
@@ -359,7 +361,7 @@
 1:		stbu	r_val, 1(r_dst)
 		bdnz	1b
 #endif
-		mr	r3, r8			/* restore orig ptr */
+		mr	%r3, %r8			/* restore orig ptr */
 		blr				/* for memset functionality */
 
 /*----------------------------------------------------------------------*/
--- lib/libc/arch/powerpc/string/ffs.S.orig	Wed Jun 16 20:03:32 2004
+++ lib/libc/arch/powerpc/string/ffs.S	Wed Jun 16 20:04:00 2004
@@ -31,11 +31,11 @@
 
 .align 4
 ENTRY(ffs)
-	neg	r4, r3
-	and	r3, r4, r3
-	cntlzw	r3, r3
-	li	r0, 32
-	subf	r3, r3, r0
+	neg	%r4, %r3
+	and	%r3, %r4, %r3
+	cntlzw	%r3, %r3
+	li	%r0, 32
+	subf	%r3, %r3, %r0
 	blr
 
 /*----------------------------------------------------------------------*/
--- lib/libc/arch/powerpc/string/strlen.S.orig	Wed Jun 16 13:01:19 2004
+++ lib/libc/arch/powerpc/string/strlen.S	Wed Jun 16 13:01:33 2004
@@ -1,7 +1,7 @@
-/*	$NetBSD: strlen.S,v 1.1 2001/11/30 02:26:35 mjl Exp $ */
+/*	$NetBSD: strlen.S,v 1.3 2003/07/26 19:24:37 salo Exp $ */
 
 /*-
- * Copyright (C) 2001	Martin J. Laubach <mjl@netbsd.org>
+ * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -64,49 +64,49 @@
 ENTRY(strlen)
 
 		/* Setup constants */
-		lis	r10, 0x7f7f
-		lis	r9, 0xfefe
-		ori	r10, r10, 0x7f7f
-		ori	r9, r9, 0xfeff
+		lis	%r10, 0x7f7f
+		lis	%r9, 0xfefe
+		ori	%r10, %r10, 0x7f7f
+		ori	%r9, %r9, 0xfeff
 
 		/* Mask out leading bytes on non aligned strings */
-		rlwinm.	r8, r3, 3, 27, 28	/* leading bits to mask */
-		clrrwi	r5, r3, 2		/*  clear low 2 addr bits */
-		li	r0, -1
+		rlwinm.	%r8, %r3, 3, 27, 28	/* leading bits to mask */
+		clrrwi	%r5, %r3, 2		/*  clear low 2 addr bits */
+		li	%r0, -1
 		beq+	3f			/* skip alignment if already */
 						/* aligned */
 
-		srw	r0, r0, r8		/* make 0000...1111 mask */
+		srw	%r0, %r0, %r8		/* make 0000...1111 mask */
 
-		lwz	r7, 0(r5)
-		nor	r0, r0, r0		/* invert mask */
-		or	r7, r7, r0		/* make leading bytes != 0 */
+		lwz	%r7, 0(%r5)
+		nor	%r0, %r0, %r0		/* invert mask */
+		or	%r7, %r7, %r0		/* make leading bytes != 0 */
 		b	2f
 
-3:		subi	r5, r5, 4
+3:		subi	%r5, %r5, 4
 
-1:		lwzu	r7, 4(r5)		/* fetch data word */
+1:		lwzu	%r7, 4(%r5)		/* fetch data word */
 
-2:		nor	r0, r7, r10		/* do step 1 */
-		add	r6, r7, r9
-		and.	r0, r0, r6
+2:		nor	%r0, %r7, %r10		/* do step 1 */
+		add	%r6, %r7, %r9
+		and.	%r0, %r0, %r6
 
 		beq+	1b			/* no NUL bytes here */
 	
-		and	r8, r7, r10		/* ok, a NUL is somewhere */
-		or	r7, r7, r10		/* do step 2 to find out */
-		add	r0, r8, r10		/* where */
-		nor	r8, r7, r0
+		and	%r8, %r7, %r10		/* ok, a NUL is somewhere */
+		or	%r7, %r7, %r10		/* do step 2 to find out */
+		add	%r0, %r8, %r10		/* where */
+		nor	%r8, %r7, %r0
 
-		cntlzw	r0, r8			/* offset from this word */
-		srwi	r4, r0, 3
+		cntlzw	%r0, %r8		/* offset from this word */
+		srwi	%r4, %r0, 3
 
-		add	r4, r5, r4		/* r4 contains end pointer */
+		add	%r4, %r5, %r4		/* r4 contains end pointer */
 		/* NOTE: Keep it so this function returns the end pointer
 		   in r4, so we can it use from other str* calls (strcat
 		   comes to mind */
 
-		subf	r3, r3, r4
+		subf	%r3, %r3, %r4
 		blr
 
 /*----------------------------------------------------------------------*/