Subject: ARM710 bug in UMULL
To: None <port-arm32@netbsd.org>
From: None <kim@pvv.ntnu.no>
List: port-arm32
Date: 11/01/2001 12:02:19
The ARM7 is said to be able to perform 32-bit x 32-bit -> 64-bit
multiplications with the UMULL instruction, for example:
umull r2,r4, r1, r3
However, this does not work on my RiscPC700 running NetBSD. The processor is
VLSI/ARM
9226 B557125
VY86C710A
ARM710a
ARM Ltd
Are there known bugs with the ARM710a?
I have tested this thoroughly, hand-assembling and disassembling individual
bits of the instruction to rule out assembler bugs and the like. The strange
thing is that I apparently got it to work once. Perhaps it is sensitive to
the particular instance of the instruction, the registers, or the arguments.
I am currently using this processor to develop Bluetooth and crypto
applications, in which the UMULL instruction is very important for
efficiency. NetBSD is really good for such development work. That's
why I prefer it.
Below is the program I use to test this. It should print 21, because
that is 7*3, but it usually prints 2, or about 4 000 000 000. It is
assembled correctly by the GNU assembler (as), e.g. with: gcc -o x x.s
Kim0
@ Symbolic names for ARM registers r9-r15.  The code in this file uses
@ fp, ip, sp, lr and pc through these aliases; rfp and sl are defined
@ but not referenced here.
rfp .req r9
sl .req r10
fp .req r11
ip .req r12
sp .req r13
lr .req r14
pc .req r15
@ Marker labels; nothing in this file refers to them.  Their names suggest
@ they tag the object as GCC 2 compiler output -- TODO confirm.
gcc2_compiled.:
___gnu_compiled_c:
@-----------------------------------------------------------------------
@ _gurgle -- UMULL probe: multiplies the constants 7 and 3 with UMULL
@            and returns the low word of the product.
@
@ In:   r0, r1  spilled to the 8-byte frame and reloaded, but the
@               reloaded values are overwritten before they are used.
@ Out:  r0 = low word written by "umull r2, r4, r1, r3" (r1 = 7, r3 = 3)
@       r1 = 0  (the high word in r4 is discarded)
@ Uses: r2, r3, ip as scratch; r4 is saved and restored via the frame.
@
@ NOTE(review): UMULL is part of the long-multiply ("M") extension of
@ the ARM instruction set.  Confirm that the target core implements it
@ before treating a wrong result as a silicon bug.
@-----------------------------------------------------------------------
        .text
        .align  0
        .global _gurgle
        .type   _gurgle,#function
_gurgle:
        @ args = 0, pretend = 0, frame = 8
        @ frame_needed = 1, current_function_anonymous_args = 0

        @ --- prologue: push r4/fp/ip/lr/pc, point fp at the saved pc slot
        mov     ip, sp
        stmdb   sp!, {r4, fp, ip, lr, pc}       @ stmdb == stmfd
        sub     fp, ip, #4
        sub     sp, sp, #8                      @ two word-sized frame slots

        @ --- spill both incoming words, then reload them (swapped into
        @     r3 / r1); both registers are overwritten further down
        str     r0, [fp, #-20]
        str     r1, [fp, #-24]
        ldr     r3, [fp, #-20]
        ldr     r1, [fp, #-24]

        @ --- padding: 65 no-ops between the loads and the multiply
        @ nop = mov r0,r0 = 0xE1A00000 (or "0x a0e1 0000" as dumped)
        @ 1110 always, 00 data processing, 0 register operand 2,
        @ 1101 mov, 0 leave condition codes alone, 0000 r0, 0000 r0,
        @ 0000-0000-0000 operand 2 = r0 unshifted
        .rept   65
        mov     r0, r0
        .endr

        @ --- the instruction under test
        mov     r1, #7
        mov     r3, #3
        umull   r2, r4, r1, r3                  @ r4:r2 = r1 * r3
        @ 0xe0842391 (or "0x 9123 84e0" as dumped)
        @ 1110 always, 0000 1 long multiply, U=0 unsigned,
        @ A=0 no accumulate, S=0 leave condition codes alone,
        @ RdHi=0100 (r4), RdLo=0010 (r2), Rs=0011 (r3), 1001, Rm=0001 (r1)

        @ --- result: r0 = low word, r1 = 0
        mov     r3, r2
        mov     r4, #0
        mov     r1, r4
        mov     r0, r3
        b       L1
L1:
        @ --- epilogue: restore r4, fp, sp (from the saved ip) and return
        @     by loading the saved lr into pc
        ldmdb   fp, {r4, fp, sp, pc}            @ ldmdb == ldmea
Lfe1:
        .size   _gurgle,Lfe1-_gurgle
@ printf format string used by _main (reached through the literal at L4):
@ "%d" followed by a newline (\012) and an explicit NUL terminator (\000).
.align 0
LC0:
.ascii "%d\012\000"
@-----------------------------------------------------------------------
@ _main -- test driver: calls _gurgle(3, 13) and prints the word it
@          returns in r0 using the "%d\n" format string at LC0.
@
@ Out:  r0 = 0.  Previously r0 still held _printf's return value when
@       _main returned, so the value handed back to the caller was
@       arbitrary.
@ Uses: r1-r3, ip as scratch; r4 is saved and restored via the frame.
@-----------------------------------------------------------------------
        .align  0
        .global _main
        .type   _main,#function
_main:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 1, current_function_anonymous_args = 0

        @ --- prologue: same frame layout as _gurgle, no locals
        mov     ip, sp
        stmfd   sp!, {r4, fp, ip, lr, pc}
        sub     fp, ip, #4
        bl      ___main                 @ external; not defined in this file

        @ --- call the probe with (3, 13)
        mov     r0, #3
        mov     r1, #13
        bl      _gurgle

        @ --- printf(LC0, r0_result, r1_result); the format consumes only
        @     the first value, the second one is passed but not printed
        mov     r4, r1
        mov     r3, r0
        ldr     r0, L4                  @ r0 = address of LC0
        mov     r2, r4
        mov     r1, r3
        bl      _printf

        mov     r0, #0                  @ return 0, not _printf's result
        b       L3                      @ skip over the literal pool
L5:
        .align  0
L4:
        .word   LC0
L3:
L2:
        @ --- epilogue: restore r4, fp, sp and return via the saved lr
        ldmea   fp, {r4, fp, sp, pc}
Lfe2:
        .size   _main,Lfe2-_main