Source-Changes archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: CVS commit: src/sys/arch/arm/ep93xx
> On Sun, May 25, 2008 at 03:57:22AM +0000, Katsuomi Hamajima wrote:
>> Modified Files:
>> src/sys/arch/arm/ep93xx: epclk.c
>>
>> Log Message:
>> speed up an initial value calculation of "remaining" variable in delay().
>
> Are you sure that GCC doesn't do exactly that? For unsigned arithmetic,
> GCC will normally use unsigned mul + shift and not a division. It would
> be strongly prefered to not have inline assembly here.
I do not understand your opinion, sorry. I attach disassembled delay().
Please tell me details.
void
delay(unsigned int n)
{
unsigned int cur_tick, initial_tick;
int remaining;
#ifdef DEBUG
if (epclk_sc == NULL) {
printf("delay: called before start epclk\n");
return;
}
#endif
/*
* Read the counter first, so that the rest of the setup overhead is
* counted.
*/
initial_tick = TIMER4VAL();
if (n <= UINT_MAX / TIMER_FREQ) {
/*
* For unsigned arithmetic, division can be replaced with
* multiplication with the inverse and a shift.
*/
remaining = n * TIMER_FREQ / 1000000;
} else {
/* This is a very long delay.
* Being slow here doesn't matter.
*/
remaining = (unsigned long long) n * TIMER_FREQ / 1000000;
}
while (remaining > 0) {
cur_tick = TIMER4VAL();
if (cur_tick > initial_tick)
remaining -= UINT_MAX - (cur_tick - initial_tick);
else
remaining -= initial_tick - cur_tick;
initial_tick = cur_tick;
}
}
0000000c <delay>:
c: e1a0c00d mov ip, sp
10: e92dd8f0 stmdb sp!, {r4, r5, r6, r7, fp, ip, lr, pc}
14: e3a02c11 mov r2, #4352 ; 0x1100
18: e2822011 add r2, r2, #17 ; 0x11
1c: e3a0320f mov r3, #-268435456 ; 0xf0000000
20: e2833811 add r3, r3, #1114112 ; 0x110000
24: e1500002 cmp r0, r2
28: e24cb004 sub fp, ip, #4 ; 0x4
2c: e5937060 ldr r7, [r3, #96]
30: e1a0c000 mov ip, r0
34: 8a000020 bhi 88 <delay+0x7c>
38: e1a00a00 mov r0, r0, lsl #20
3c: e3a0193d mov r1, #999424 ; 0xf4000
40: e040080c sub r0, r0, ip, lsl #16
44: e2811d09 add r1, r1, #576 ; 0x240
48: ebfffffe bl 0 <__udivsi3>
4c: e3500000 cmp r0, #0 ; 0x0
50: d89da8f0 ldmleia sp, {r4, r5, r6, r7, fp, sp, pc}
54: e3a0320f mov r3, #-268435456 ; 0xf0000000
58: e2833811 add r3, r3, #1114112 ; 0x110000
5c: e5933060 ldr r3, [r3, #96]
60: e0672003 rsb r2, r7, r3
64: e1530007 cmp r3, r7
68: e0631007 rsb r1, r3, r7
6c: e1e02002 mvn r2, r2
70: 80620000 rsbhi r0, r2, r0
74: 90610000 rsbls r0, r1, r0
78: e3500000 cmp r0, #0 ; 0x0
7c: e1a07003 mov r7, r3
80: ca000013 bgt 54 <delay+0x48>
84: e89da8f0 ldmia sp, {r4, r5, r6, r7, fp, sp, pc}
88: e1a03000 mov r3, r0
8c: e3a04000 mov r4, #0 ; 0x0
90: e1a06804 mov r6, r4, lsl #16
94: e1866823 orr r6, r6, r3, lsr #16
98: e1a05803 mov r5, r3, lsl #16
9c: e1a04206 mov r4, r6, lsl #4
a0: e1a03205 mov r3, r5, lsl #4
a4: e1844e25 orr r4, r4, r5, lsr #28
a8: e1a00003 mov r0, r3
ac: e1a01004 mov r1, r4
b0: e3a0293d mov r2, #999424 ; 0xf4000
b4: e0500005 subs r0, r0, r5
b8: e0c11006 sbc r1, r1, r6
bc: e2822d09 add r2, r2, #576 ; 0x240
c0: e3a03000 mov r3, #0 ; 0x0
c4: ebfffffe bl 0 <__udivdi3>
c8: e3500000 cmp r0, #0 ; 0x0
cc: ca000013 bgt 54 <delay+0x48>
d0: e89da8f0 ldmia sp, {r4, r5, r6, r7, fp, sp, pc}
void
delay(unsigned int n)
{
unsigned int cur_tick, initial_tick;
int remaining;
u_int32_t scalar = 4222124650UL;
#ifdef DEBUG
if (epclk_sc == NULL) {
printf("delay: called before start epclk\n");
return;
}
#endif
/*
* Read the counter first, so that the rest of the setup overhead is
* counted.
*/
initial_tick = TIMER4VAL();
/* This is a quick ARM way to multiply by 983040/1000000 */
__asm volatile ("umull %0, %1, %2, %3;"
: "=r"(n), "=&r"(remaining)
: "r"((scalar)), "0"(n));
while (remaining > 0) {
cur_tick = TIMER4VAL();
if (cur_tick >= initial_tick)
remaining -= cur_tick - initial_tick;
else
remaining -= UINT_MAX - initial_tick + cur_tick + 1;
initial_tick = cur_tick;
}
}
0000000c <delay>:
c: e52de004 str lr, [sp, #-4]!
10: e3a0220f mov r2, #-268435456 ; 0xf0000000
14: e2822811 add r2, r2, #1114112 ; 0x110000
18: e5921060 ldr r1, [r2, #96]
1c: e59f303c ldr r3, [pc, #60] ; 60 <.text+0x60>
20: e08c0093 umull r0, ip, r3, r0
24: e35c0000 cmp ip, #0 ; 0x0
28: d49df004 ldrle pc, [sp], #4
2c: e1a0e002 mov lr, r2
30: e59e2060 ldr r2, [lr, #96]
34: e1e03001 mvn r3, r1
38: e0823003 add r3, r2, r3
3c: e1520001 cmp r2, r1
40: e0610002 rsb r0, r1, r2
44: e063300c rsb r3, r3, ip
48: 3243c001 subcc ip, r3, #1 ; 0x1
4c: 2060c00c rsbcs ip, r0, ip
50: e35c0000 cmp ip, #0 ; 0x0
54: e1a01002 mov r1, r2
58: ca00000a bgt 30 <delay+0x24>
5c: e49df004 ldr pc, [sp], #4
60: fba8826a blx fea20a12 <_KERNEL_OPT_ARM_INTR_IMPL+0x793b03f1>
Home |
Main Index |
Thread Index |
Old Index