Subject: StrongARM performance tweaks cpufunc_asm.S
To: None <port-arm32@netbsd.org>
From: Richard Earnshaw <rearnsha@buzzard.freeserve.co.uk>
List: port-arm32
Date: 03/07/2001 22:09:42
This is a multipart MIME message.
--==_Exmh_-11899661060
Content-Type: text/plain; charset=us-ascii
Two tweaks and (I think) some corrections, this time for StrongARM chips only.
First tweak: The existing code for the StrongARM maintains two blocks of
RAM for use when flushing the cache; on each cache flush the blocks are
switched. This is unnecessary because we never use this RAM for any other
purpose, so all we achieve by switching banks is to make the second and
subsequent cache flushes less efficient (since all they do is evict lines
of data from the other flush block).
Second tweak: increase the size of regions for which we use the single-line
flush commands (the threshold in the purge range routines goes from 0x2000
to 0x4000 bytes) -- profiling shows this to be a major win over flushing the
entire cache (since the likelihood is that such lines won't be in the cache
anyway).
Finally, the correctness fix is to add some calls to drain the write buffers
-- these are particularly important when we are trying to synchronize the
instruction cache (I$) and the data cache (D$).
R.
--==_Exmh_-11899661060
Content-Type: text/plain ; name="arm32cpu.diffs"; charset=us-ascii
Content-Description: arm32cpu.diffs
Content-Disposition: attachment; filename="arm32cpu.diffs"
Index: cpufunc_asm.S
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/arm32/arm32/cpufunc_asm.S,v
retrieving revision 1.10
diff -p -p -r1.10 cpufunc_asm.S
*** cpufunc_asm.S 1999/10/26 06:53:41 1.10
--- cpufunc_asm.S 2001/03/07 21:58:06
*************** ENTRY(sa110_cache_cleanD)
*** 513,525 ****
--- 513,528 ----
#endif
ldr r2, Lsa110_cache_clean_addr
ldmia r2, {r0, r1}
+ #ifdef DOUBLE_CACHE_CLEAN_BANK
eor r0, r0, r1
str r0, [r2]
+ #endif
Lsa110_cache_cleanD_loop:
ldr r2, [r0], #32
subs r1, r1, #32
bne Lsa110_cache_cleanD_loop
+ mcr 15, 0, r0, c7, c10, 4 /* drain write buffer */
#ifdef CACHE_CLEAN_BLOCK_INTR
msr cpsr_all , r3
*************** ENTRY(sa110_cache_purgeID)
*** 542,555 ****
--- 545,561 ----
#endif
ldr r2, Lsa110_cache_clean_addr
ldmia r2, {r0, r1}
+ #ifdef DOUBLE_CACHE_CLEAN_BANK
eor r0, r0, r1
str r0, [r2]
+ #endif
Lsa110_cache_purgeID_loop:
ldr r2, [r0], #32
subs r1, r1, #32
bne Lsa110_cache_purgeID_loop
+ mcr 15, 0, r0, c7, c10, 4 /* drain write buffer */
mcr 15, 0, r0, c7, c5, 0 /* flush I cache (D flushed above) */
#ifdef CACHE_CLEAN_BLOCK_INTR
msr cpsr_all , r3
*************** ENTRY(sa110_cache_purgeD)
*** 571,584 ****
--- 577,593 ----
#endif
ldr r2, Lsa110_cache_clean_addr
ldmia r2, {r0, r1}
+ #ifdef DOUBLE_CACHE_CLEAN_BANK
eor r0, r0, r1
str r0, [r2]
+ #endif
Lsa110_cache_purgeD_loop:
ldr r2, [r0], #32
subs r1, r1, #32
bne Lsa110_cache_purgeD_loop
+ mcr 15, 0, r0, c7, c10, 4 /* drain write buffer */
#ifdef CACHE_CLEAN_BLOCK_INTR
msr cpsr_all , r3
#else
*************** Lsa110_cache_purgeD_loop:
*** 588,599 ****
--- 597,610 ----
ENTRY(sa110_cache_purgeID_E)
mcr 15, 0, r0, c7, c10, 1 /* clean dcache entry */
+ mcr 15, 0, r0, c7, c10, 4 /* drain write buffer */
mcr 15, 0, r0, c7, c5, 0 /* flush I cache */
mcr 15, 0, r0, c7, c6, 1 /* flush D cache single entry */
mov pc, lr
ENTRY(sa110_cache_purgeD_E)
mcr 15, 0, r0, c7, c10, 1 /* clean dcache entry */
+ mcr 15, 0, r0, c7, c10, 4 /* drain write buffer */
mcr 15, 0, r0, c7, c6, 1 /* flush D cache single entry */
mov pc, lr
#endif /* CPU_SA110 */
*************** ENTRY(sa110_cache_syncI)
*** 626,639 ****
--- 637,653 ----
#endif
ldr r2, Lsa110_cache_clean_addr
ldmia r2, {r0, r1}
+ #ifdef DOUBLE_CACHE_CLEAN_BANK
eor r0, r0, r1
str r0, [r2]
+ #endif
Lsa110_cache_syncI_loop:
ldr r2, [r0], #32
subs r1, r1, #32
bne Lsa110_cache_syncI_loop
+ mcr 15, 0, r0, c7, c10, 4 /* drain write buffer */
mcr 15, 0, r0, c7, c5, 0 /* flush I cache */
#ifdef CACHE_CLEAN_BLOCK_INTR
msr cpsr_all , r3
*************** sa110_cache_cleanD_rng_loop:
*** 656,666 ****
add r0, r0, #32
subs r1, r1, #32
bpl sa110_cache_cleanD_rng_loop
mov pc, lr
ENTRY(sa110_cache_purgeID_rng)
! cmp r1, #0x2000
bcs _C_LABEL(sa110_cache_purgeID)
and r2, r0, #0x1f
--- 670,681 ----
add r0, r0, #32
subs r1, r1, #32
bpl sa110_cache_cleanD_rng_loop
+ mcr 15, 0, r0, c7, c10, 4 /* drain write buffer */
mov pc, lr
ENTRY(sa110_cache_purgeID_rng)
! cmp r1, #0x4000
bcs _C_LABEL(sa110_cache_purgeID)
and r2, r0, #0x1f
*************** sa110_cache_purgeID_rng_loop:
*** 674,685 ****
subs r1, r1, #32
bpl sa110_cache_purgeID_rng_loop
mcr 15, 0, r0, c7, c5, 0 /* flush I cache */
mov pc, lr
ENTRY(sa110_cache_purgeD_rng)
! cmp r1, #0x2000
bcs _C_LABEL(sa110_cache_purgeD)
and r2, r0, #0x1f
--- 689,701 ----
subs r1, r1, #32
bpl sa110_cache_purgeID_rng_loop
+ mcr 15, 0, r0, c7, c10, 4 /* drain write buffer */
mcr 15, 0, r0, c7, c5, 0 /* flush I cache */
mov pc, lr
ENTRY(sa110_cache_purgeD_rng)
! cmp r1, #0x4000
bcs _C_LABEL(sa110_cache_purgeD)
and r2, r0, #0x1f
*************** sa110_cache_purgeD_rng_loop:
*** 693,698 ****
--- 709,715 ----
subs r1, r1, #32
bpl sa110_cache_purgeD_rng_loop
+ mcr 15, 0, r0, c7, c10, 4 /* drain write buffer */
mov pc, lr
ENTRY(sa110_cache_syncI_rng)
*************** sa110_cache_syncI_rng_loop:
*** 709,714 ****
--- 726,732 ----
subs r1, r1, #32
bpl sa110_cache_syncI_rng_loop
+ mcr 15, 0, r0, c7, c10, 4 /* drain write buffer */
mcr 15, 0, r0, c7, c5, 0 /* flush I cache */
mov pc, lr
--==_Exmh_-11899661060--