Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/arm/arm Address PR/48710.
details: https://anonhg.NetBSD.org/src/rev/1a7bedf997d3
branches: trunk
changeset: 328622:1a7bedf997d3
user: matt <matt%NetBSD.org@localhost>
date: Thu Apr 10 02:55:13 2014 +0000
description:
Address PR/48710.
r3 is not trashed during the routine so the level is preserved.
The only two real bugs were not initializing r3 to 0 to start with the L1 cache
and the invalid fetching of the set count from r3. The mov r1, #0 should have
been mov r3, #0 and has been corrected.
Instead of two shifts, just use ubfx to extract the set bits and then compare
them to 0.
Add some other minor optimizations that make the code a little clearer.
diffstat:
sys/arch/arm/arm/cpufunc_asm_armv7.S | 64 ++++++++++++++++-------------------
1 files changed, 29 insertions(+), 35 deletions(-)
diffs (174 lines):
diff -r 103b1e1298a4 -r 1a7bedf997d3 sys/arch/arm/arm/cpufunc_asm_armv7.S
--- a/sys/arch/arm/arm/cpufunc_asm_armv7.S Thu Apr 10 02:49:42 2014 +0000
+++ b/sys/arch/arm/arm/cpufunc_asm_armv7.S Thu Apr 10 02:55:13 2014 +0000
@@ -245,8 +245,8 @@
add r1, r1, r3 @ add to length
bic r0, r0, ip @ clear offset from start.
1:
- mcr p15, 0, r0, c7, c6, 1 @ invalidate the D-Cache line
- add r0, r0, r2
+ mcr p15, 0, r0, c7, c6, 1 @ invalidate the D-Cache line
+ add r0, r0, r2
subs r1, r1, r2
bhi 1b
@@ -293,8 +293,8 @@
END(armv7_idcache_wbinv_all)
/*
- * These work very hard to not push registers onto the stack and to limit themselves
- * to use r0-r3 and ip.
+ * These work very hard to not push registers onto the stack
+ * and to limit themselves to use r0-r3 and ip.
*/
/* * LINTSTUB: void armv7_icache_inv_all(void); */
ENTRY_NP(armv7_icache_inv_all)
@@ -306,7 +306,7 @@
ubfx r3, r0, #3, #10 @ get numways - 1 from CCSIDR
clz r1, r3 @ number of bits to MSB of way
lsl r3, r3, r1 @ shift into position
- mov ip, #1 @
+ mov ip, #1 @
lsl ip, ip, r1 @ ip now contains the way decr
ubfx r0, r0, #0, #3 @ get linesize from CCSIDR
@@ -322,7 +322,7 @@
1: mcr p15, 0, r3, c7, c6, 2 @ invalidate line
movs r0, r3 @ get current way/set
beq 2f @ at 0 means we are done.
- movs r0, r0, lsl #10 @ clear way bits leaving only set bits
+ lsls r0, r0, #10 @ clear way bits leaving only set bits
subne r3, r3, r1 @ non-zero?, decrement set #
subeq r3, r3, r2 @ zero?, decrement way # and restore set count
b 1b
@@ -337,17 +337,15 @@
/* * LINTSTUB: void armv7_dcache_inv_all(void); */
ENTRY_NP(armv7_dcache_inv_all)
mrc p15, 1, r0, c0, c0, 1 @ read CLIDR
- ands r3, r0, #0x07000000
+ tst r0, #0x07000000
beq .Ldone_inv
- lsr r3, r3, #23 @ left align loc (low 4 bits)
+ mov r3, #0 @ start with L1
- mov r1, #0
.Lstart_inv:
add r2, r3, r3, lsr #1 @ r2 = level * 3 / 2
mov r1, r0, lsr r2 @ r1 = cache type
- and r1, r1, #7
- cmp r1, #2 @ is it data or i&d?
- blt .Lnext_level_inv @ nope, skip level
+ tst r1, #6 @ is it data or i&d?
+ beq .Lnext_level_inv @ nope, skip level
mcr p15, 2, r3, c0, c0, 0 @ select cache level
isb
@@ -357,14 +355,14 @@
add ip, ip, #4 @ apply bias
ubfx r2, r0, #13, #15 @ get numsets - 1 from CCSIDR
lsl r2, r2, ip @ shift to set position
- orr r3, r3, r2 @ merge set into way/set/level
+ orr r3, r3, r2 @ merge set into way/set/level
mov r1, #1
lsl r1, r1, ip @ r1 = set decr
ubfx ip, r0, #3, #10 @ get numways - 1 from [to be discarded] CCSIDR
clz r2, ip @ number of bits to MSB of way
lsl ip, ip, r2 @ shift by that into way position
- mov r0, #1 @
+ mov r0, #1 @
lsl r2, r0, r2 @ r2 now contains the way decr
mov r0, r3 @ get sets/level (no way yet)
orr r3, r3, ip @ merge way into way/set/level
@@ -373,20 +371,19 @@
/* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */
1: mcr p15, 0, r3, c7, c6, 2 @ invalidate line
- cmp r3, #15 @ are we done with this level (way/set == 0)
+ cmp r3, #15 @ are we done with this level (way/set == 0)
bls .Lnext_level_inv @ yes, go to next level
- lsl r0, r3, #10 @ clear way bits leaving only set/level bits
- lsr r0, r0, #4 @ clear level bits leaving only set bits
+ ubfx r0, r3, #4, #18 @ extract set bits
+ cmp r0, #0 @ compare
subne r3, r3, r1 @ non-zero?, decrement set #
subeq r3, r3, r2 @ zero?, decrement way # and restore set count
b 1b
.Lnext_level_inv:
mrc p15, 1, r0, c0, c0, 1 @ read CLIDR
- and ip, r0, #0x07000000 @ narrow to LoC
- lsr ip, ip, #23 @ left align LoC (low 4 bits)
+ ubfx ip, r0, #24, #3 @ narrow to LoC
add r3, r3, #2 @ go to next level
- cmp r3, ip @ compare
+ cmp r3, ip, lsl #1 @ compare
blt .Lstart_inv @ not done, next level (r0 == CLIDR)
.Ldone_inv:
@@ -400,17 +397,15 @@
/* * LINTSTUB: void armv7_dcache_wbinv_all(void); */
ENTRY_NP(armv7_dcache_wbinv_all)
mrc p15, 1, r0, c0, c0, 1 @ read CLIDR
- ands r3, r0, #0x07000000
- beq .Ldone_wbinv
- lsr r3, r3, #23 @ left align loc (low 4 bits)
+ tst r0, #0x07000000
+ bxeq lr
+ mov r3, #0 @ start with L1
- mov r1, #0
.Lstart_wbinv:
add r2, r3, r3, lsr #1 @ r2 = level * 3 / 2
mov r1, r0, lsr r2 @ r1 = cache type
- bfc r1, #3, #29
- cmp r1, #2 @ is it data or i&d?
- blt .Lnext_level_wbinv @ nope, skip level
+ tst r1, #6 @ is it unified or data?
+ beq .Lnext_level_wbinv @ nope, skip level
mcr p15, 2, r3, c0, c0, 0 @ select cache level
isb
@@ -420,14 +415,14 @@
add ip, ip, #4 @ apply bias
ubfx r2, r0, #13, #15 @ get numsets - 1 from CCSIDR
lsl r2, r2, ip @ shift to set position
- orr r3, r3, r2 @ merge set into way/set/level
+ orr r3, r3, r2 @ merge set into way/set/level
mov r1, #1
lsl r1, r1, ip @ r1 = set decr
ubfx ip, r0, #3, #10 @ get numways - 1 from [to be discarded] CCSIDR
clz r2, ip @ number of bits to MSB of way
lsl ip, ip, r2 @ shift by that into way position
- mov r0, #1 @
+ mov r0, #1 @
lsl r2, r0, r2 @ r2 now contains the way decr
mov r0, r3 @ get sets/level (no way yet)
orr r3, r3, ip @ merge way into way/set/level
@@ -436,20 +431,19 @@
/* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */
1: mcr p15, 0, r3, c7, c14, 2 @ writeback and invalidate line
- cmp r3, #15 @ are we done with this level (way/set == 0)
+ cmp r3, #15 @ are we done with this level (way/set == 0)
bls .Lnext_level_wbinv @ yes, go to next level
- lsl r0, r3, #10 @ clear way bits leaving only set/level bits
- lsr r0, r0, #4 @ clear level bits leaving only set bits
+ ubfx r0, r3, #4, #18 @ extract set bits
+ cmp r0, #0 @ compare
subne r3, r3, r1 @ non-zero?, decrement set #
subeq r3, r3, r2 @ zero?, decrement way # and restore set count
b 1b
.Lnext_level_wbinv:
mrc p15, 1, r0, c0, c0, 1 @ read CLIDR
- and ip, r0, #0x07000000 @ narrow to LoC
- lsr ip, ip, #23 @ left align LoC (low 4 bits)
+ ubfx ip, r0, #24, #3 @ narrow to LoC
add r3, r3, #2 @ go to next level
- cmp r3, ip @ compare
+ cmp r3, ip, lsl #1 @ compare
blt .Lstart_wbinv @ not done, next level (r0 == CLIDR)
.Ldone_wbinv:
Home |
Main Index |
Thread Index |
Old Index