Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/sh5/sh5 Performance tweak to the copy_page/zero_page asm code.
details: https://anonhg.NetBSD.org/src/rev/e4ec982ed8bd
branches: trunk
changeset: 536830:e4ec982ed8bd
user: scw <scw%NetBSD.org@localhost>
date: Sun Sep 22 20:45:31 2002 +0000
description:
Performance tweak to the copy_page/zero_page asm code.
Allocate/Prefetch one cache-line ahead of the one we're about to deal with.
This reduces the chances of the CPU stalling while waiting for the cache
to flush a dirty line in order to satisfy the Allocate/Prefetch request.
diffstat:
sys/arch/sh5/sh5/locore_subr.S | 31 ++++++++++++++++++++++++-------
1 files changed, 24 insertions(+), 7 deletions(-)
diffs (76 lines):
diff -r e18f142c3867 -r e4ec982ed8bd sys/arch/sh5/sh5/locore_subr.S
--- a/sys/arch/sh5/sh5/locore_subr.S Sun Sep 22 20:31:18 2002 +0000
+++ b/sys/arch/sh5/sh5/locore_subr.S Sun Sep 22 20:45:31 2002 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: locore_subr.S,v 1.9 2002/09/11 11:03:08 scw Exp $ */
+/* $NetBSD: locore_subr.S,v 1.10 2002/09/22 20:45:31 scw Exp $ */
/*
* Copyright 2002 Wasabi Systems, Inc.
@@ -834,19 +834,25 @@
#ifndef _LP64
add.l r2, r63, r2 /* Ensure kva is sign-extended */
#endif
- movi NBPG, r0
- add r2, r0, r0 /* End of page */
+ movi NBPG-32, r0
+ add r2, r0, r0 /* End of page (minus 1 cache-line) */
blink tr1, r63
/* Cache-align the loop */
.balign 32
-1: alloco r2, 0 /* Allocate a cache block */
+1: alloco r2, 32 /* Allocate next cache-line */
st.q r2, 0, r63 /* Zero the block */
st.q r2, 8, r63
st.q r2, 16, r63
st.q r2, 24, r63
addi r2, 32, r2 /* Next block */
bne/l r2, r0, tr1 /* Back for the next one, until done */
+
+ /* Finish the remaining block */
+ st.q r2, 0, r63
+ st.q r2, 8, r63
+ st.q r2, 16, r63
+ st.q r2, 24, r63
blink tr0, r63
@@ -863,8 +869,8 @@
add.l r2, r63, r2 /* Ensure src/dst are sign-extended */
add.l r3, r63, r3
#endif
- movi NBPG, r0
- add r2, r0, r0 /* End of page */
+ movi NBPG-32, r0
+ add r2, r0, r0 /* End of page (minus 1 cache-line) */
blink tr1, r63
/* Cache-align the loop */
@@ -873,14 +879,25 @@
ld.q r3, 8, r5
ld.q r3, 16, r6
ld.q r3, 24, r7
- alloco r2, 0 /* Allocate a cache block for dst */
+ ld.q r3, 32, r63 /* Pre-fetch next src cache-line */
st.q r2, 0, r4
st.q r2, 8, r5
st.q r2, 16, r6
st.q r2, 24, r7
+ alloco r2, 32 /* Allocate cache-line for next dst */
addi r2, 32, r2 /* Next dst block */
addi r3, 32, r3 /* Next src block */
bne/l r2, r0, tr1 /* Back for the next one, until done */
+
+ /* Finish the remaining block */
+ ld.q r3, 0, r4
+ ld.q r3, 8, r5
+ ld.q r3, 16, r6
+ ld.q r3, 24, r7
+ st.q r2, 0, r4
+ st.q r2, 8, r5
+ st.q r2, 16, r6
+ st.q r2, 24, r7
blink tr0, r63
Home |
Main Index |
Thread Index |
Old Index