Subject: new R10000 cache op implementation
To: None <port-mips@netbsd.org>
From: Takao Shinohara <shin@sm.sony.co.jp>
List: port-sgimips
Date: 10/25/2003 19:34:50
The current implementation of the R10000 cache ops (arch/mips/mips/cache_r10k.c rev. 1.1)
is broken, so I wrote a new implementation independently of Kiyohara-san.
What do you think of this patch (at the end of this mail)?
--- Takao Shinohara
Index: include/cache_r10k.h
===================================================================
RCS file: /cvsroot/src/sys/arch/mips/include/cache_r10k.h,v
retrieving revision 1.1
diff -u -r1.1 cache_r10k.h
--- include/cache_r10k.h 2003/10/05 11:10:25 1.1
+++ include/cache_r10k.h 2003/10/25 10:01:13
@@ -69,94 +69,19 @@
#if defined(_KERNEL) && !defined(_LOCORE)
-/*
- * cache_r10k_op_8lines_64:
- *
- * Perform the specified cache operation on 8 64-byte cache lines.
- */
-#define cache_r10k_op_8lines_64(va, op) \
-do { \
- __asm __volatile( \
- ".set noreorder \n\t" \
- "cache %1, 0x000(%0); cache %1, 0x040(%0) \n\t" \
- "cache %1, 0x080(%0); cache %1, 0x0c0(%0) \n\t" \
- "cache %1, 0x100(%0); cache %1, 0x140(%0) \n\t" \
- "cache %1, 0x180(%0); cache %1, 0x1c0(%0) \n\t" \
- ".set reorder" \
- : \
- : "r" (va), "i" (op) \
- : "memory"); \
-} while (/*CONSTCOND*/0)
-
-/*
- * cache_r10k_op_32lines_64:
- *
- * Perform the specified cache operation on 32 64-byte
- * cache lines.
- */
-#define cache_r10k_op_32lines_64(va, op) \
-do { \
- __asm __volatile( \
- ".set noreorder \n\t" \
- "cache %1, 0x000(%0); cache %1, 0x040(%0); \n\t" \
- "cache %1, 0x080(%0); cache %1, 0x0c0(%0); \n\t" \
- "cache %1, 0x100(%0); cache %1, 0x140(%0); \n\t" \
- "cache %1, 0x180(%0); cache %1, 0x1c0(%0); \n\t" \
- "cache %1, 0x200(%0); cache %1, 0x240(%0); \n\t" \
- "cache %1, 0x280(%0); cache %1, 0x2c0(%0); \n\t" \
- "cache %1, 0x300(%0); cache %1, 0x340(%0); \n\t" \
- "cache %1, 0x380(%0); cache %1, 0x3c0(%0); \n\t" \
- "cache %1, 0x400(%0); cache %1, 0x440(%0); \n\t" \
- "cache %1, 0x480(%0); cache %1, 0x4c0(%0); \n\t" \
- "cache %1, 0x500(%0); cache %1, 0x540(%0); \n\t" \
- "cache %1, 0x580(%0); cache %1, 0x5c0(%0); \n\t" \
- "cache %1, 0x600(%0); cache %1, 0x640(%0); \n\t" \
- "cache %1, 0x680(%0); cache %1, 0x6c0(%0); \n\t" \
- "cache %1, 0x700(%0); cache %1, 0x740(%0); \n\t" \
- "cache %1, 0x780(%0); cache %1, 0x7c0(%0); \n\t" \
- ".set reorder" \
- : \
- : "r" (va), "i" (op) \
- : "memory"); \
-} while (/*CONSTCOND*/0)
-
-/*
- * cache_r10k_op_16lines_32_2way:
- *
- * Perform the specified cache operation on 16 64-byte
- * cache lines, 2-ways.
- */
-#define cache_r10k_op_16lines_64_2way(va1, va2, op) \
-do { \
- __asm __volatile( \
- ".set noreorder \n\t" \
- "cache %2, 0x000(%0); cache %2, 0x000(%1); \n\t" \
- "cache %2, 0x040(%0); cache %2, 0x040(%1); \n\t" \
- "cache %2, 0x080(%0); cache %2, 0x080(%1); \n\t" \
- "cache %2, 0x0c0(%0); cache %2, 0x0c0(%1); \n\t" \
- "cache %2, 0x100(%0); cache %2, 0x100(%1); \n\t" \
- "cache %2, 0x140(%0); cache %2, 0x140(%1); \n\t" \
- "cache %2, 0x180(%0); cache %2, 0x180(%1); \n\t" \
- "cache %2, 0x1c0(%0); cache %2, 0x1c0(%1); \n\t" \
- "cache %2, 0x200(%0); cache %2, 0x200(%1); \n\t" \
- "cache %2, 0x240(%0); cache %2, 0x240(%1); \n\t" \
- "cache %2, 0x280(%0); cache %2, 0x280(%1); \n\t" \
- "cache %2, 0x2c0(%0); cache %2, 0x2c0(%1); \n\t" \
- "cache %2, 0x300(%0); cache %2, 0x300(%1); \n\t" \
- "cache %2, 0x340(%0); cache %2, 0x340(%1); \n\t" \
- "cache %2, 0x380(%0); cache %2, 0x380(%1); \n\t" \
- "cache %2, 0x3c0(%0); cache %2, 0x3c0(%1); \n\t" \
- ".set reorder" \
- : \
- : "r" (va1), "r" (va2), "i" (op) \
- : "memory"); \
-} while (/*CONSTCOND*/0)
-
-void r10k_icache_sync_all_64(void);
-void r10k_icache_sync_range_64(vaddr_t, vsize_t);
-void r10k_icache_sync_range_index_64(vaddr_t, vsize_t);
-
+void r10k_icache_sync_all(void);
+void r10k_icache_sync_range(vaddr_t, vsize_t);
+void r10k_icache_sync_range_index(vaddr_t, vsize_t);
+void r10k_pdcache_wbinv_all(void);
+void r10k_pdcache_wbinv_range(vaddr_t, vsize_t);
+void r10k_pdcache_wbinv_range_index(vaddr_t, vsize_t);
+void r10k_pdcache_inv_range(vaddr_t, vsize_t);
void r10k_pdcache_wb_range(vaddr_t, vsize_t);
+void r10k_sdcache_wbinv_all(void);
+void r10k_sdcache_wbinv_range(vaddr_t, vsize_t);
+void r10k_sdcache_wbinv_range_index(vaddr_t, vsize_t);
+void r10k_sdcache_inv_range(vaddr_t, vsize_t);
+void r10k_sdcache_wb_range(vaddr_t, vsize_t);
#endif /* _KERNEL && !_LOCORE */
Index: mips/cache.c
===================================================================
RCS file: /cvsroot/src/sys/arch/mips/mips/cache.c,v
retrieving revision 1.22
diff -u -r1.22 cache.c
--- mips/cache.c 2003/10/11 09:09:15 1.22
+++ mips/cache.c 2003/10/25 10:01:14
@@ -609,45 +609,30 @@
#endif /* MIPS3_5900 */
#ifdef ENABLE_MIPS4_CACHE_R10K
case MIPS_R10000:
- /* cache spec */
+ case MIPS_R12000:
+ case MIPS_R14000:
mips_picache_ways = 2;
mips_pdcache_ways = 2;
mips_sdcache_ways = 2;
mips4_get_cache_config(csizebase);
- switch (mips_picache_line_size) {
- case 64: /* 64 Byte */
- mips_cache_ops.mco_icache_sync_all =
- r10k_icache_sync_all_64;
- mips_cache_ops.mco_icache_sync_range =
- r10k_icache_sync_range_64;
- mips_cache_ops.mco_icache_sync_range_index =
- r10k_icache_sync_range_index_64;
- break;
-
- default:
- panic("r10k picache line size %d",
- mips_picache_line_size);
- }
- switch (mips_pdcache_line_size) {
- case 32: /* 32 Byte */
- mips_cache_ops.mco_pdcache_wbinv_all =
- r5k_pdcache_wbinv_all_32;
- mips_cache_ops.mco_pdcache_wbinv_range =
- r5k_pdcache_wbinv_range_32;
- mips_cache_ops.mco_pdcache_wbinv_range_index =
- r5k_pdcache_wbinv_range_index_32;
- mips_cache_ops.mco_pdcache_inv_range =
- r5k_pdcache_inv_range_32;
- mips_cache_ops.mco_pdcache_wb_range =
- r10k_pdcache_wb_range;
- break;
-
- default:
- panic("r10k pdcache line size %d",
- mips_pdcache_line_size);
- }
+ mips_cache_ops.mco_icache_sync_all =
+ r10k_icache_sync_all;
+ mips_cache_ops.mco_icache_sync_range =
+ r10k_icache_sync_range;
+ mips_cache_ops.mco_icache_sync_range_index =
+ r10k_icache_sync_range_index;
+ mips_cache_ops.mco_pdcache_wbinv_all =
+ r10k_pdcache_wbinv_all;
+ mips_cache_ops.mco_pdcache_wbinv_range =
+ r10k_pdcache_wbinv_range;
+ mips_cache_ops.mco_pdcache_wbinv_range_index =
+ r10k_pdcache_wbinv_range_index;
+ mips_cache_ops.mco_pdcache_inv_range =
+ r10k_pdcache_inv_range;
+ mips_cache_ops.mco_pdcache_wb_range =
+ r10k_pdcache_wb_range;
break;
#endif /* ENABLE_MIPS4_CACHE_R10K */
#endif /* MIPS3 || MIPS4 */
@@ -768,53 +753,18 @@
break;
#ifdef ENABLE_MIPS4_CACHE_R10K
case MIPS_R10000:
- switch (mips_sdcache_ways) {
- case 2:
- switch (mips_sdcache_line_size) {
- case 64:
- mips_cache_ops.mco_sdcache_wbinv_all =
- r4k_sdcache_wbinv_all_generic;
- mips_cache_ops.mco_sdcache_wbinv_range =
- r4k_sdcache_wbinv_range_generic;
- mips_cache_ops.mco_sdcache_wbinv_range_index =
- r4k_sdcache_wbinv_range_index_generic;
- mips_cache_ops.mco_sdcache_inv_range =
- r4k_sdcache_inv_range_generic;
- mips_cache_ops.mco_sdcache_wb_range =
-#if 0 /* XXX needs real wb functions for r10k 2way L2 cache */
- r4k_sdcache_wb_range_generic;
-#else
- r4k_sdcache_wbinv_range_generic;
-#endif
- break;
-
- case 128:
- mips_cache_ops.mco_sdcache_wbinv_all =
- r4k_sdcache_wbinv_all_128;
- mips_cache_ops.mco_sdcache_wbinv_range =
- r4k_sdcache_wbinv_range_128;
- mips_cache_ops.mco_sdcache_wbinv_range_index =
- r4k_sdcache_wbinv_range_index_128;
- mips_cache_ops.mco_sdcache_inv_range =
- r4k_sdcache_inv_range_128;
- mips_cache_ops.mco_sdcache_wb_range =
-#if 0 /* XXX needs real wb functions for r10k 2way L2 cache */
- r4k_sdcache_wb_range_128;
-#else
- r4k_sdcache_wbinv_range_128;
-#endif
- break;
-
- default:
- panic("r10k sdcache %d way line size %d",
- mips_sdcache_ways, mips_sdcache_line_size);
- }
- break;
-
- default:
- panic("r10k sdcache %d way line size %d",
- mips_sdcache_ways, mips_sdcache_line_size);
- }
+ case MIPS_R12000:
+ case MIPS_R14000:
+ mips_cache_ops.mco_sdcache_wbinv_all =
+ r10k_sdcache_wbinv_all;
+ mips_cache_ops.mco_sdcache_wbinv_range =
+ r10k_sdcache_wbinv_range;
+ mips_cache_ops.mco_sdcache_wbinv_range_index =
+ r10k_sdcache_wbinv_range_index;
+ mips_cache_ops.mco_sdcache_inv_range =
+ r10k_sdcache_inv_range;
+ mips_cache_ops.mco_sdcache_wb_range =
+ r10k_sdcache_wb_range;
break;
#endif /* ENABLE_MIPS4_CACHE_R10K */
#endif /* MIPS3 || MIPS4 */
Index: mips/cache_r10k.c
===================================================================
RCS file: /cvsroot/src/sys/arch/mips/mips/cache_r10k.c,v
retrieving revision 1.1
diff -u -r1.1 cache_r10k.c
--- mips/cache_r10k.c 2003/10/05 11:10:25 1.1
+++ mips/cache_r10k.c 2003/10/25 10:01:15
@@ -1,7 +1,7 @@
-/* $NetBSD: cache_r10k.c,v 1.1 2003/10/05 11:10:25 tsutsui Exp $ */
+/* $NetBSD$ */
-/*
- * Copyright (c) 2003 KIYOHARA Takashi <kiyohara@kk.iij4u.or.jp>
+/*-
+ * Copyright (c) 2003 Takao Shinohara.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -59,53 +59,44 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <sys/cdefs.h>
-
#include <sys/param.h>
#include <mips/cache.h>
#include <mips/cache_r4k.h>
-#include <mips/cache_r5k.h>
#include <mips/cache_r10k.h>
-#include <mips/locore.h>
/*
* Cache operations for R10000-style caches:
*
- * - 2-way set-associative
- * - Write-back
- * - Virtually indexed, physically tagged
- *
+ * 2-way, write-back
+ * primary cache: virtual index/physical tag
+ * secondary cache: physical index/physical tag
*/
-#define round_line(x) (((x) + 63) & ~63)
-#define trunc_line(x) ((x) & ~63)
-
__asm(".set mips3");
+#define round_line(x) (((x) + 64 - 1) & ~(64 - 1))
+#define trunc_line(x) ((x) & ~(64 - 1))
+
void
-r10k_icache_sync_all_64(void)
+r10k_icache_sync_all(void)
{
vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
- vaddr_t eva = va + mips_picache_size;
-
- /*
- * Since we're hitting the whole thing, we don't have to
- * worry about the 2 different "ways".
- */
+ vaddr_t eva = va + mips_picache_way_size;
mips_dcache_wbinv_all();
__asm __volatile("sync");
while (va < eva) {
- cache_r10k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
- va += (32 * 64);
+ cache_op_r4k_line(va+0, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
+ cache_op_r4k_line(va+1, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
+ va += 64;
}
}
void
-r10k_icache_sync_range_64(vaddr_t va, vsize_t size)
+r10k_icache_sync_range(vaddr_t va, vsize_t size)
{
vaddr_t eva = round_line(va + size);
@@ -115,11 +106,6 @@
__asm __volatile("sync");
- while ((eva - va) >= (32 * 64)) {
- cache_r10k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
- va += (32 * 64);
- }
-
while (va < eva) {
cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
va += 64;
@@ -127,9 +113,9 @@
}
void
-r10k_icache_sync_range_index_64(vaddr_t va, vsize_t size)
+r10k_icache_sync_range_index(vaddr_t va, vsize_t size)
{
- vaddr_t w2va, eva, orig_va;
+ vaddr_t eva, orig_va;
orig_va = va;
@@ -149,27 +135,183 @@
va = MIPS_PHYS_TO_KSEG0(orig_va & mips_picache_way_mask);
eva = round_line(va + size);
+ va = trunc_line(va);
+
+ while (va < eva) {
+ cache_op_r4k_line(va+0, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
+ cache_op_r4k_line(va+1, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
+ va += 64;
+ }
+}
+
+#undef round_line
+#undef trunc_line
+
+#define round_line(x) (((x) + 32 - 1) & ~(32 - 1))
+#define trunc_line(x) ((x) & ~(32 - 1))
+
+void
+r10k_pdcache_wbinv_all(void)
+{
+ vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
+ vaddr_t eva = va + mips_pdcache_way_size;
+
+ while (va < eva) {
+ cache_op_r4k_line(va+0, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
+ cache_op_r4k_line(va+1, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
+ va += 32;
+ }
+}
+
+void
+r10k_pdcache_wbinv_range(vaddr_t va, vsize_t size)
+{
+ vaddr_t eva = round_line(va + size);
+
+ va = trunc_line(va);
+
+ while (va < eva) {
+ cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
+ va += 32;
+ }
+}
+
+void
+r10k_pdcache_wbinv_range_index(vaddr_t va, vsize_t size)
+{
+ vaddr_t eva;
+
+ /*
+ * Since we're doing Index ops, we expect to not be able
+ * to access the address we've been given. So, get the
+ * bits that determine the cache index, and make a KSEG0
+ * address out of them.
+ */
+ va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);
+
+ eva = round_line(va + size);
va = trunc_line(va);
- w2va = va + mips_picache_way_size;
- while ((eva - va) >= (16 * 64)) {
- cache_r10k_op_16lines_64_2way(va, w2va,
- CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
- va += (16 * 64);
- w2va += (16 * 64);
+ while (va < eva) {
+ cache_op_r4k_line(va+0, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
+ cache_op_r4k_line(va+1, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
+ va += 32;
}
+}
+void
+r10k_pdcache_inv_range(vaddr_t va, vsize_t size)
+{
+ vaddr_t eva = round_line(va + size);
+
+ va = trunc_line(va);
+
while (va < eva) {
- cache_op_r4k_line( va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
- cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
- va += 64;
- w2va += 64;
+ cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
+ va += 32;
}
}
void
r10k_pdcache_wb_range(vaddr_t va, vsize_t size)
+{
+ vaddr_t eva = round_line(va + size);
+
+ va = trunc_line(va);
+
+ while (va < eva) {
+ /* R10000 does not support HitWriteBack operation */
+ cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
+ va += 32;
+ }
+}
+
+#undef round_line
+#undef trunc_line
+
+#define round_line(x) (((x) + mips_sdcache_line_size - 1) & ~(mips_sdcache_line_size - 1))
+#define trunc_line(x) ((x) & ~(mips_sdcache_line_size - 1))
+
+void
+r10k_sdcache_wbinv_all(void)
+{
+ vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
+ vaddr_t eva = va + mips_sdcache_way_size;
+ int line_size = mips_sdcache_line_size;
+
+ while (va < eva) {
+ cache_op_r4k_line(va+0, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
+ cache_op_r4k_line(va+1, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
+ va += line_size;
+ }
+}
+
+void
+r10k_sdcache_wbinv_range(vaddr_t va, vsize_t size)
+{
+ vaddr_t eva = round_line(va + size);
+ int line_size = mips_sdcache_line_size;
+
+ va = trunc_line(va);
+
+ while (va < eva) {
+ cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
+ va += line_size;
+ }
+}
+
+void
+r10k_sdcache_wbinv_range_index(vaddr_t va, vsize_t size)
+{
+ vaddr_t eva;
+ int line_size = mips_sdcache_line_size;
+
+ /*
+ * Since we're doing Index ops, we expect to not be able
+ * to access the address we've been given. So, get the
+ * bits that determine the cache index, and make a KSEG0
+ * address out of them.
+ */
+ va = MIPS_PHYS_TO_KSEG0(va & mips_sdcache_way_mask);
+
+ eva = round_line(va + size);
+ va = trunc_line(va);
+
+ while (va < eva) {
+ cache_op_r4k_line(va+0, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
+ cache_op_r4k_line(va+1, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
+ va += line_size;
+ }
+}
+
+void
+r10k_sdcache_inv_range(vaddr_t va, vsize_t size)
+{
+ vaddr_t eva = round_line(va + size);
+ int line_size = mips_sdcache_line_size;
+
+ va = trunc_line(va);
+
+ while (va < eva) {
+ cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
+ va += line_size;
+ }
+}
+
+void
+r10k_sdcache_wb_range(vaddr_t va, vsize_t size)
{
- /* R10000 processor does not support */
+ vaddr_t eva = round_line(va + size);
+ int line_size = mips_sdcache_line_size;
+
+ va = trunc_line(va);
+
+ while (va < eva) {
+ /* R10000 does not support HitWriteBack operation */
+ cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
+ va += line_size;
+ }
}
+#undef round_line
+#undef trunc_line