Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/sparc introduce a hack to workaround the savefpstat...
details: https://anonhg.NetBSD.org/src/rev/fee7b3750927
branches: trunk
changeset: 760795:fee7b3750927
user: mrg <mrg%NetBSD.org@localhost>
date: Thu Jan 13 05:20:27 2011 +0000
description:
introduce a hack to work around the savefpstate IPI getting a NULL
struct fpstate *.
add a new entry point ipi_savefpstate() that is used for this IPI
instead of the plain savefpstate(). check for %o0 being NULL,
and if so, just return.
add event counters for the savefpstate IPI, and another one for
when it detects the NULL problem.
make the cpu_info struct xpmsg member be aligned to a single cache line.
with this change applied (and another minor change in testing) my
SS20 with dual SM75's has survived for 17 hours on build.sh -j5,
and has reported 7 NULL savefps. (the minor change seems to have
a significant effect at reducing this number, but more testing is
needed for it.)
XXX: this is horrible and we really need to find the real problem
XXX: but this should let people use sparc smp again, and fixes
XXX: for the real problem can be tested by seeing if any of the
XXX: savefp IPI null counters become non-zero.
diffstat:
sys/arch/sparc/include/cpu.h | 3 +-
sys/arch/sparc/sparc/cpu.c | 25 +++++++++++++++++++++-
sys/arch/sparc/sparc/cpuvar.h | 14 +++++++++---
sys/arch/sparc/sparc/genassym.cf | 4 ++-
sys/arch/sparc/sparc/locore.s | 42 +++++++++++++++++++++++++++++++++++++-
sys/arch/sparc/sparc/machdep.c | 6 ++--
sys/arch/sparc/sparc/trap.c | 6 ++--
sys/arch/sparc/sparc/vm_machdep.c | 8 +++---
8 files changed, 88 insertions(+), 20 deletions(-)
diffs (truncated from 307 to 300 lines):
diff -r 1f40df2c9b77 -r fee7b3750927 sys/arch/sparc/include/cpu.h
--- a/sys/arch/sparc/include/cpu.h Thu Jan 13 05:14:48 2011 +0000
+++ b/sys/arch/sparc/include/cpu.h Thu Jan 13 05:20:27 2011 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.h,v 1.89 2010/01/03 23:03:21 mrg Exp $ */
+/* $NetBSD: cpu.h,v 1.90 2011/01/13 05:20:27 mrg Exp $ */
/*
* Copyright (c) 1992, 1993
@@ -193,6 +193,7 @@
/* locore.s */
struct fpstate;
+void ipi_savefpstate(struct fpstate *);
void savefpstate(struct fpstate *);
void loadfpstate(struct fpstate *);
int probeget(void *, int);
diff -r 1f40df2c9b77 -r fee7b3750927 sys/arch/sparc/sparc/cpu.c
--- a/sys/arch/sparc/sparc/cpu.c Thu Jan 13 05:14:48 2011 +0000
+++ b/sys/arch/sparc/sparc/cpu.c Thu Jan 13 05:20:27 2011 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.c,v 1.223 2010/06/22 18:29:02 rmind Exp $ */
+/* $NetBSD: cpu.c,v 1.224 2011/01/13 05:20:27 mrg Exp $ */
/*
* Copyright (c) 1996
@@ -52,7 +52,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.223 2010/06/22 18:29:02 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.224 2011/01/13 05:20:27 mrg Exp $");
#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"
@@ -64,7 +64,9 @@
#include <sys/device.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
+#include <sys/evcnt.h>
#include <sys/xcall.h>
+#include <sys/cpu.h>
#include <uvm/uvm.h>
@@ -119,6 +121,7 @@
cpu_cpuunit_match, cpu_cpuunit_attach, NULL, NULL);
#endif /* SUN4D */
+static void cpu_init_evcnt(struct cpu_info *cpi);
static void cpu_attach(struct cpu_softc *, int, int);
static const char *fsrtoname(int, int, int);
@@ -293,6 +296,21 @@
}
#endif /* SUN4D */
+static void
+cpu_init_evcnt(struct cpu_info *cpi)
+{
+
+ /*
+ * Setup the per-cpu savefpstate counters. The "savefp null"
+ * counter should go away when the NULL struct fpstate * bug
+ * is fixed.
+ */
+ evcnt_attach_dynamic(&cpi->ci_savefpstate, EVCNT_TYPE_MISC,
+ NULL, cpu_name(cpi), "savefp ipi");
+ evcnt_attach_dynamic(&cpi->ci_savefpstate_null, EVCNT_TYPE_MISC,
+ NULL, cpu_name(cpi), "savefp null ipi");
+}
+
/*
* Attach the CPU.
* Discover interesting goop about the virtual address cache
@@ -341,10 +359,13 @@
#if defined(MULTIPROCESSOR)
if (cpu_attach_count > 1) {
cpu_attach_non_boot(sc, cpi, node);
+ cpu_init_evcnt(cpi);
return;
}
#endif /* MULTIPROCESSOR */
+ cpu_init_evcnt(cpi);
+
/* Stuff to only run on the boot CPU */
cpu_setup();
snprintf(buf, sizeof buf, "%s @ %s MHz, %s FPU",
diff -r 1f40df2c9b77 -r fee7b3750927 sys/arch/sparc/sparc/cpuvar.h
--- a/sys/arch/sparc/sparc/cpuvar.h Thu Jan 13 05:14:48 2011 +0000
+++ b/sys/arch/sparc/sparc/cpuvar.h Thu Jan 13 05:20:27 2011 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpuvar.h,v 1.83 2010/01/23 16:06:57 mrg Exp $ */
+/* $NetBSD: cpuvar.h,v 1.84 2011/01/13 05:20:27 mrg Exp $ */
/*
* Copyright (c) 1996 The NetBSD Foundation, Inc.
@@ -127,6 +127,13 @@
struct cpu_info {
struct cpu_data ci_data; /* MI per-cpu data */
+ /*
+ * Primary Inter-processor message area. Keep this aligned
+ * to a cache line boundary if possible, as the structure
+ * itself is one (normal 32 byte) cache-line.
+ */
+ struct xpmsg msg __aligned(32);
+
/* Scheduler flags */
int ci_want_ast;
int ci_want_resched;
@@ -142,9 +149,6 @@
*/
struct cpu_info * volatile ci_self;
- /* Primary Inter-processor message area */
- struct xpmsg msg;
-
int ci_cpuid; /* CPU index (see cpus[] array) */
/* Context administration */
@@ -334,6 +338,8 @@
struct evcnt ci_lev10;
struct evcnt ci_lev14;
+ struct evcnt ci_savefpstate;
+ struct evcnt ci_savefpstate_null;
};
/*
diff -r 1f40df2c9b77 -r fee7b3750927 sys/arch/sparc/sparc/genassym.cf
--- a/sys/arch/sparc/sparc/genassym.cf Thu Jan 13 05:14:48 2011 +0000
+++ b/sys/arch/sparc/sparc/genassym.cf Thu Jan 13 05:20:27 2011 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: genassym.cf,v 1.61 2010/12/20 00:25:44 matt Exp $
+# $NetBSD: genassym.cf,v 1.62 2011/01/13 05:20:27 mrg Exp $
#
# Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -166,6 +166,8 @@
define CPUINFO_MTX_COUNT offsetof(struct cpu_info, ci_mtx_count)
define CPUINFO_MTX_OLDSPL offsetof(struct cpu_info, ci_mtx_oldspl)
define CPUINFO_IDEPTH offsetof(struct cpu_info, ci_idepth)
+define CPUINFO_SAVEFPSTATE offsetof(struct cpu_info, ci_savefpstate)
+define CPUINFO_SAVEFPSTATE_NULL offsetof(struct cpu_info, ci_savefpstate_null)
# PTE bits and related information
define PG_W PG_W
diff -r 1f40df2c9b77 -r fee7b3750927 sys/arch/sparc/sparc/locore.s
--- a/sys/arch/sparc/sparc/locore.s Thu Jan 13 05:14:48 2011 +0000
+++ b/sys/arch/sparc/sparc/locore.s Thu Jan 13 05:20:27 2011 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: locore.s,v 1.258 2010/12/20 00:25:44 matt Exp $ */
+/* $NetBSD: locore.s,v 1.259 2011/01/13 05:20:27 mrg Exp $ */
/*
* Copyright (c) 1996 Paul Kranenburg
@@ -5841,12 +5841,29 @@
/* NOTREACHED */
/*
- * savefpstate(f) struct fpstate *f;
+ * savefpstate(struct fpstate *f);
+ * ipi_savefpstate(struct fpstate *f);
*
* Store the current FPU state. The first `st %fsr' may cause a trap;
* our trap handler knows how to recover (by `returning' to savefpcont).
+ *
+ * The IPI version just deals with updating event counters first.
*/
+Lpanic_savefpstate:
+ .asciz "cpu%d: NULL fpstate"
+ _ALIGN
+
+ENTRY(ipi_savefpstate)
+ sethi %hi(CPUINFO_VA), %o5
+ ldd [%o5 + CPUINFO_SAVEFPSTATE], %o2
+ inccc %o3
+ addx %o2, 0, %o2
+ std %o2, [%o5 + CPUINFO_SAVEFPSTATE]
+
ENTRY(savefpstate)
+ cmp %o0, 0
+ bz Lfp_null_fpstate
+ nop
rd %psr, %o1 ! enable FP before we begin
set PSR_EF, %o2
or %o1, %o2, %o1
@@ -5889,6 +5906,27 @@
std %f30, [%o0 + FS_REGS + (4*30)]
/*
+ * We really should panic here but while we figure out what the bug is
+ * that a remote CPU gets a NULL struct fpstate *, this lets the system
+ * work at least seemingly stably.
+ */
+Lfp_null_fpstate:
+#if 1
+ sethi %hi(CPUINFO_VA), %o5
+ ldd [%o5 + CPUINFO_SAVEFPSTATE_NULL], %o2
+ inccc %o3
+ addx %o2, 0, %o2
+ retl
+ std %o2, [%o5 + CPUINFO_SAVEFPSTATE_NULL]
+#else
+ ld [%o5 + CPUINFO_CPUNO], %o1
+ sethi %hi(Lpanic_savefpstate), %o0
+ call _C_LABEL(panic)
+ or %o0, %lo(Lpanic_savefpstate), %o0
+#endif
+1:
+
+/*
* Store the (now known nonempty) FP queue.
* We have to reread the fsr each time in order to get the new QNE bit.
*/
diff -r 1f40df2c9b77 -r fee7b3750927 sys/arch/sparc/sparc/machdep.c
--- a/sys/arch/sparc/sparc/machdep.c Thu Jan 13 05:14:48 2011 +0000
+++ b/sys/arch/sparc/sparc/machdep.c Thu Jan 13 05:20:27 2011 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: machdep.c,v 1.303 2010/12/20 00:25:44 matt Exp $ */
+/* $NetBSD: machdep.c,v 1.304 2011/01/13 05:20:27 mrg Exp $ */
/*-
* Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
@@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.303 2010/12/20 00:25:44 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.304 2011/01/13 05:20:27 mrg Exp $");
#include "opt_compat_netbsd.h"
#include "opt_compat_sunos.h"
@@ -376,7 +376,7 @@
savefpstate(fs);
#if defined(MULTIPROCESSOR)
else
- XCALL1(savefpstate, fs, 1 << cpi->ci_cpuid);
+ XCALL1(ipi_savefpstate, fs, 1 << cpi->ci_cpuid);
#endif
cpi->fplwp = NULL;
}
diff -r 1f40df2c9b77 -r fee7b3750927 sys/arch/sparc/sparc/trap.c
--- a/sys/arch/sparc/sparc/trap.c Thu Jan 13 05:14:48 2011 +0000
+++ b/sys/arch/sparc/sparc/trap.c Thu Jan 13 05:20:27 2011 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: trap.c,v 1.184 2010/12/20 00:25:44 matt Exp $ */
+/* $NetBSD: trap.c,v 1.185 2011/01/13 05:20:27 mrg Exp $ */
/*
* Copyright (c) 1996
@@ -49,7 +49,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.184 2010/12/20 00:25:44 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.185 2011/01/13 05:20:27 mrg Exp $");
#include "opt_ddb.h"
#include "opt_compat_svr4.h"
@@ -477,7 +477,7 @@
panic("FPU(%d): state for %p",
cpi->ci_cpuid, l);
#if defined(MULTIPROCESSOR)
- XCALL1(savefpstate, fs, 1 << cpi->ci_cpuid);
+ XCALL1(ipi_savefpstate, fs, 1 << cpi->ci_cpuid);
#endif
cpi->fplwp = NULL;
}
diff -r 1f40df2c9b77 -r fee7b3750927 sys/arch/sparc/sparc/vm_machdep.c
--- a/sys/arch/sparc/sparc/vm_machdep.c Thu Jan 13 05:14:48 2011 +0000
+++ b/sys/arch/sparc/sparc/vm_machdep.c Thu Jan 13 05:20:27 2011 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: vm_machdep.c,v 1.101 2009/11/21 04:16:52 rmind Exp $ */
+/* $NetBSD: vm_machdep.c,v 1.102 2011/01/13 05:20:27 mrg Exp $ */
/*
* Copyright (c) 1996
@@ -49,7 +49,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.101 2009/11/21 04:16:52 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.102 2011/01/13 05:20:27 mrg Exp $");
#include "opt_multiprocessor.h"
@@ -228,7 +228,7 @@
savefpstate(l1->l_md.md_fpstate);
#if defined(MULTIPROCESSOR)
else
- XCALL1(savefpstate, l1->l_md.md_fpstate,
+ XCALL1(ipi_savefpstate, l1->l_md.md_fpstate,
1 << cpi->ci_cpuid);
#endif
}
@@ -306,7 +306,7 @@
savefpstate(fs);
Home |
Main Index |
Thread Index |
Old Index