Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/netbsd-7]: src/sys/arch/sparc/sparc Pull up following revision(s) (requested by mrg in ticket #1585)



details:   https://anonhg.NetBSD.org/src/rev/5ab658d330ef
branches:  netbsd-7
changeset: 800449:5ab658d330ef
user:      martin <martin%NetBSD.org@localhost>
date:      Wed Mar 21 11:52:49 2018 +0000

description:
Pull up following revision(s) (requested by mrg in ticket #1585):
        sys/arch/sparc/sparc/cpu.c: revision 1.250 (patch)
        sys/arch/sparc/include/cpu.h: revision 1.99 (patch -> cpuvar.h)
        sys/arch/sparc/sparc/intr.c: revision 1.119 (patch)

- return early in xcall() if the function is sparc_noop() instead of triggering
  the IPI and then ignoring responses (or lack thereof)
- write the .tag field last to avoid a race when polling for an incoming
  IPI
- add event counters for IPIs being caught with the mutex not held, and for
  messages that are already marked as completed

With this my SS20 made it through 48 hours of pkgsrc with MAKE_JOBS=3 and a
pair of SM81s.

Hypersparcs still crash, but instead of craziness we get actual error messages:
apparently one CPU will occasionally do a watchdog reset, which according to
the manual is caused by catching a trap with traps disabled. Now to figure
out how that can even happen...

diffstat:

 sys/arch/sparc/sparc/cpu.c  |  24 ++++++++++++++++++------
 sys/arch/sparc/sparc/intr.c |  29 ++++++++++++++++++++++++++---
 2 files changed, 44 insertions(+), 9 deletions(-)

diffs (151 lines):

diff -r 5c9116ad6be4 -r 5ab658d330ef sys/arch/sparc/sparc/cpu.c
--- a/sys/arch/sparc/sparc/cpu.c        Wed Mar 21 11:42:17 2018 +0000
+++ b/sys/arch/sparc/sparc/cpu.c        Wed Mar 21 11:52:49 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cpu.c,v 1.248 2014/07/25 17:21:32 nakayama Exp $ */
+/*     $NetBSD: cpu.c,v 1.248.2.1 2018/03/21 11:52:49 martin Exp $ */
 
 /*
  * Copyright (c) 1996
@@ -52,7 +52,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.248 2014/07/25 17:21:32 nakayama Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.248.2.1 2018/03/21 11:52:49 martin Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_lockdebug.h"
@@ -183,7 +183,7 @@
  * This must be locked around all message transactions to ensure only
  * one CPU is generating them.
  */
-static kmutex_t xpmsg_mutex;
+kmutex_t xpmsg_mutex;
 
 #endif /* MULTIPROCESSOR */
 
@@ -367,6 +367,10 @@
                             NULL, cpu_name(cpi), "IPI mutex_trylock fail");
        evcnt_attach_dynamic(&cpi->ci_xpmsg_mutex_fail_call, EVCNT_TYPE_MISC,
                             NULL, cpu_name(cpi), "IPI mutex_trylock fail/call");
+       evcnt_attach_dynamic(&cpi->ci_xpmsg_mutex_not_held, EVCNT_TYPE_MISC,
+                            NULL, cpu_name(cpi), "IPI with mutex not held");
+       evcnt_attach_dynamic(&cpi->ci_xpmsg_bogus, EVCNT_TYPE_MISC,
+                            NULL, cpu_name(cpi), "bogus IPI");
 
        /*
         * These are the per-cpu per-IPL hard & soft interrupt counters.
@@ -653,6 +657,8 @@
        char *bufp = errbuf;
        size_t bufsz = sizeof errbuf, wrsz;
 
+       if (is_noop) return;
+
        mybit = (1 << cpuinfo.ci_cpuid);
        callself = func && (cpuset & mybit) != 0;
        cpuset &= ~mybit;
@@ -714,7 +720,10 @@
                if ((cpuset & (1 << n)) == 0)
                        continue;
 
-               cpi->msg.tag = XPMSG_FUNC;
+               /*
+                * Write msg.tag last - if another CPU is polling above it may
+                * end up seeing an incomplete message. Not likely but still.
+                */ 
                cpi->msg.complete = 0;
                p = &cpi->msg.u.xpmsg_func;
                p->func = func;
@@ -722,6 +731,9 @@
                p->arg0 = arg0;
                p->arg1 = arg1;
                p->arg2 = arg2;
+               __insn_barrier();
+               cpi->msg.tag = XPMSG_FUNC;
+               __insn_barrier();
                /* Fast cross calls use interrupt level 14 */
                raise_ipi(cpi,13+fasttrap);/*xcall_cookie->pil*/
        }
@@ -737,7 +749,7 @@
         * have completed (bailing if it takes "too long", being loud about
         * this in the process).
         */
-       done = is_noop;
+       done = 0;
        i = 1000000;    /* time-out, not too long, but still an _AGE_ */
        while (!done) {
                if (--i < 0) {
@@ -774,7 +786,7 @@
 
        if (i >= 0 || debug_xcall == 0) {
                if (i < 0)
-                       printf_nolog("%s\n", errbuf);
+                       aprint_error("%s\n", errbuf);
                mutex_spin_exit(&xpmsg_mutex);
                return;
        }
diff -r 5c9116ad6be4 -r 5ab658d330ef sys/arch/sparc/sparc/intr.c
--- a/sys/arch/sparc/sparc/intr.c       Wed Mar 21 11:42:17 2018 +0000
+++ b/sys/arch/sparc/sparc/intr.c       Wed Mar 21 11:52:49 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: intr.c,v 1.118 2013/11/16 23:54:01 mrg Exp $ */
+/*     $NetBSD: intr.c,v 1.118.4.1 2018/03/21 11:52:49 martin Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.118 2013/11/16 23:54:01 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.118.4.1 2018/03/21 11:52:49 martin Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_sparc_arch.h"
@@ -76,6 +76,8 @@
 void *xcall_cookie;
 #endif
 
+extern kmutex_t xpmsg_mutex;
+
 void   strayintr(struct clockframe *);
 #ifdef DIAGNOSTIC
 void   bogusintr(struct clockframe *);
@@ -241,7 +243,7 @@
                        DELAY(1);
                        if (n-- > 0)
                                continue;
-                       printf("nmi_hard: SMP botch.");
+                       printf("nmi_hard: SMP botch.\n");
                        break;
                }
        }
@@ -364,6 +366,27 @@
        if (v != xcallintr)
                cpuinfo.ci_sintrcnt[13].ev_count++;
 
+       if (mutex_owned(&xpmsg_mutex) == 0) {
+               cpuinfo.ci_xpmsg_mutex_not_held.ev_count++;
+#ifdef DEBUG
+               printf("%s: mutex not held\n", __func__);
+#endif
+               cpuinfo.msg.complete = 1;
+               kpreempt_enable();
+               return;
+       }
+
+       if (cpuinfo.msg.complete != 0) {
+               cpuinfo.ci_xpmsg_bogus.ev_count++;
+#ifdef DEBUG
+               volatile struct xpmsg_func *p = &cpuinfo.msg.u.xpmsg_func;
+               printf("%s: bogus message %08x %08x %08x %08x\n", __func__,
+                   cpuinfo.msg.tag, (uint32_t)p->func, p->arg0, p->arg1);
+#endif
+               kpreempt_enable();
+               return;
+       }
+
        /* notyet - cpuinfo.msg.received = 1; */
        switch (cpuinfo.msg.tag) {
        case XPMSG_FUNC:



Home | Main Index | Thread Index | Old Index