Subject: Re: mutex fault
To: Manuel Bouyer <bouyer@antioche.eu.org>
From: Kazushi (Jam) Marukawa <jam@pobox.com>
List: tech-kern
Date: 12/19/2007 00:04:25
Hi Manuel,
> > Anyway, it's running, so I will leave it as is for a while
> > to see whether it crashes or not. If you have any idea to
> > try, please let me know. Thanks.
>
> I'm running a LOCKDEBUG kernel now as well, and I didn't see a crash
> either.
Is your kernel crashed? Mine didn't crash. It looks like
this LOCKDEBUG kernel won't crash, so I stopped this test.
I'm now thinking Xen3 dom0 kernel on multi cores machine is
having race condition issues. Therefore, I made following
patch to enable some pre-exist codes to avoid lock race
conditions. With this patch, my machine has not been
crashed yet for 2 days, looks like pretty working.
I know this is not the answer. I just patched a hole. This
doesn't fix the source of problem. Manuel, is it possible
to investigate race condition issues in Xen3 dom0 kernel?
Thanks.
----
Little bit of descriptions, why I started thinking Xen3 dom0
kernel on multi cores machine is having race condition issues:
I sent small patch for locore.S at Jun 27th. My machine
needed CLI and STI to avoid race conditions although those
instructions are removed with a decision like those were not
required anymore after kernel modifications. I don't know
why, but my machine just needed them to run correctly.
Others didn't have problem without CLI and STI, but only my
machine had the problem.
I once asked whether I need to enable MULTIPROCESSOR to use
netbsd dom0 kernel since my machine was been having dual
cores. The answer was no. I heard Xen3 was taking care of
race conditions.
Now, I'm doubting it since I had CLI/STI race condition
problem before, and I've been having this mutex race
condition problem. That's all.
---
Here is my patch. I just enabled codes for MULTIPROCESSOR
environment. I modified i386 and amd64 codes, but never
tested it on amd64. I also modified ic/XXX codes. I guess
ic/XXX stuff are not needed, but I'm not sure. So, I just
left them as what I currently modified.
Index: arch/amd64/amd64/lock_stubs.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/lock_stubs.S,v
retrieving revision 1.8
diff -u -r1.8 lock_stubs.S
--- arch/amd64/amd64/lock_stubs.S 22 Nov 2007 16:16:42 -0000 1.8
+++ arch/amd64/amd64/lock_stubs.S 18 Dec 2007 14:30:54 -0000
@@ -53,7 +53,7 @@
#include "assym.h"
-#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG) || defined(XEN3)
#define FULL
#endif
Index: arch/i386/i386/lock_stubs.S
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/i386/lock_stubs.S,v
retrieving revision 1.8
diff -u -r1.8 lock_stubs.S
--- arch/i386/i386/lock_stubs.S 11 Nov 2007 01:27:43 -0000 1.8
+++ arch/i386/i386/lock_stubs.S 18 Dec 2007 14:30:56 -0000
@@ -52,7 +52,7 @@
#include "assym.h"
-#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG) || defined(XEN3)
#define FULL
#endif
Index: dev/ic/atppc.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/atppc.c,v
retrieving revision 1.24
diff -u -r1.24 atppc.c
--- dev/ic/atppc.c 19 Oct 2007 11:59:48 -0000 1.24
+++ dev/ic/atppc.c 18 Dec 2007 14:31:02 -0000
@@ -181,7 +181,7 @@
lsc->sc_dev.dv_xname));
}
-#if defined (MULTIPROCESSOR) || defined (LOCKDEBUG)
+#if defined (MULTIPROCESSOR) || defined (LOCKDEBUG) || defined(XEN3)
/* Initialize lock structure */
simple_lock_init(&(lsc->sc_lock));
#endif
Index: dev/ic/atppcvar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/atppcvar.h,v
retrieving revision 1.9
diff -u -r1.9 atppcvar.h
--- dev/ic/atppcvar.h 19 Oct 2007 11:59:48 -0000 1.9
+++ dev/ic/atppcvar.h 18 Dec 2007 14:31:02 -0000
@@ -79,18 +79,18 @@
/* Locking for atppc device */
-#if defined(MULTIPROCESSOR) || defined (LOCKDEBUG)
+#if defined(MULTIPROCESSOR) || defined (LOCKDEBUG) || defined(XEN3)
#include <sys/lock.h>
#define ATPPC_SC_LOCK(sc) (&((sc)->sc_lock))
#define ATPPC_LOCK_INIT(sc) simple_lock_init(ATPPC_SC_LOCK((sc)))
#define ATPPC_LOCK(sc) simple_lock(ATPPC_SC_LOCK((sc)))
#define ATPPC_UNLOCK(sc) simple_unlock(ATPPC_SC_LOCK((sc)))
-#else /* !(MULTIPROCESSOR) && !(LOCKDEBUG) */
+#else /* !(MULTIPROCESSOR) && !(LOCKDEBUG) && !(XEN3) */
#define ATPPC_LOCK_INIT(sc)
#define ATPPC_LOCK(sc)
#define ATPPC_UNLOCK(sc)
#define ATPPC_SC_LOCK(sc) NULL
-#endif /* MULTIPROCESSOR || LOCKDEBUG */
+#endif /* MULTIPROCESSOR || LOCKDEBUG || XEN3 */
/* Single softintr callback entry */
struct atppc_handler_node {
@@ -104,7 +104,7 @@
/* Generic device attributes */
struct device sc_dev;
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) || defined(XEN3)
/* Simple lock */
struct simplelock sc_lock;
#endif
Index: dev/ic/sl811hs.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/sl811hs.c,v
retrieving revision 1.16
diff -u -r1.16 sl811hs.c
--- dev/ic/sl811hs.c 6 Nov 2007 21:51:07 -0000 1.16
+++ dev/ic/sl811hs.c 18 Dec 2007 14:31:03 -0000
@@ -300,7 +300,7 @@
uint8_t ptype; /* Pipe type */
};
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) || defined(XEN3)
#define SLHCI_WAITLOCK 1
#endif
Index: kern/kern_mutex.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_mutex.c,v
retrieving revision 1.23
diff -u -r1.23 kern_mutex.c
--- kern/kern_mutex.c 21 Nov 2007 10:19:10 -0000 1.23
+++ kern/kern_mutex.c 18 Dec 2007 14:31:04 -0000
@@ -69,7 +69,7 @@
* more than an splraiseipl() and splx() pair.
*/
-#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG) || defined(XEN3)
#define FULL
#endif
@@ -456,7 +456,7 @@
* Handle spin mutexes.
*/
if (MUTEX_SPIN_P(mtx)) {
-#if defined(LOCKDEBUG) && defined(MULTIPROCESSOR)
+#if defined(LOCKDEBUG) && (defined(MULTIPROCESSOR) || defined(XEN3))
u_int spins = 0;
#endif
MUTEX_SPIN_SPLRAISE(mtx);
Index: sys/lwp.h
===================================================================
RCS file: /cvsroot/src/sys/sys/lwp.h,v
retrieving revision 1.69
diff -u -r1.69 lwp.h
--- sys/lwp.h 12 Nov 2007 23:12:00 -0000 1.69
+++ sys/lwp.h 18 Dec 2007 14:31:05 -0000
@@ -294,7 +294,7 @@
static inline void
lwp_lock(lwp_t *l)
{
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) || defined(XEN3)
kmutex_t *old;
mutex_spin_enter(old = l->l_mutex);
Index: sys/simplelock.h
===================================================================
RCS file: /cvsroot/src/sys/sys/simplelock.h,v
retrieving revision 1.5
diff -u -r1.5 simplelock.h
--- sys/simplelock.h 11 Oct 2007 19:49:04 -0000 1.5
+++ sys/simplelock.h 18 Dec 2007 14:31:06 -0000
@@ -113,7 +113,7 @@
#ifdef _KERNEL
-#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) || defined(XEN3)
#define simple_lock_init(alp) __cpu_simple_lock_init(&(alp)->lock_data)
#define simple_lock(alp) __cpu_simple_lock(&(alp)->lock_data)
#define simple_lock_held(alp) (__SIMPLELOCK_LOCKED_P(&(alp)->lock_data))
-- Kazushi
UFO's are for real: the Air Force doesn't exist.