Subject: Re: lock debugging
To: None <tech-kern@netbsd.org>
From: Pavel Cahyna <pavel@netbsd.org>
List: tech-kern
Date: 11/17/2006 19:04:27
--YZ5djTAD1cGYuMQK
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
On Fri, Nov 17, 2006 at 05:49:54AM -0500, Jed Davis wrote:
> For diagnosing problems with locking (e.g., lots of processes getting
> stuck on vnlock), it would be nice to be able to dump, for each lock
> on the system that's being slept on, which lwp holds it and which are
> sleeping on it. Finding the thread at the root (or, for vnlock, leaf)
> of the trouble should then be easier.
I've taken a somewhat different approach. If lockmgr() wants to sleep to wait
for a lock, we first note the LWP which currently holds the lock. We check
whether that LWP is waiting for another lock. If it is, we repeat until we
hit a LWP which does not wait for a lock, or the original LWP. In the
second case, this is a deadlock and we panic.
I also added a function to traverse the chain of LWPs and print the last
one (the one which does not wait for a lock, which you've called the root
of the trouble). Could be useful for cases where the problem is not caused
by a deadlock. The function is lklast(struct lwp *); it can be called from
ddb.
> Given a struck lock*, the identity of the holder is right there; but
> getting from a sleeping lwp to the lock it's sleeping on seems harder.
> A quick glance shows that the sleep address will point to the struct
> lock (except in the LK_DRAIN case when it's the lk_flags field, it
> looks like), but it might also point to something else entirely.
I've added a new flag to struct lwp, L_LOCK, which is set if the LWP is
waiting for a lock and it is guaranteed that l_wchan points to a
struct lock.
I've also changed the reference to lock holder in struct lock from
{ pid_t, lwpid_t } pair to struct lwp *, because with a struct lwp *,
getting the PID and LWPID is easy, but the inverse is more difficult.
lockdebug.diff contains those changes, deadlock.diff contains two syscalls
which take two locks in a different order to provoke a deadlock. Uncomment the
lockmgr() calls from the second one for deadlock, otherwise it produces a
process stuck on a lock held by another process - good for trying the
lklast() function. deadlock.c is a test program to trigger it from
userland.
Pavel
--YZ5djTAD1cGYuMQK
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="lockdebug.diff"
Index: kern/kern_lock.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_lock.c,v
retrieving revision 1.99
diff -u -p -r1.99 kern_lock.c
--- kern/kern_lock.c 7 Sep 2006 02:06:47 -0000 1.99
+++ kern/kern_lock.c 17 Nov 2006 17:21:58 -0000
@@ -83,6 +83,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_lock.c,
#include "opt_ddb.h"
#include <sys/param.h>
+#include <sys/lwp.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/systm.h>
@@ -209,6 +210,56 @@ do { \
#define RETURN_ADDRESS ((uintptr_t)__builtin_return_address(0))
+static int lkfollow(volatile const struct lock *, struct lwp *, int);
+void lklast(struct lwp *);
+
+void lklast(struct lwp *l)
+{
+ if (l->l_stat == LSSLEEP && (l->l_flag & L_LOCK) &&
+ l->l_wchan ) {
+ if (lkfollow((volatile const struct lock *)l->l_wchan, l, 1))
+ printf("deadlock detected\n");
+ }
+}
+
+/* for lkp, find the LWP which holds it. If this LWP is waiting for a
+ * lock, find that lock and repeat until we reach the LWP "begin", or we
+ * reach a LWP which is not waiting for a lock. if report_end, print
+ * the last LWP if it is not waiting for a lock. otherwise, be silent
+ * in that case and print the whole chain of locks if we reached
+ * "begin". */
+
+static int lkfollow(volatile const struct lock *lkp, struct lwp* begin, int report_end)
+{
+ struct lwp *l;
+ if (lkp->lk_timo == 0 && !(lkp->lk_flags & LK_SPIN)) {
+ l = lkp->lk_lwp_p;
+ if (l == NULL)
+ return 0;
+ if (l == begin) {
+ printf("lock %p:", lkp);
+ lockmgr_printinfo(lkp);
+ printf("\n");
+ return 1;
+ }
+ if (l->l_stat == LSSLEEP && (l->l_flag & L_LOCK) &&
+ l->l_wchan ) {
+ if (lkfollow((volatile const struct lock *)l->l_wchan, begin, report_end)) {
+ printf("lock %p:", lkp);
+ lockmgr_printinfo(lkp);
+ printf("\n");
+ return 1;
+ }
+ } else if (report_end) {
+ printf("last lock %p held by:\n", lkp);
+ printf("LWP %d.%d=%p\n", l->l_proc->p_pid, l->l_lid, l);
+ }
+ }
+
+ return 0;
+}
+
+
/*
* Acquire a resource.
*/
@@ -261,12 +312,21 @@ acquire(volatile struct lock **lkpp, int
lkp->lk_waitcount++;
lkp->lk_flags |= LK_WAIT_NONZERO;
}
+ if (lkfollow(lkp, curlwp, 0))
+ panic("acquire: deadlock detected");
+
/* XXX Cast away volatile. */
LOCKSTAT_START_TIMER(slptime);
+ if (!drain) {
+ curlwp->l_flag |= L_LOCK;
+ }
error = ltsleep(drain ?
(volatile const void *)&lkp->lk_flags :
(volatile const void *)lkp, lkp->lk_prio,
lkp->lk_wmesg, lkp->lk_timo, &lkp->lk_interlock);
+ if (!drain) {
+ curlwp->l_flag &= ~L_LOCK;
+ }
LOCKSTAT_STOP_TIMER(slptime);
LOCKSTAT_EVENT_RA((void *)(uintptr_t)lkp,
LB_LOCKMGR | LB_SLEEP, 1, slptime, ra);
@@ -294,20 +354,20 @@ acquire(volatile struct lock **lkpp, int
return error;
}
-#define SETHOLDER(lkp, pid, lid, cpu_id) \
-do { \
- if ((lkp)->lk_flags & LK_SPIN) \
- (lkp)->lk_cpu = cpu_id; \
- else { \
- (lkp)->lk_lockholder = pid; \
- (lkp)->lk_locklwp = lid; \
- } \
+#define SETHOLDER(lkp, lwp, cpu_id) \
+do { \
+ if ((lkp)->lk_flags & LK_SPIN) \
+ (lkp)->lk_cpu = cpu_id; \
+ else { \
+ (lkp)->lk_lwp_p = lwp; \
+ } \
} while (/*CONSTCOND*/0)
#define WEHOLDIT(lkp, pid, lid, cpu_id) \
(((lkp)->lk_flags & LK_SPIN) != 0 ? \
((lkp)->lk_cpu == (cpu_id)) : \
- ((lkp)->lk_lockholder == (pid) && (lkp)->lk_locklwp == (lid)))
+ ((lkp)->lk_lwp_p == NULL ? (pid) == LK_NOPROC && (lid) == 0 : \
+ ((lkp)->lk_lockholder == (pid) && (lkp)->lk_locklwp == (lid))))
#define WAKEUP_WAITER(lkp) \
do { \
@@ -417,7 +477,7 @@ lockinit(struct lock *lkp, int prio, con
if (flags & LK_SPIN)
lkp->lk_cpu = LK_NOCPU;
else {
- lkp->lk_lockholder = LK_NOPROC;
+ lkp->lk_lwp_p = NULL;
lkp->lk_newlock = NULL;
lkp->lk_prio = prio;
lkp->lk_timo = timo;
@@ -665,7 +725,7 @@ lockmgr(volatile struct lock *lkp, u_int
lkp->lk_exclusivecount = 0;
lkp->lk_recurselevel = 0;
lkp->lk_flags &= ~LK_HAVE_EXCL;
- SETHOLDER(lkp, LK_NOPROC, 0, LK_NOCPU);
+ SETHOLDER(lkp, NULL, LK_NOCPU);
#if defined(LOCKDEBUG)
lkp->lk_unlock_file = file;
lkp->lk_unlock_line = line;
@@ -729,7 +789,7 @@ lockmgr(volatile struct lock *lkp, u_int
break;
}
lkp->lk_flags |= LK_HAVE_EXCL;
- SETHOLDER(lkp, pid, lid, cpu_num);
+ SETHOLDER(lkp, l, cpu_num);
#if defined(LOCKDEBUG)
lkp->lk_lock_file = file;
lkp->lk_lock_line = line;
@@ -801,7 +861,7 @@ lockmgr(volatile struct lock *lkp, u_int
break;
}
lkp->lk_flags |= LK_HAVE_EXCL;
- SETHOLDER(lkp, pid, lid, cpu_num);
+ SETHOLDER(lkp, l, cpu_num);
#if defined(LOCKDEBUG)
lkp->lk_lock_file = file;
lkp->lk_lock_line = line;
@@ -835,7 +895,7 @@ lockmgr(volatile struct lock *lkp, u_int
COUNT(lkp, l, cpu_num, -1);
if (lkp->lk_exclusivecount == 0) {
lkp->lk_flags &= ~LK_HAVE_EXCL;
- SETHOLDER(lkp, LK_NOPROC, 0, LK_NOCPU);
+ SETHOLDER(lkp, NULL, LK_NOCPU);
#if defined(LOCKDEBUG)
lkp->lk_unlock_file = file;
lkp->lk_unlock_line = line;
@@ -880,7 +940,7 @@ lockmgr(volatile struct lock *lkp, u_int
if (error)
break;
lkp->lk_flags |= LK_DRAINING | LK_HAVE_EXCL;
- SETHOLDER(lkp, pid, lid, cpu_num);
+ SETHOLDER(lkp, l, cpu_num);
#if defined(LOCKDEBUG)
lkp->lk_lock_file = file;
lkp->lk_lock_line = line;
@@ -952,7 +1012,7 @@ spinlock_release_all(volatile struct loc
lkp->lk_exclusivecount = 0;
COUNT_CPU(cpu_num, -count);
lkp->lk_flags &= ~LK_HAVE_EXCL;
- SETHOLDER(lkp, LK_NOPROC, 0, LK_NOCPU);
+ SETHOLDER(lkp, NULL, LK_NOCPU);
#if defined(LOCKDEBUG)
lkp->lk_unlock_file = file;
lkp->lk_unlock_line = line;
@@ -1011,7 +1071,7 @@ spinlock_acquire_count(volatile struct l
RETURN_ADDRESS);
lkp->lk_flags &= ~LK_WANT_EXCL;
lkp->lk_flags |= LK_HAVE_EXCL;
- SETHOLDER(lkp, LK_NOPROC, 0, cpu_num);
+ SETHOLDER(lkp, NULL, cpu_num);
#if defined(LOCKDEBUG)
lkp->lk_lock_file = file;
lkp->lk_lock_line = line;
@@ -1033,7 +1093,7 @@ spinlock_acquire_count(volatile struct l
* routines to display ststus about contained locks.
*/
void
-lockmgr_printinfo(volatile struct lock *lkp)
+lockmgr_printinfo(volatile const struct lock *lkp)
{
if (lkp->lk_sharecount)
Index: sys/lock.h
===================================================================
RCS file: /cvsroot/src/sys/sys/lock.h,v
retrieving revision 1.66
diff -u -p -r1.66 lock.h
--- sys/lock.h 7 Sep 2006 01:59:23 -0000 1.66
+++ sys/lock.h 17 Nov 2006 17:21:58 -0000
@@ -132,8 +132,7 @@ struct lock {
union {
struct {
/* pid of exclusive lock holder */
- pid_t lk_sleep_lockholder;
- lwpid_t lk_sleep_locklwp;
+ struct lwp * lk_sleep_lwp;
/* priority at which to sleep */
int lk_sleep_prio;
@@ -153,8 +152,9 @@ struct lock {
} lk_un_spin;
} lk_un;
-#define lk_lockholder lk_un.lk_un_sleep.lk_sleep_lockholder
-#define lk_locklwp lk_un.lk_un_sleep.lk_sleep_locklwp
+#define lk_lockholder lk_un.lk_un_sleep.lk_sleep_lwp->l_proc->p_pid
+#define lk_locklwp lk_un.lk_un_sleep.lk_sleep_lwp->l_lid
+#define lk_lwp_p lk_un.lk_un_sleep.lk_sleep_lwp
#define lk_prio lk_un.lk_un_sleep.lk_sleep_prio
#define lk_timo lk_un.lk_un_sleep.lk_sleep_timo
#define lk_newlock lk_un.lk_un_sleep.lk_newlock
@@ -181,7 +181,7 @@ struct lock {
0, /* recurselevel */ \
0, /* waitcount */ \
(wmesg), /* waitmesg */ \
- { .lk_un_sleep = { 0, 0, (prio), (timo), NULL } }, \
+ { .lk_un_sleep = { NULL, (prio), (timo), NULL } }, \
}
#else
#define LOCK_INITIALIZER(prio, wmesg, timo, flags) \
@@ -192,7 +192,7 @@ struct lock {
0, /* recurselevel */ \
0, /* waitcount */ \
(wmesg), /* waitmesg */ \
- { .lk_un_sleep = { 0, 0, (prio), (timo), NULL } }, \
+ { .lk_un_sleep = { NULL, (prio), (timo), NULL } }, \
NULL, /* lk_lock_file */ \
NULL, /* lk_unlock_file */ \
0, /* lk_lock_line */ \
@@ -322,7 +322,7 @@ int lockmgr(volatile struct lock *, u_in
#endif /* LOCKDEBUG */
void transferlockers(struct lock *, struct lock *);
int lockstatus(struct lock *);
-void lockmgr_printinfo(volatile struct lock *);
+void lockmgr_printinfo(volatile const struct lock *);
#if defined(LOCKDEBUG)
void spinlock_switchcheck(void);
Index: sys/lwp.h
===================================================================
RCS file: /cvsroot/src/sys/sys/lwp.h,v
retrieving revision 1.41
diff -u -p -r1.41 lwp.h
--- sys/lwp.h 30 Jul 2006 21:58:11 -0000 1.41
+++ sys/lwp.h 17 Nov 2006 17:21:58 -0000
@@ -118,6 +118,8 @@ extern struct lwp lwp0; /* LWP for pro
#define L_SELECT 0x00000040 /* [*] Selecting; wakeup/waiting danger. */
#define L_SINTR 0x00000080 /* [*] Sleep is interruptible. */
#define L_SA 0x00000400 /* [*] Scheduler activations LWP */
+#define L_LOCK 0x00000800 /* Waiting for a lock. l_wchan
+ * points to a valid struct lock. */
#define L_SA_UPCALL 0x00200000 /* SA upcall is pending */
#define L_SA_BLOCKING 0x00400000 /* Blocking in tsleep() */
#define L_DETACHED 0x00800000 /* Won't be waited for. */
--YZ5djTAD1cGYuMQK
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="deadlock.diff"
Index: sys/syscall.h
===================================================================
RCS file: /cvsroot/src/sys/sys/syscall.h,v
retrieving revision 1.174
diff -u -p -r1.174 syscall.h
--- sys/syscall.h 1 Sep 2006 21:04:45 -0000 1.174
+++ sys/syscall.h 17 Nov 2006 17:21:58 -0000
@@ -1,4 +1,4 @@
-/* $NetBSD: syscall.h,v 1.174 2006/09/01 21:04:45 matt Exp $ */
+/* $NetBSD$ */
/*
* System call numbers.
@@ -1100,6 +1100,12 @@
/* syscall: "__fhstat40" ret: "int" args: "const void *" "size_t" "struct stat *" */
#define SYS___fhstat40 398
-#define SYS_MAXSYSCALL 399
+/* syscall: "lock_1" ret: "void" args: */
+#define SYS_lock_1 399
+
+/* syscall: "lock_2" ret: "void" args: */
+#define SYS_lock_2 400
+
+#define SYS_MAXSYSCALL 401
#define SYS_NSYSENT 512
#endif /* _SYS_SYSCALL_H_ */
Index: sys/syscallargs.h
===================================================================
RCS file: /cvsroot/src/sys/sys/syscallargs.h,v
retrieving revision 1.156
diff -u -p -r1.156 syscallargs.h
--- sys/syscallargs.h 1 Sep 2006 21:04:45 -0000 1.156
+++ sys/syscallargs.h 17 Nov 2006 17:21:58 -0000
@@ -1,4 +1,4 @@
-/* $NetBSD: syscallargs.h,v 1.156 2006/09/01 21:04:45 matt Exp $ */
+/* $NetBSD$ */
/*
* System call argument lists.
@@ -2445,4 +2445,8 @@ int sys___fhstatvfs140(struct lwp *, voi
int sys___fhstat40(struct lwp *, void *, register_t *);
+int sys_lock_1(struct lwp *, void *, register_t *);
+
+int sys_lock_2(struct lwp *, void *, register_t *);
+
#endif /* _SYS_SYSCALLARGS_H_ */
Index: kern/init_sysent.c
===================================================================
RCS file: /cvsroot/src/sys/kern/init_sysent.c,v
retrieving revision 1.181
diff -u -p -r1.181 init_sysent.c
--- kern/init_sysent.c 1 Sep 2006 21:04:45 -0000 1.181
+++ kern/init_sysent.c 17 Nov 2006 17:21:57 -0000
@@ -1,4 +1,4 @@
-/* $NetBSD: init_sysent.c,v 1.181 2006/09/01 21:04:45 matt Exp $ */
+/* $NetBSD$ */
/*
* System call switch table.
@@ -8,7 +8,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: init_sysent.c,v 1.181 2006/09/01 21:04:45 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD$");
#include "opt_ktrace.h"
#include "opt_nfsserver.h"
@@ -1049,9 +1049,9 @@ struct sysent sysent[] = {
{ 3, s(struct sys___fhstat40_args), 0,
sys___fhstat40 }, /* 398 = __fhstat40 */
{ 0, 0, 0,
- sys_nosys }, /* 399 = filler */
+ sys_lock_1 }, /* 399 = lock_1 */
{ 0, 0, 0,
- sys_nosys }, /* 400 = filler */
+ sys_lock_2 }, /* 400 = lock_2 */
{ 0, 0, 0,
sys_nosys }, /* 401 = filler */
{ 0, 0, 0,
Index: kern/sys_deadlock.c
===================================================================
RCS file: kern/sys_deadlock.c
diff -N kern/sys_deadlock.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ kern/sys_deadlock.c 17 Nov 2006 17:21:58 -0000
@@ -0,0 +1,37 @@
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/syscallargs.h>
+#include <sys/lock.h>
+
+
+static struct lock lock1 = LOCK_INITIALIZER(PRIBIO, "lock1", 0, 0);
+
+static struct lock lock2 = LOCK_INITIALIZER(PRIBIO, "lock2", 0, 0);
+
+int
+sys_lock_1(struct lwp *l, void *v, register_t *retval)
+{
+ lockmgr(&lock1, LK_EXCLUSIVE, NULL);
+
+ while(1) {
+ lockmgr(&lock2, LK_EXCLUSIVE, NULL);
+ yield();
+ lockmgr(&lock2, LK_RELEASE, NULL);
+ yield();
+ }
+ return 0;
+}
+
+int
+sys_lock_2(struct lwp *l, void *v, register_t *retval)
+{
+ lockmgr(&lock2, LK_EXCLUSIVE, NULL);
+
+ while(1) {
+ // lockmgr(&lock1, LK_EXCLUSIVE, NULL);
+ yield();
+ // lockmgr(&lock1, LK_RELEASE, NULL);
+ yield();
+ }
+ return 0;
+}
Index: kern/syscalls.c
===================================================================
RCS file: /cvsroot/src/sys/kern/syscalls.c,v
retrieving revision 1.177
diff -u -p -r1.177 syscalls.c
--- kern/syscalls.c 1 Sep 2006 22:23:18 -0000 1.177
+++ kern/syscalls.c 17 Nov 2006 17:21:58 -0000
@@ -1,4 +1,4 @@
-/* $NetBSD: syscalls.c,v 1.177 2006/09/01 22:23:18 matt Exp $ */
+/* $NetBSD$ */
/*
* System call names.
@@ -8,7 +8,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: syscalls.c,v 1.177 2006/09/01 22:23:18 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD$");
#if defined(_KERNEL_OPT)
#include "opt_ktrace.h"
@@ -534,4 +534,6 @@ const char *const syscallnames[] = {
"__fhopen40", /* 396 = __fhopen40 */
"__fhstatvfs140", /* 397 = __fhstatvfs140 */
"__fhstat40", /* 398 = __fhstat40 */
+ "lock_1", /* 399 = lock_1 */
+ "lock_2", /* 400 = lock_2 */
};
Index: kern/syscalls.master
===================================================================
RCS file: /cvsroot/src/sys/kern/syscalls.master,v
retrieving revision 1.159
diff -u -p -r1.159 syscalls.master
--- kern/syscalls.master 1 Sep 2006 20:58:18 -0000 1.159
+++ kern/syscalls.master 17 Nov 2006 17:21:58 -0000
@@ -797,3 +797,5 @@
size_t fh_size, struct statvfs *buf, int flags); }
398 STD { int sys___fhstat40(const void *fhp, \
size_t fh_size, struct stat *sb); }
+399 STD { void sys_lock_1(void); }
+400 STD { void sys_lock_2(void); }
--YZ5djTAD1cGYuMQK
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="deadlock.c"
#include <sys/syscall.h>
#include <unistd.h>
/*
 * Trigger the in-kernel deadlock test: the parent enters the lock_1
 * syscall (399) and the child enters lock_2 (400), so the two test
 * syscalls run concurrently and contend on the kernel-side locks.
 * Neither syscall is expected to return; reaching the final return
 * means something went wrong, hence exit status 1.
 */
int main(void)
{
	switch (fork()) {
	case -1:
		/* fork failed; the original fell through to the parent
		 * path here and issued syscall(399) anyway. */
		return 1;
	case 0:
		syscall(400);	/* child: sys_lock_2 */
		break;
	default:
		syscall(399);	/* parent: sys_lock_1 */
		break;
	}
	return 1;
}
--YZ5djTAD1cGYuMQK--