Subject: kern/24668: LK_SPIN spinout consistency check triggered by LOCKDEBUG
To: None <gnats-bugs@gnats.netbsd.org>
From: None <he@netbsd.org>
List: netbsd-bugs
Date: 03/04/2004 14:51:34
>Number: 24668
>Category: kern
>Synopsis: LK_SPIN spinout consistency check triggered by LOCKDEBUG
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: kern-bug-people
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Thu Mar 04 13:52:01 UTC 2004
>Closed-Date:
>Last-Modified:
>Originator: Havard Eidnes
>Release: NetBSD 1.6ZK
>Organization:
>Environment:
System:
Architecture: i386
Machine: i386
>Description:
Doing a "build.sh -j 3" and some setiathomes in the
background, a 4-CPU machine tripped over a LOCKDEBUG
consistency check: "LK_SPIN spinout". Various trace
information follows below. It seems that some of the printf
output is mangled -- output from multiple CPUs?
LK_SPIN spinout, excl 1, share 0
LK_SPIN spinout, excl 1, share 0
LK_SPIN spinout, excl 1, share 0
held by CPU 3
held by CPU 3
held by CPU 3
last locked at /usr/src/sys/kern/kern_lock.c:1355
last locked at /usr/src/sys/kern/kern_lock.c:1355
last locked at /usr/src/sys/kern/kern_lock.c:1355
last unlocked at /usr/src/sys/kern/kern_lock.c:1364
last unlocked at /usr/src/sys/kern/kern_lock.c:1364
_lockmgr(_lockmgr(c07fa8a0c07fa8a0,last unlocked at /usr/src/sys/kern/kern_lock.c:1364
400042,,400002_lockmgr(0,c07fa8a00,,c06fb835c06fb835,,53854b,) at ) at netbsd:_lockmgr400002+0x53cnetbsd:_lockmgr
,+0x53c_kernel_lock(042,,c06fb835400006,,54b0_kernel_proc_lock() at ,cdc9092cc0754200,,831a00031e,netbsd:_lockmgr) at 0+0x53cnetbsd:_kernel_lock,
+0x3b2_kernel_proc_lock(
,cdc90c38pmap_load(,1fda61b7d0,) at 1fnetbsd:_kernel_proc_lock4854a000,,10001f+0x390,
bfbf001ftrap(,,) at 2100000,netbsd:trapda61bba8) at ) at netbsd:pmap_load+0x8ab+0xab
nStopped in pid 497.1 (setiathome) at netbsd:cpu_Debugger+0x4: leave
db{2}> machine cpu 0
using CPU 0
db{2}> trace
__cpu_simple_lock(c079921c,ffffffff,0,5,18) at netbsd:__cpu_simple_lock+0x1d
printf_nolog(c07045fd,da7cbcc4,c07012f5,da7cbd78,202d2d2d) at netbsd:printf_nolog+0x32
lock_printf(c07012f5,6,c08a806f,c036f2e9,0) at netbsd:lock_printf+0x4c
db_nextframe(da7cbdd4,da7cbdd8,da7cbddc,da7cbde0,da7cbfa4) at netbsd:db_nextframe+0x116
db_stack_trace_print(da7cbe8c,1,fffd,c06ea638,c036d2b8) at netbsd:db_stack_trace_print+0x199
acquire(c07fa8a0,da7cbecc,400000,0,600) at netbsd:acquire+0x181
_lockmgr(c07fa8a0,400002,0,c06fb835,54b) at netbsd:_lockmgr+0x53c
_kernel_proc_lock(cdc9092c,831a000,0,2,da61b7d0) at netbsd:_kernel_proc_lock+0x39
trap() at netbsd:trap+0x8ab
--- trap (number 6) ---
0x81a2767:
db{2}> show reg
ds 0x10
es 0x10
fs 0x30
gs 0x10
edi 0xda7cbde0
esi 0xc036d2b8 lock_printf
ebp 0xda7cbc8c
ebx 0
edx 0xc079921c kprintf_slock
ecx 0x96
eax 0x1
eip 0xc041f335 __cpu_simple_lock+0x1d
cs 0x8
eflags 0x246
esp 0xcc2affbc usb_all_tasks+0xba98784
ss 0xcc2a0010 usb_all_tasks+0xba887d8
netbsd:__cpu_simple_lock+0x1d: decl %eax
db{2}> machine cpu 1
using CPU 1
db{2}> trace
com_common_putc(800,0,3f8,6e,120010) at netbsd:com_common_putc+0xd5
comcnputc(800,6e,da640bcc,3f8,1) at netbsd:comcnputc+0x1d
cnputc(6e,c026de69,8,246,c035f517) at netbsd:cnputc+0x26
putchar(6e,1,0,ffffffe8,0) at netbsd:putchar+0xc5
kprintf(c07045fd,1,0,0,da640c7c) at netbsd:kprintf+0x359
printf_nolog(c07045fd,da640c84,c06fa8d1,da640d38,6274656e) at netbsd:printf_nolog+0x46
lock_printf(c06fa8d1,c06fa8a1,c08ac3c9,0,c06fa8a1) at netbsd:lock_printf+0x4c
db_printsym(c036f2e9,41,c036d2b8,10,73752f20) at netbsd:db_printsym+0x153
db_stack_trace_print(da640e8c,1,fffe,c06ea638,c036d2b8) at netbsd:db_stack_trace_print+0x15b
acquire(c07fa8a0,da640ecc,400000,0,600) at netbsd:acquire+0x181
_lockmgr(c07fa8a0,400002,0,c06fb835,54b) at netbsd:_lockmgr+0x53c
_kernel_proc_lock(cdc90c38,4854a000,0,2,da61bba8) at netbsd:_kernel_proc_lock+0x39
trap() at netbsd:trap+0x8ab
--- trap (number 6) ---
0x81bef08:
db{2}> show reg
ds 0x10
es 0x10
fs 0x30
gs 0x10
edi 0xd
esi 0
ebp 0xda640b4c
ebx 0x3f8
edx 0x3fd
ecx 0x3fd
eax 0
eip 0xc026de69 com_common_putc+0xd5
cs 0x8
eflags 0x246
esp 0xcc2cbfbc usb_all_tasks+0xbab4784
ss 0xcc2c0010 usb_all_tasks+0xbaa87d8
netbsd:com_common_putc+0xd5: testb $0x20,%al
db{2}> machine cpu 2
using CPU 2
db{2}> trace
cpu_Debugger(c07985e4,cdc83f0c,c037dfab,cdc83f30,c07fa8a0) at netbsd:cpu_Debugger+0x4
acquire(c07fa8a0,cdc83f30,400040,0,600) at netbsd:acquire+0x189
_lockmgr(c07fa8a0,400042,0,c06fb835,538) at netbsd:_lockmgr+0x53c
_kernel_lock(42,400006,0,c0754200,31e) at netbsd:_kernel_lock+0x3b
pmap_load(1f,1f,10001f,bfbf001f,100000) at netbsd:pmap_load+0xab
db{2}> show reg
ds 0x10
es 0x10
fs 0x30
gs 0x10
edi 0
esi 0
ebp 0xcdc83ed0 usb_all_tasks+0xd46c698
ebx 0xc07fa8a0 kernel_lock
edx 0xc06fa8a1 copyright+0x10e61
ecx 0xc08433c0 usb_all_tasks+0x2bb88
eax 0xfffd
eip 0xc0424c5c cpu_Debugger+0x4
cs 0x8
eflags 0x286
esp 0xcdc83ed0 usb_all_tasks+0xd46c698
ss 0x10
netbsd:cpu_Debugger+0x4: leave
db{2}> machine cpu 3
using CPU 3
db{2}> trace
buf_canrelease(400,0,c2a4e0ac,c2a4e0ac,0) at netbsd:buf_canrelease+0x3b
allocbuf(c2a4e0ac,2000,0,f8014cf8,4) at netbsd:allocbuf+0x1d1
getblk(cfedf9dc,fffffff4,ffffffff,2000,0) at netbsd:getblk+0x22c
ffs_balloc_ufs1(da6ccc94,4a69ce0,844a000,cfedf9dc,c05e92c0) at netbsd:ffs_balloc_ufs1+0xc3a
VOP_BALLOC(cfedf9dc,18000,0,2000,c2655700) at netbsd:VOP_BALLOC+0x46
ufs_gop_alloc(cfedf9dc,18000,0,2000,0) at netbsd:ufs_gop_alloc+0x93
ffs_write(da6cce24,40471815,da6cce5c,c03bc940,c05e8b40) at netbsd:ffs_write+0x730
VOP_WRITE(cfedf9dc,da6ccec4,25,c2655700,cfedf9dc) at netbsd:VOP_WRITE+0x34
vn_write(da5f5a1c,da5f5a50,da6ccec4,c2655700,1) at netbsd:vn_write+0xc0
dofilewrite(da61bd94,1,da5f5a1c,83c9000,2000) at netbsd:dofilewrite+0x85
sys_write(cdc90278,da6ccf64,da6ccf5c,4,0) at netbsd:sys_write+0x6f
syscall_plain(da6ccfa8,1f,1f,1f,1f) at netbsd:syscall_plain+0x17e
db{2}> show reg
ds 0x10
es 0x10
fs 0x30
gs 0x10
edi 0x2000
esi 0
ebp 0xda6ccaac
ebx 0x163c800
edx 0x1330800
ecx 0x923ac00
eax 0xc31c27ec usb_all_tasks+0x29aafb4
eip 0xc03aa9eb buf_canrelease+0x3b
cs 0x8
eflags 0x206
esp 0xcc293fbc usb_all_tasks+0xba7c784
ss 0xcc290010 usb_all_tasks+0xba787d8
netbsd:buf_canrelease+0x3b: movl 0x98(%eax),%eax
db{2}>
>How-To-Repeat:
The recipe seems to be "put some load on a 4-cpu machine
running a LOCKDEBUG kernel; watch it crash and burn".
>Fix:
Sorry, don't know. Hints for further debugging greatly
appreciated.
>Release-Note:
>Audit-Trail:
>Unformatted: