Subject: port-sparc/19831: 1.6_STABLE locks up under load
To: None <gnats-bugs@gnats.netbsd.org>
From: Hauke Fath <hauke@Espresso.Rhein-Neckar.DE>
List: netbsd-bugs
Date: 01/13/2003 08:06:46
>Number:         19831
>Category:       port-sparc
>Synopsis:       1.6_STABLE locks up under load
>Confidential:   no
>Severity:       critical
>Priority:       medium
>Responsible:    port-sparc-maintainer
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Sun Jan 12 23:12:01 PST 2003
>Closed-Date:
>Last-Modified:
>Originator:     Hauke Fath <hauke@Espresso.Rhein-Neckar.DE>
>Release:        NetBSD 1.6_STABLE
>Organization:
Einzeln auftretender Radfahrer
>Environment:
System: NetBSD pizza.causeuse.org 1.6_STABLE NetBSD 1.6_STABLE (PIZZA) #6: Tue Jan 7 22:51:54 CET 2003 hauke@pizza.causeuse.org:/usr/src/sys/arch/sparc/compile/PIZZA sparc
Architecture: sparc
Machine: sparc
>Description:

	Under heavy load - typically, a nightly Amanda server run with
	the /etc/daily cron jobs kicking in - my ss10 running 1.6 with
	a STABLE kernel from early Jan 2003 sources is likely to lock
	up.

	I have reported this before in 
	http://mail-index.netbsd.org/port-sparc/2002/10/20/0000.html
	and have just experienced the very same lockup again. /tmp was
	mounted as an mfs, and most mounts were softdep. Note that the
	lockups appear to happen with softdep mounts only, and that the
	kernel already includes the mfs and softdep fixes that were
	recently pulled up to the 1.6 branch.

	As suggested in
	http://mail-index.netbsd.org/port-sparc/2002/10/27/0001.html
	I set a breakpoint at mi_switch(); it is never triggered, and
	if I let the kernel run and break into ddb repeatedly, it is
	always stopped at the same spot.

>How-To-Repeat:

	Run 1.6 with a kernel built from the tip of the 1.6 branch on
	a ss10. Set up the machine with softdep mounts, /tmp mounted
	on mfs; run Amanda server and the set of nightly housekeeping
	cron jobs.

ddb Stack traces (also at
http://www.causeuse.org/hauke/NetBSD/16_STABLE_2003-01-13_crashlog
with unwrapped lines):

Stopped in pid 1367 (dumper) at cpu_Debugger+0x4:       jmpl            [%o7 + 0x8], %g0
db{0}> t
zstty_stint(0xf08bbc68, 0x0, 0xf01bf7d4, 0xf024c930, 0xf0285800, 0x2ebd79) at zstty_stint+0x88
zsc_intr_hard(0x8, 0xf08b4eb0, 0xf0245800, 0x809c4000, 0xa00, 0xf0ebb000) at zsc_intr_hard+0x68
zshard(0x0, 0xf01b5478, 0x0, 0xf0335000, 0xf029aba8, 0xf029abac) at zshard+0x40
sparc_interrupt44c(0xff000000, 0xf029d588, 0x86cd, 0xf0335010, 0x8e2c, 0xf02412e0) at sparc_interrupt44c+0x128
uvm_km_pgremove_intrsafe(0xf0b3b000, 0x8000, 0xf0b43000, 0xf02808e8, 0xf029a800, 0xf0256400) at uvm_km_pgremove_intrsafe+0xf8
uvm_unmap_remove(0xf02563d8, 0x8000, 0xf0b43000, 0xf02413c4, 0x100, 0xf024c6cc) at uvm_unmap_remove+0x164
uvm_unmap(0xf02563d8, 0xf0b3b000, 0xf0b43000, 0xf6c1e210, 0xf00b1c00, 0xf04f4098) at uvm_unmap+0x100
uvm_km_free(0xf02563d8, 0xf0b3b000, 0x8000, 0xf0248800, 0xf08b1a20, 0x8000) at uvm_km_free+0x14
free(0xf0b3b000, 0x68, 0xf, 0xf0285c00, 0xf092f630, 0xec5b5c00) at free+0x88
softdep_disk_write_complete(0xf21068c0, 0xf0072adc, 0xf21068c0, 0x500, 0xfc070fff, 0xf0ebb000) at softdep_disk_write_complete+0x2c0
biodone(0xf21068c0, 0x500, 0x0, 0xf01cc3ec, 0xf0b09f58, 0xfc071000) at biodone+0x74
scsipi_complete(0x0, 0x22009, 0x200000, 0x0, 0x0, 0x0) at scsipi_complete+0x46c
scsipi_done(0xf0a108c8, 0x30553, 0xf01baf00, 0xf024c6cc, 0x0, 0x0) at scsipi_done+0x154
ncr53c9x_done(0xf0931e00, 0xf0933118, 0x14, 0xf01baf14, 0x100, 0xf024c6cc) at ncr53c9x_done+0x1c8
ncr53c9x_intr(0x3, 0xf001039c, 0xf00b18c0, 0xf6c1e210, 0xf00b1c00, 0xf04f4098) at ncr53c9x_intr+0x12a8
sparc_interrupt44c(0xf0eb3000, 0xf02ececc, 0xffffffff, 0x0, 0xf02ece00, 0x3a) at sparc_interrupt44c+0x128
pmap_kenter_pa4m(0xf0eb3000, 0xffff0eb3, 0xf028865c, 0xf6c1e210, 0xf00b1c00, 0x0) at pmap_kenter_pa4m+0x8c
uvm_pagermapout(0xf0eb3000, 0x8000, 0x0, 0xf0288400, 0x8000, 0xf0ebb000) at uvm_pagermapout+0xc
genfs_getpages(0x0, 0x0, 0x0, 0x1668000, 0xf0b09f58, 0xf6c4a618) at genfs_getpages+0xe8c
ffs_getpages(0xf6cce920, 0xbc, 0xf08b3400, 0xf0079bcc, 0x0, 0xf6cce918) at ffs_getpages+0x84
ubc_fault(0xf6ccea68, 0xf0211800, 0x2, 0x1, 0x0, 0x0) at ubc_fault+0x1e4
uvm_fault(0x0, 0xf622b000, 0x0, 0x1, 0x7, 0xf622a000) at uvm_fault+0x710
mem_access_fault4m(0x9, 0x3a6, 0xf622a000, 0xf6cceb98, 0x0, 0x7fff) at mem_access_fault4m+0x22c
memfault_sun4m(0x39ea8, 0xf622a000, 0x1ff8, 0xf6ccd000, 0x45b55479, 0x4a3adc9a) at memfault_sun4m+0xe4
Ldocopy(0xf0002000, 0x2000, 0xf6ccee28, 0xf6ccec10, 0x2, 0x0) at Ldocopy+0x10
ffs_write(0x0, 0x1668000, 0x0, 0x1668000, 0xf09e8000, 0x0) at ffs_write+0x5fc
vn_write(0xf6d131d8, 0xf6d13200, 0xf6ccee28, 0xf0b17600, 0x1, 0xf00edef8) at vn_write+0x110
dofilewrite(0xf6c1e210, 0x4, 0xf6d131d8, 0x8000, 0x8000, 0xf6d13200) at dofilewrite+0x8c
sys_write(0xf6c1e210, 0xf6ccef28, 0xf6ccef20, 0xf00c6bac, 0xf0a, 0xf0002000) at sys_write+0x58
syscall(0x4, 0xf6ccefb0, 0x0, 0x5e3, 0x4000, 0xf6d13510) at syscall+0x1b8
_syscall(0x4, 0x39ea8, 0x8000, 0x0, 0x20, 0x39c00) at _syscall+0xb8
db{0}> break mi_switch
db{0}> continue
Stopped in pid 1367 (dumper) at cpu_Debugger+0x4:       jmpl            [%o7 + 0x8], %g0
db{0}> t
zstty_stint(0xf08bbc68, 0x0, 0xf01bf7d4, 0xf024c930, 0xf0285800, 0x2fc59d) at zstty_stint+0x88
zsc_intr_hard(0x8, 0xf08b4eb0, 0xf0245800, 0x161, 0xffff, 0xf0ebb000) at zsc_intr_hard+0x68
zshard(0x0, 0xf01b5478, 0x0, 0xf0335000, 0xf029aba8, 0xf029abac) at zshard+0x40
sparc_interrupt44c(0xff000000, 0xf029d588, 0x86cd, 0xf0335010, 0x8e2c, 0xf02412e0) at sparc_interrupt44c+0x128
uvm_km_pgremove_intrsafe(0xf0b3b000, 0x8000, 0xf0b43000, 0xf02808e8, 0xf029a800, 0xf0256400) at uvm_km_pgremove_intrsafe+0xf8
uvm_unmap_remove(0xf02563d8, 0x8000, 0xf0b43000, 0xf02413c4, 0x100, 0xf024c6cc) at uvm_unmap_remove+0x164
uvm_unmap(0xf02563d8, 0xf0b3b000, 0xf0b43000, 0xf6c1e210, 0xf00b1c00, 0xf04f4098) at uvm_unmap+0x100
uvm_km_free(0xf02563d8, 0xf0b3b000, 0x8000, 0xf0248800, 0xf08b1a20, 0x8000) at uvm_km_free+0x14
free(0xf0b3b000, 0x68, 0xf, 0xf0285c00, 0xf092f630, 0xec5b5c00) at free+0x88
softdep_disk_write_complete(0xf21068c0, 0xf0072adc, 0xf21068c0, 0x500, 0xfc070fff, 0xf0ebb000) at softdep_disk_write_complete+0x2c0
biodone(0xf21068c0, 0x500, 0x0, 0xf01cc3ec, 0xf0b09f58, 0xfc071000) at biodone+0x74
scsipi_complete(0x0, 0x22009, 0x200000, 0x0, 0x0, 0x0) at scsipi_complete+0x46c
scsipi_done(0xf0a108c8, 0x30553, 0xf01baf00, 0xf024c6cc, 0x0, 0x0) at scsipi_done+0x154
ncr53c9x_done(0xf0931e00, 0xf0933118, 0x14, 0xf01baf14, 0x100, 0xf024c6cc) at ncr53c9x_done+0x1c8
ncr53c9x_intr(0x3, 0xf001039c, 0xf00b18c0, 0xf6c1e210, 0xf00b1c00, 0xf04f4098) at ncr53c9x_intr+0x12a8
sparc_interrupt44c(0xf0eb3000, 0xf02ececc, 0xffffffff, 0x0, 0xf02ece00, 0x3a) at sparc_interrupt44c+0x128
pmap_kenter_pa4m(0xf0eb3000, 0xffff0eb3, 0xf028865c, 0xf6c1e210, 0xf00b1c00, 0x0) at pmap_kenter_pa4m+0x8c
uvm_pagermapout(0xf0eb3000, 0x8000, 0x0, 0xf0288400, 0x8000, 0xf0ebb000) at uvm_pagermapout+0xc
genfs_getpages(0x0, 0x0, 0x0, 0x1668000, 0xf0b09f58, 0xf6c4a618) at genfs_getpages+0xe8c
ffs_getpages(0xf6cce920, 0xbc, 0xf08b3400, 0xf0079bcc, 0x0, 0xf6cce918) at ffs_getpages+0x84
ubc_fault(0xf6ccea68, 0xf0211800, 0x2, 0x1, 0x0, 0x0) at ubc_fault+0x1e4
uvm_fault(0x0, 0xf622b000, 0x0, 0x1, 0x7, 0xf622a000) at uvm_fault+0x710
mem_access_fault4m(0x9, 0x3a6, 0xf622a000, 0xf6cceb98, 0x0, 0x7fff) at mem_access_fault4m+0x22c
memfault_sun4m(0x39ea8, 0xf622a000, 0x1ff8, 0xf6ccd000, 0x45b55479, 0x4a3adc9a) at memfault_sun4m+0xe4
Ldocopy(0xf0002000, 0x2000, 0xf6ccee28, 0xf6ccec10, 0x2, 0x0) at Ldocopy+0x10
ffs_write(0x0, 0x1668000, 0x0, 0x1668000, 0xf09e8000, 0x0) at ffs_write+0x5fc
vn_write(0xf6d131d8, 0xf6d13200, 0xf6ccee28, 0xf0b17600, 0x1, 0xf00edef8) at vn_write+0x110
dofilewrite(0xf6c1e210, 0x4, 0xf6d131d8, 0x8000, 0x8000, 0xf6d13200) at dofilewrite+0x8c
sys_write(0xf6c1e210, 0xf6ccef28, 0xf6ccef20, 0xf00c6bac, 0xf0a, 0xf0002000) at sys_write+0x58
syscall(0x4, 0xf6ccefb0, 0x0, 0x5e3, 0x4000, 0xf6d13510) at syscall+0x1b8
_syscall(0x4, 0x39ea8, 0x8000, 0x0, 0x20, 0x39c00) at _syscall+0xb8
db{0}> sync
syncing disks... hme0: status=30001<GOTFRAME,RXTOHOST,NORXD>
panic: lockmgr: locking against myself
Begin traceback...
genfs_lock(0xf0240930, 0xf08b3400, 0xf00ef568, 0x7c, 0xf0ae9f20, 0x0) at genfs_lock+0x10
vn_lock(0xf6c4a618, 0x10012, 0x2, 0xf6c1e210, 0xf00b1c00, 0x2e) at vn_lock+0xb8
vget(0xf6c4a618, 0x10012, 0x0, 0xf0285b00, 0xf0285ac8, 0x100) at vget+0x138
ffs_sync(0xf0211000, 0x2, 0x0, 0x0, 0xf09d4400, 0xf0b17600) at ffs_sync+0x138
sys_sync(0xf6c1e210, 0x0, 0x0, 0xf0287c00, 0xf0240b58, 0x0) at sys_sync+0x98
vfs_shutdown(0xf0285400, 0xf0248800, 0xf0002000, 0xf024ec00, 0xf0255a30, 0xf0247000) at vfs_shutdown+0x74
cpu_reboot(0x100, 0x0, 0xf0284c00, 0xf0255800, 0xf00c424c, 0x0) at cpu_reboot+0x6c
db_sync_cmd(0xf01e0f24, 0x0, 0xffffffff, 0xf0240c58, 0xf009d7dc, 0xf0284c90) at db_sync_cmd+0x10
db_command(0xf0246fe0, 0x0, 0xf0002000, 0xf0247000, 0xf0284c00, 0xf0240d40) at db_command+0x278
db_command_loop(0xf01e0f24, 0xf01e0f24, 0xf0284c00, 0xf6c1e210, 0xf5e19000, 0xf029abac) at db_command_loop+0xa4
db_trap(0xf024f000, 0x0, 0x0, 0xf0240ed0, 0xf5e19000, 0xf02412e0) at db_trap+0x138
kdb_trap(0x81, 0xf0240fe0, 0xf0002000, 0x83, 0xffff, 0xf0256400) at kdb_trap+0xe4
trap(0x81, 0x40400cc2, 0xf01e0f20, 0xf0240fe0, 0xffff, 0xf024c6cc) at trap+0x38
Lslowtrap_reenter(0x0, 0x23, 0x23, 0x4dd, 0xffff, 0xf04f4098) at Lslowtrap_reenter+0x38
zs_abort(0x0, 0x100, 0xf02562c0, 0xf6c1e210, 0xf00b1c00, 0x8000) at zs_abort+0x24
zstty_stint(0xf08bbc68, 0x0, 0xf01bf7d4, 0xf024c930, 0xf0285800, 0x2fc59d) at zstty_stint+0x88
zsc_intr_hard(0x8, 0xf08b4eb0, 0xf0245800, 0x161, 0xffff, 0xf0ebb000) at zsc_intr_hard+0x68
zshard(0x0, 0xf01b5478, 0x0, 0xf0335000, 0xf029aba8, 0xf029abac) at zshard+0x40
sparc_interrupt44c(0xff000000, 0xf029d588, 0x86cd, 0xf0335010, 0x8e2c, 0xf02412e0) at sparc_interrupt44c+0x128
uvm_km_pgremove_intrsafe(0xf0b3b000, 0x8000, 0xf0b43000, 0xf02808e8, 0xf029a800, 0xf0256400) at uvm_km_pgremove_intrsafe+0xf8
uvm_unmap_remove(0xf02563d8, 0x8000, 0xf0b43000, 0xf02413c4, 0x100, 0xf024c6cc) at uvm_unmap_remove+0x164
uvm_unmap(0xf02563d8, 0xf0b3b000, 0xf0b43000, 0xf6c1e210, 0xf00b1c00, 0xf04f4098) at uvm_unmap+0x100
uvm_km_free(0xf02563d8, 0xf0b3b000, 0x8000, 0xf0248800, 0xf08b1a20, 0x8000) at uvm_km_free+0x14
free(0xf0b3b000, 0x68, 0xf, 0xf0285c00, 0xf092f630, 0xec5b5c00) at free+0x88
softdep_disk_write_complete(0xf21068c0, 0xf0072adc, 0xf21068c0, 0x500, 0xfc070fff, 0xf0ebb000) at softdep_disk_write_complete+0x2c0
biodone(0xf21068c0, 0x500, 0x0, 0xf01cc3ec, 0xf0b09f58, 0xfc071000) at biodone+0x74
scsipi_complete(0x0, 0x22009, 0x200000, 0x0, 0x0, 0x0) at scsipi_complete+0x46c
scsipi_done(0xf0a108c8, 0x30553, 0xf01baf00, 0xf024c6cc, 0x0, 0x0) at scsipi_done+0x154
ncr53c9x_done(0xf0931e00, 0xf0933118, 0x14, 0xf01baf14, 0x100, 0xf024c6cc) at ncr53c9x_done+0x1c8
ncr53c9x_intr(0x3, 0xf001039c, 0xf00b18c0, 0xf6c1e210, 0xf00b1c00, 0xf04f4098) at ncr53c9x_intr+0x12a8
sparc_interrupt44c(0xf0eb3000, 0xf02ececc, 0xffffffff, 0x0, 0xf02ece00, 0x3a) at sparc_interrupt44c+0x128
pmap_kenter_pa4m(0xf0eb3000, 0xffff0eb3, 0xf028865c, 0xf6c1e210, 0xf00b1c00, 0x0) at pmap_kenter_pa4m+0x8c
uvm_pagermapout(0xf0eb3000, 0x8000, 0x0, 0xf0288400, 0x8000, 0xf0ebb000) at uvm_pagermapout+0xc
genfs_getpages(0x0, 0x0, 0x0, 0x1668000, 0xf0b09f58, 0xf6c4a618) at genfs_getpages+0xe8c
ffs_getpages(0xf6cce920, 0xbc, 0xf08b3400, 0xf0079bcc, 0x0, 0xf6cce918) at ffs_getpages+0x84
ubc_fault(0xf6ccea68, 0xf0211800, 0x2, 0x1, 0x0, 0x0) at ubc_fault+0x1e4
uvm_fault(0x0, 0xf622b000, 0x0, 0x1, 0x7, 0xf622a000) at uvm_fault+0x710
mem_access_fault4m(0x9, 0x3a6, 0xf622a000, 0xf6cceb98, 0x0, 0x7fff) at mem_access_fault4m+0x22c
memfault_sun4m(0x39ea8, 0xf622a000, 0x1ff8, 0xf6ccd000, 0x45b55479, 0x4a3adc9a) at memfault_sun4m+0xe4
Ldocopy(0xf0002000, 0x2000, 0xf6ccee28, 0xf6ccec10, 0x2, 0x0) at Ldocopy+0x10
ffs_write(0x0, 0x1668000, 0x0, 0x1668000, 0xf09e8000, 0x0) at ffs_write+0x5fc
vn_write(0xf6d131d8, 0xf6d13200, 0xf6ccee28, 0xf0b17600, 0x1, 0xf00edef8) at vn_write+0x110
dofilewrite(0xf6c1e210, 0x4, 0xf6d131d8, 0x8000, 0x8000, 0xf6d13200) at dofilewrite+0x8c
sys_write(0xf6c1e210, 0xf6ccef28, 0xf6ccef20, 0xf00c6bac, 0xf0a, 0xf0002000) at sys_write+0x58
syscall(0x4, 0xf6ccefb0, 0x0, 0x5e3, 0x4000, 0xf6d13510) at syscall+0x1b8
_syscall(0x4, 0x39ea8, 0x8000, 0x0, 0x20, 0x39c00) at _syscall+0xb8
End traceback...
Frame pointer is at 0xf02406c8
Call traceback:
  pc = 0xf01d7980  args = (0x0, 0x40000fe2, 0x0, 0x0, 0xf02407e0, 0x0, 0xf0240730) fp = 0xf0240730
  pc = 0xf00c3a9c  args = (0x104, 0x0, 0xf0249000, 0xf0287c00, 0xf00c4284, 0xf0247000, 0xf0240798) fp = 0xf0240798
  pc = 0xf00b1598  args = (0xf020de80, 0xf0287c00, 0xf0287c00, 0xf6c1e210, 0xf00b1c00, 0x7ff, 0xf0240800) fp = 0xf0240800
  pc = 0xf00ef578  args = (0xf6c4a684, 0x2, 0x10, 0xf0ae9f20, 0x1, 0x58, 0xf0240868) fp = 0xf0240868
  pc = 0xf00ee568  args = (0xf0240930, 0xf08b3400, 0xf00ef568, 0x7c, 0xf0ae9f20, 0x0, 0xf02408d0) fp = 0xf02408d0
  pc = 0xf00e5438  args = (0xf6c4a618, 0x10012, 0x2, 0xf6c1e210, 0xf00b1c00, 0x2e, 0xf0240948) fp = 0xf0240948
  pc = 0xf0077b24  args = (0xf6c4a618, 0x10012, 0x0, 0xf0285b00, 0xf0285ac8, 0x100, 0xf02409b0) fp = 0xf02409b0
  pc = 0xf00e88ec  args = (0xf0211000, 0x2, 0x0, 0x0, 0xf09d4400, 0xf0b17600, 0xf0240a40) fp = 0xf0240a40
  pc = 0xf00e7458  args = (0xf6c1e210, 0x0, 0x0, 0xf0287c00, 0xf0240b58, 0x0, 0xf0240aa8) fp = 0xf0240aa8
  pc = 0xf01d7938  args = (0xf0285400, 0xf0248800, 0xf0002000, 0xf024ec00, 0xf0255a30, 0xf0247000, 0xf0240b18) fp = 0xf0240b18
  pc = 0xf009d7ec  args = (0x100, 0x0, 0xf0284c00, 0xf0255800, 0xf00c424c, 0x0, 0xf0240b80) fp = 0xf0240b80
  pc = 0xf009d370  args = (0xf01e0f24, 0x0, 0xffffffff, 0xf0240c58, 0xf009d7dc, 0xf0284c90, 0xf0240be8) fp = 0xf0240be8
  pc = 0xf009ce68  args = (0xf0246fe0, 0x0, 0xf0002000, 0xf0247000, 0xf0284c00, 0xf0240d40, 0xf0240cd8) fp = 0xf0240cd8
  pc = 0xf00a172c  args = (0xf01e0f24, 0xf01e0f24, 0xf0284c00, 0xf6c1e210, 0xf5e19000, 0xf029abac, 0xf0240d50) fp = 0xf0240d50
  pc = 0xf01e1204  args = (0xf024f000, 0x0, 0x0, 0xf0240ed0, 0xf5e19000, 0xf02412e0, 0xf0240dc0) fp = 0xf0240dc0
  pc = 0xf01defec  args = (0x81, 0xf0240fe0, 0xf0002000, 0x83, 0xffff, 0xf0256400, 0xf0240ed8) fp = 0xf0240ed8
  pc = 0xf00063d0  args = (0x81, 0x40400cc2, 0xf01e0f20, 0xf0240fe0, 0xffff, 0xf024c6cc, 0xf0240f80) fp = 0xf0240f80
  pc = 0xf01b590c  args = (0x0, 0x23, 0x23, 0x4dd, 0xffff, 0xf04f4098, 0xf0241030) fp = 0xf0241030

dumping to dev 7,1 offset 165739
dump

[Locks up hard, no reaction to console break, toggle power switch]


>Fix:
	Appears to be less likely without softdep mounts. Apart from
	that, no idea, sorry.
>Release-Note:
>Audit-Trail:
>Unformatted: