Subject: Re: kern/25285: i386 MP panic: TLB IPI rendezvous failed (mask 1)
To: None <yamt@mwd.biglobe.ne.jp>
From: Paul Dokas <dokas@cs.umn.edu>
List: current-users
Date: 06/10/2004 12:00:55
On Thu, 10 Jun 2004 09:13:46 -0500 Paul Dokas <dokas@cs.umn.edu> wrote:
>
> I've been running with this additional patch for over 12 hours under a
> moderate load and no panics so far. I'll keep trying to crash the machine.
OK, it crashed. CPU 0 shows the same panic location as in all of the previous
panics, so I'm not going to reproduce the whole trace here. The other CPUs seem
to have been attempting to acquire the kernel lock.
Here are the backtraces (copied by hand):
cpu 0:
pmap_tlb_shootdown()
.
.
.
cpu 2:
acquire()
spinlock_acquire_count()
_kernel_lock_acquire_count()
mi_switch()
ltsleep()
sbwait()
so_receive()
soo_read()
dofileread()
sys_read()
syscall_plain()
--- syscall (number 3) ---
cpu 4:
acquire()
spinlock_acquire_count()
_kernel_lock_acquire_count()
mi_switch()
ltsleep()
sys_nanosleep()
syscall_plain()
--- syscall (number 240) ---
cpu 6:
acquire()
spinlock_acquire_count()
_kernel_lock_acquire_count()
mi_switch()
ltsleep()
sched_sync()
And here's a summary of all of the patches that were applied to the running kernel when I got this panic:
*** ./arch/i386/i386/vector.S.orig Tue May 18 10:03:48 2004
--- ./arch/i386/i386/vector.S Wed Jun 9 21:47:15 2004
***************
*** 163,169 ****
pushl $0
pushl $T_ASTFLT
INTRENTRY
- movl $0,_C_LABEL(local_apic)+LAPIC_EOI
movl CPUVAR(ILEVEL),%ebx
cmpl $IPL_IPI,%ebx
jae 2f
--- 163,168 ----
***************
*** 173,178 ****
--- 172,178 ----
sti
pushl %ebx
call _C_LABEL(x86_ipi_handler)
+ movl $0,_C_LABEL(local_apic)+LAPIC_EOI
jmp _C_LABEL(Xdoreti)
2:
orl $(1 << LIR_IPI),CPUVAR(IPENDING)
***************
*** 624,629 ****
--- 624,630 ----
IDTVEC(softserial)
movl $IPL_SOFTSERIAL, CPUVAR(ILEVEL)
+ sti
incl CPUVAR(IDEPTH)
#ifdef MULTIPROCESSOR
call _C_LABEL(x86_softintlock)
***************
*** 642,647 ****
--- 643,649 ----
IDTVEC(softnet)
movl $IPL_SOFTNET, CPUVAR(ILEVEL)
+ sti
incl CPUVAR(IDEPTH)
#ifdef MULTIPROCESSOR
call _C_LABEL(x86_softintlock)
***************
*** 673,678 ****
--- 675,681 ----
IDTVEC(softclock)
movl $IPL_SOFTCLOCK, CPUVAR(ILEVEL)
+ sti
incl CPUVAR(IDEPTH)
#ifdef MULTIPROCESSOR
call _C_LABEL(x86_softintlock)
*** ./arch/i386/i386/spl.S.orig Wed Jun 9 21:43:23 2004
--- ./arch/i386/i386/spl.S Wed Jun 9 21:44:31 2004
***************
*** 109,118 ****
cli
andl CPUVAR(IPENDING),%eax # any non-masked bits left?
jz 2f
- sti
bsrl %eax,%eax
btrl %eax,CPUVAR(IPENDING)
- jnc 1b
movl CPUVAR(ISOURCES)(,%eax,4),%eax
jmp *IS_RECURSE(%eax)
2:
--- 109,116 ----
***************
*** 143,152 ****
cli
andl CPUVAR(IPENDING),%eax
jz 2f
- sti
bsrl %eax,%eax # slow, but not worth optimizing
btrl %eax,CPUVAR(IPENDING)
- jnc 1b # some intr cleared the in-memory bit
movl CPUVAR(ISOURCES)(,%eax, 4),%eax
jmp *IS_RESUME(%eax)
2: /* Check for ASTs on exit to user mode. */
--- 141,148 ----
*** ./arch/x86/include/intr.h.orig Fri Jun 4 13:15:11 2004
--- ./arch/x86/include/intr.h Fri Jun 4 13:17:13 2004
***************
*** 156,171 ****
spllower(int nlevel)
{
struct cpu_info *ci = curcpu();
__splbarrier();
! /*
! * Since this should only lower the interrupt level,
! * the XOR below should only show interrupts that
! * are being unmasked.
! */
! ci->ci_ilevel = nlevel;
! if (ci->ci_ipending & IUNMASK(ci,nlevel))
! Xspllower(nlevel);
}
/*
--- 156,176 ----
spllower(int nlevel)
{
struct cpu_info *ci = curcpu();
+ u_int32_t imask;
+ u_long psl;
__splbarrier();
!
! imask = IUNMASK(ci, nlevel);
! psl = read_psl();
! disable_intr();
! if (ci->ci_ipending & imask) {
! Xspllower(nlevel);
! /* Xspllower does enable_intr() */
! } else {
! ci->ci_ilevel = nlevel;
! write_psl(psl);
! }
}
/*
--
Paul Dokas dokas@cs.umn.edu
======================================================================
Don Juan Matus: "an enigma wrapped in mystery wrapped in a tortilla."