Subject: port-powerpc/16068: gdb provokes DSI trap panic
To: None <gnats-bugs@gnats.netbsd.org>
From: der Mouse <mouse@Rodents.Montreal.QC.CA>
List: netbsd-bugs
Date: 03/26/2002 11:14:35
>Number:         16068
>Category:       port-powerpc
>Synopsis:       [dM] kernel panic as a result of DSI trap and gdb locks out
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    port-powerpc-maintainer
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Tue Mar 26 08:17:00 PST 2002
>Closed-Date:
>Last-Modified:
>Originator:     der Mouse
>Release:        Proprietary PPC port derived from 1.5W
>Organization:
	Dis-
>Environment:
	Proprietary PPC port derived from 1.5W
>Description:
	I've been asked to post this by a company that's been working
	with NetBSD.  I don't know much about the problem beyond what's
	here, as I did not see this myself, but I can get mail back to
	the actual originators.  (I realize this may seem to be a
	rather disorganized collection of information; it's what they
	sent me, and as I say, I didn't see it happen myself.  I'm not
	entirely sure why they asked me to post it instead of doing it
	themselves.  I'm trying to help them some in tracking it down
	myself; if anyone has any ideas, I/we would most appreciate
	hearing them.)

	In this case, I also find it odd in another way: if the kernel
	panics, I see it as perfectly normal that gdb (a userland
	program, after all) will be "lock[ed] out"; I don't quite get
	why they wrote that.

Symptom: kernel panic as a result of DSI trap and gdb locks out 

Proprietary NetBSD PPC port derived from the 1.5W

LOCKDEBUG not defined, not a SMP config.
/*	$NetBSD: param.h,v 1.128 2001/06/03 02:48:45 thorpej Exp $	*/
    #define	__NetBSD_Version__	105230000	/* NetBSD 1.5W */
/*	$NetBSD: sys_process.c,v 1.67 2001/03/17 09:38:36 pooka Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.104 2001/05/28 22:20:03 chs Exp $	*/
/*	$NetBSD: kern_lock.c,v 1.55 2001/06/05 04:38:09 thorpej Exp $	*/
/*	$NetBSD: uvm_fault_i.h,v 1.13 2001/06/02 18:09:26 chs Exp $	*/
/*	$NetBSD: uvm_map.h,v 1.28 2001/06/02 18:09:27 chs Exp $	*/
/*	$NetBSD: uvm_fault.c,v 1.64 2001/06/02 18:09:26 chs Exp $	*/
/*	$NetBSD: uvm_map.c,v 1.99 2001/06/02 18:09:26 chs Exp $	*/
/*	$NetBSD: uvm_io.c,v 1.15 2001/06/02 18:09:26 chs Exp $	*/
/*	$NetBSD: uvm_vnode.c,v 1.50 2001/05/26 21:27:21 chs Exp $	*/
/*	$NetBSD: procfs_mem.c,v 1.27 2000/11/24 18:58:37 chs Exp $	*/
/*	$NetBSD: layer_vnops.c,v 1.6 2001/06/07 13:32:47 wiz Exp $	*/

In platform-dependent part: arch/my_ppc
/*	$NetBSD: cpu.c,v 1.1 2000/02/29 15:21:46 nonaka Exp $	*/
/*	$NetBSD: locore.s,v 1.8 2000/11/16 05:38:33 thorpej Exp $	*/
/*	$NetBSD: machdep.c,v 1.11 2000/09/13 15:00:22 thorpej Exp $	*/
in arch/powerpc
/*	$NetBSD: Locore.c,v 1.4 2000/06/08 06:48:45 kleink Exp $	*/
/*	$NetBSD: locore_subr.S,v 1.2 2001/02/28 20:44:41 tsubai Exp $	*/
/*	$NetBSD: mem.c,v 1.9 2001/02/04 17:38:11 briggs Exp $ */
/*	$NetBSD: pmap.c,v 1.44 2001/06/10 11:01:27 tsubai Exp $	*/
/*	$NetBSD: powerpc_machdep.c,v 1.4 2001/04/05 09:58:05 tsubai Exp $	*/
/*	$NetBSD: process_machdep.c,v 1.5 2001/02/04 17:38:11 briggs Exp $	*/
/*	$NetBSD: sys_machdep.c,v 1.3 2000/06/09 14:08:45 kleink Exp $	*/
/*	$NetBSD: trap.c,v 1.46 2001/06/10 16:31:59 tsubai Exp $	*/
/*	$NetBSD: trap_subr.S,v 1.6 2001/06/08 00:16:25 matt Exp $	*/
/*	$NetBSD: trap_subr_mp.S,v 1.2 2001/06/10 11:09:28 tsubai Exp $	*/
/*	$NetBSD: vm_machdep.c,v 1.28 2001/06/10 11:01:28 tsubai Exp $	*/




bash-2.05# gdb my_shlib_test
GNU gdb 4.17
Copyright 1998 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "powerpc--netbsd"...
(gdb) set verbose on
(gdb) set verbose 1
(gdb) set auto-solib 1
(gdb) r -N
Starting program: /usr/local/bin/my_shlib_test -N
Stopped in pid 89 (my_shlib_test) at    tlbdsmsize+0x618:       mtspr r1, , SPRG1


db> show reg                  
r0                 0xe
r1            0x1f9160  ddbstk+0x1800
r2                   0
r3                 0xe
r4                 0xb
r5             0x20080  db_write_cmd+0x20
r6                 0xb
r7              0x9032  lance_get+0x222
r8            0x1171a9  nfs_readrpc+0xd59
r9              0x9032  lance_get+0x222
r10           0x1854f4  intrctlr_find
r11           0x1f9160  ddbstk+0x1800
r12         0x44000000
r13                  0
r14                  0
r15                  0
r16                  0
r17                  0
r18                  0
r19                  0
r20                  0
r21         0xffffec14
r22         0xffffec08
r23         0x219450d0
r24         0xffffec8c
r25         0xffffec64
r26         0xffffec6c
r27                  0
r28         0xe0895c78
r29         0x40000000
r30           0x185330  intrctlr_setpl+0x54
r31           0x1f9160  ddbstk+0x1800
iar              0x700  tlbdsmsize+0x618
msr             0x1000  tlbdsmsize+0xf18
tlbdsmsize+0x618:       mtspr r1, , SPRG1
db> ps
 PID             PPID       PGRP        UID S   FLAGS          COMMAND    WAIT
>How-To-Repeat:
	
>Fix:
	<how to correct or work around the problem, if known (multiple
	<lines)>
>Release-Note:
>Audit-Trail:
>Unformatted:
 >89                88         89          0 7  0x5806    my_shlib_test
  88                87         88          0 3  0x4086              gdb    wait
  87                85         87          0 3  0x4086             bash    wait
  85                 1         85          0 3  0x4086           crunch    wait
  84                 1         84          0 3    0x84           crunch  select
  77                 1         77          0 3    0x84           crunch  select
  52                 1         52          0 3    0x84           crunch  mfsidl
  7                  0          0          0 3 0x20204         aiodoned aiodone
  6                  0          0          0 3 0x20204          ioflush  syncer
  5                  0          0          0 3 0x20204           reaper  reaper
  4                  0          0          0 3 0x20204       pagedaemon pgdaemo
  1                  0          1          0 3  0x4084           crunch    wait
  0                 -1          0          0 3 0x20204          swapper schedul
 db> trace /t 89
 at db_force_whitespace+ac
 at db_putchar+13c
 
 db> trace
 at putchar+268
 at kprintf+1bc
 at db_printf+c4
 at db_stack_trace_print+294
 at db_stack_trace_cmd+58
 at db_command+378
 at db_command_loop+bc
 at db_trap+1b8
 at ddb_trap_glue+c0
 saved LR(0x5c1c) is invalid.
 
 db> trace 0x1f9160
 at putchar+268
 at kprintf+1bc
 at db_printf+c4
 at db_stack_trace_print+294
 at db_stack_trace_cmd+58
 at db_command+378
 at db_command_loop+bc
 at db_trap+1b8
 at ddb_trap_glue+c0
 saved LR(0x5c1c) is invalid.
 
 db>
 
 1f9160      -> 1f9180   1ceb8  (db_putchar)
 1f9180      -> 1b91b0   514d4
 1f91b0      -> 1f91d0 	52998
 
 
 Stopped at      cpu_Debugger+24:        lwz r11, r1, 0x0,
 db> ps
  PID             PPID       PGRP        UID S   FLAGS          COMMAND    WAIT
  119               92        119          0 4  0x5806    my_shlib_test
  92                87         92          0 3  0x4006              gdb vmmaplk
  87                85         87          0 3  0x4086             bash    wait
  85                 1         85          0 3  0x4086           crunch    wait
  84                 1         84          0 3    0x84           crunch  select
  77                 1         77          0 3    0x84           crunch  select
  52                 1         52          0 3    0x84           crunch  mfsidl
  7                  0          0          0 3 0x20204         aiodoned aiodone
  6                  0          0          0 3 0x20204          ioflush  syncer
  5                  0          0          0 3 0x20204           reaper  reaper
  4                  0          0          0 3 0x20204       pagedaemon pgdaemo
  1                  0          1          0 3  0x4084           crunch    wait
  0                 -1          0          0 3 0x20204          swapper schedul
 db> ps /a
  PID          COMMAND      STRUCT PROC *            UAREA *     VMSPACE/VM_MAP
  119    my_shlib_test          0x3542398         0xefb8f000          0x32ed258
  92               gdb          0x3542ab8         0xefb8b000          0x32ed640
  87              bash          0x3542008         0xefb7a000          0x32ed190
  85            crunch          0x32e8c78         0xefb76000          0x32ed0c8
  84            crunch          0x3542728         0xefb87000          0x32ed4b0
  77            crunch          0x35428f0         0xefb83000          0x32ed578
  52            crunch          0x3542560         0xefb7f000          0x32ed320
  7           aiodoned          0x32e8ab0         0xefb71000           0x212c90
  6            ioflush          0x32e88e8         0xefb6d000           0x212c90
  5             reaper          0x32e8720         0xefb69000           0x212c90
  4         pagedaemon          0x32e8558         0xefb65000           0x212c90
  1             crunch          0x32e8000         0xefb59000          0x32ed000
  0            swapper           0x212d58           0x266000           0x212c90
 db> set $radix=0x10
 $radix                  10 =         16
 db>
 my_shlib_test proc: 0x3542ab8
 
 db> examine /x 0x3542ab8,0x10
  0x3542ab8:     0 #p_forw=syslogd          
  				0 #p_back                   
  		  3542008 #p_list=bash (first proc)       
  		  35423a0        
  		  32e9150 #p_cred       
  		  32ea400 #p_fd       
  		  32eb080 #p_cwdi       
  		 efb8b128 #p_stats     
  		  3be8140 #p_limit        
  		  32ed640 #p_vmspace        
  		  39dc968 #p_sigacts       
  		       14 #p_exitsig
              4006 #p_flag             
            224ab0 #p_cpu  (->struct schedstate_percpu ci_schedstate)      
           3000000 #p_stat=3       
                5c #PID=92
 
 Find out PCB (ptr to pcb at proc+0x1b8):
 db> examine /x 0x3542ab8+0x1b8
  0x3542c70:     efb8b000
 db>
 Display PCB contents:
 db> examine /x efb8b000,10
 0xefb8b000:     e09f2180    61ec180     efb8eb40    a           0           1
 0xefb8b018:     0           1b000       0           0           0           0
 0xefb8b030:     0           0           0           0
 db>
 The SPL = 0xa and SP=efb8eb40
 The map of the stack as pointed by PCB_SP(4):
 +00  R01(sv) ->  +00 
                  +04  
 +08  R10         +08  R30
 +0C  R11         +0C  R31
  ......          +10  <-Process R1
 +5C  R31         +14  <-Process LR
 
 db> examine /x 0xefb8eb40
 0xefb8eb40:     efb8eba0
 
 0xefb8eba0:     
 	efb8ebb0    
 		5164        
 	21adbcd0 R30   
 	efb8ebb0 R31    
 	efb8ebe0 R01   
 	   419c4 LR   (in mi_switch after cpu_switch)     
 	 3542ab8     
 	  224ab0      
 	 3be8140     
 db>
 0xefb8ebe0:     efb8ec20     40d90	   ltsleep (kern_sync.c#474)
 									   Parameters:
 									   0xefb8ebe8	 1fa09c	  id
 													 4		  priority	  (PVM)
 													 1e076c	  msg
 													 0		  tout
 													 1fa09c	  lock
 
 0xefb8ec20:     efb8ec70     32920	   lockmgr (kern_lock.c#687 after AQUIRE w/ LK_EXCLUSIVE)
 0xefb8ec70:     efb8eca0    164274	   vm_map_lock (uvm_map.h#420, after call to lockmgr)
 0xefb8eca0:     efb8ed10    15e968	   uvm_map (uvm_map.c#543 after call to vm_map_lock)
 									   Parameters to uvm_map:
 									   0xefb8ecb0		 1fa098 (map)
 									   0xefb8ecb4	   efb8ed60 (startp)
 									   0xefb8ecb8	       1000 (size)
 									   0xefb8ecbc			  0	(NULL uobj)
 									   0xefb8ecc0			  0 (R8??)
 									   0xefb8ecc4	    1955000	(offset)	
 									   0xefb8ecc8			  0 (align)
 
 0xefb8ed10:     efb8ed40    15fb54	   uvm_map_reserve (uvm_map.c#1226)
 									   Parameters to uvm_map_reserve:
 									   0xefb8ed18        1fa098	(map)
 									   0xefb8ed1c		   1000	(size)
 									   0xefb8ed20		1955000	(offset)
 									   0xefb8ed24			  0 (align)
 									   0xefb8ed24      efb8ed60 (raddr)
 
 0xefb8ed40:     efb8edd0    15fff4	   uvm_map_extract (uvm_map.c#1400)
 									   Parameters to uvm_map_extract:
 									   0xefb8ed48	   	32ed258	(srcmap)
 									   0xefb8ed4c		1955000	(start)
 									   0xefb8ed50		   1000	(len)
 									   0xefb8ed54		 1fa098	(dstmap)
 									   0xefb8ed58	   efb8edec (dstaddrp)
 									   0xefb8ed5c			  e	(flags)
 
 0xefb8edd0:     efb8ee10    15ac3c	   uvm_io (uvm_io.c#107)
 0xefb8ee10:     efb8ee40     a88fc	   procfs_domem (procfs_mem.c#93)
 0xefb8ee40:     efb8eeb0     5dc98	   sys_ptrace (sys_process.c#243)
 									   Parameters to sys_ptrace:
 									   0xefb8ee48      3542ab8 (proc for gdb!!!)
 									   0xefb8ee4c      efb8ef64 (sys_ptrace_args)
 													   req = 1  pid = 0x77(119)
 													   addr=0x1955318  data = 0
 									   0xefb8ee50      efb8eee0	(retval)
 
 0xefb8eeb0:     efb8ef50    17d590	   trap	(trap.c#253) after sy_call()
 0xefb8ef50:     ffffe458      578c	   before trapexit
 
 
 db> show map 1fa098
 MAP 0x1fa098: [0xe0000000->0xf0000000]
         #ent=15, sz=263806976, ref=1, version=5700, flags=0x1
         pmap=0x22470c(resident=13580)
 db> show uvmexp
 Current UVM status:
   pagesize=4096 (0x1000), pagemask=0xfff, pageshift=12
   127620 VM pages: 7797 active, 0 inactive, 56 wired, 109936 free
   min  10% (25) anon, 10% (25) vnode, 5% (12) vtext
   pages  3081 anon, 3594 vnode, 1122 vtext
   freemin=64, free-target=85, inactive-target=0, wired-max=127620
   faults=35545, traps=0, intrs=0, ctxswitch=3585
   softint=2316, syscalls=33771, swapins=0, swapouts=0
   fault counts:
     noram=0, noanon=0, pgwait=0, pgrele=0
     ok relocks(total)=759(759), anget(retrys)=3916(0), amapcopy=3087
     neighbor anon/obj pg=2220/35009, gets(lock/unlock)=16605/759
     cases: anon=2737, anoncow=1179, obj=10129, prcopy=6476, przero=13135
   daemon and swap counts:
     woke=0, revs=0, scans=0, obscans=0, anscans=0
     busy=0, freed=0, reactivate=0, deactivate=0
     pageouts=0, pending=0, nswget=0
     nswapdev=0, nanon=119228, nanonneeded=119228 nfreeanon=116147
     swpages=0, swpginuse=0, swpgonly=0 paging=0
   kernel pointers:
     objs(kern/kmem/mb)=0x1fa034/0x1fa130/0x1fa148
 db>
 
 /* vm_map flags */
 #define	VM_MAP_PAGEABLE		0x01		/* ro: entries are pageable */
 #define	VM_MAP_INTRSAFE		0x02		/* ro: interrupt safe map */
 #define	VM_MAP_WIREFUTURE	0x04		/* rw: wire future mappings */
 #define	VM_MAP_BUSY		    0x08		/* rw: map is busy */
 #define	VM_MAP_WANTLOCK		0x10		/* rw: want to write-lock */
 
 
 struct vm_map {
 	struct pmap *		pmap;		/* Physical map */
 	struct lock		    lock;		/* Lock for map data */
 	struct vm_map_entry	header;		/* List of entries */
 	int			        nentries;	/* Number of entries */	  /* +0x70 */
 	vsize_t			    size;		/* virtual size */		  /* +0x74 */
 	int			        ref_count;	/* Reference count */
 	struct simplelock	ref_lock;	/* Lock for ref_count field */
 	struct vm_map_entry *	hint;		/* hint for quick lookups */
 	struct simplelock	hint_lock;	/* lock for hint storage */
 	struct vm_map_entry *	first_free;	/* First free space hint */
 	int			        flags;		/* flags */
 	struct simplelock	flags_lock;	/* Lock for flags field */
 	unsigned int		timestamp;	/* Version number */
 #define	min_offset		header.start
 #define max_offset		header.end
 };
 
 struct lock {
 	struct	simplelock lk_interlock; /* lock on remaining fields */
 	u_int	lk_flags;		/* see below */
 	int	lk_sharecount;		/* # of accepted shared locks */
 	short	lk_exclusivecount;	/* # of recursive exclusive locks */
 	short	lk_recurselevel;	/* lvl above which recursion ok */
 	int	lk_waitcount;		/* # of sleepers/spinners */
 
 	/*
 	 * This is the sleep message for sleep locks, and a simple name
 	 * for spin locks.
 	 */
 	const char *lk_wmesg;
 
 	union {
 		struct {
 			/* pid of exclusive lock holder */
 			pid_t lk_sleep_lockholder;
 
 			/* priority at which to sleep */
 			int lk_sleep_prio;
 
 			/* maximum sleep time (for tsleep) */
 			int lk_sleep_timo;
 		} lk_un_sleep;
 		struct {
 			/* CPU ID of exclusive lock holder */
 			cpuid_t lk_spin_cpu;
 #if defined(LOCKDEBUG)
 			TAILQ_ENTRY(lock) lk_spin_list;
 #endif
 		} lk_un_spin;
 	} lk_un;
 
 #define	lk_lockholder	lk_un.lk_un_sleep.lk_sleep_lockholder
 #define	lk_prio		    lk_un.lk_un_sleep.lk_sleep_prio
 #define	lk_timo		    lk_un.lk_un_sleep.lk_sleep_timo
 
 #define	lk_cpu		    lk_un.lk_un_spin.lk_spin_cpu
 #if defined(LOCKDEBUG)
 #define	lk_list		    lk_un.lk_un_spin.lk_spin_list
 #endif
 
 #if defined(LOCKDEBUG)
 	const char *lk_lock_file;
 	const char *lk_unlock_file;
 	int lk_lock_line;
 	int lk_unlock_line;
 #endif
 };
 
 
 struct simplelock {
 	__cpu_simple_lock_t lock_data;	  /* int */
 #ifdef LOCKDEBUG
 	const char *lock_file;
 	const char *unlock_file;
 	short lock_line;
 	short unlock_line;
 	TAILQ_ENTRY(simplelock) list;
 	cpuid_t lock_holder;		/* CPU ID */
 #endif
 };
 
 db> exam 1fa098,0x40
       	   22470c      	   pmap
       	        0          lk_interlock 
       	      200          lk_flags
       	        1          lk_sharecount 
       	        0		   lk_exclusivecount , lk_recurselevel
       	        1          lk_waitcount 
 		   1e076c      	   "vmmaplk"
 		 ffffffff    	   lk_sleep_lockholder (no lock holder?)
 		        4          lk_sleep_prio 
 		        0		   lk_sleep_timo
 AT offset +0x70
 				f		   nentries
 		  fb96000		   size
 				1		   ref_count
 				0		   refr_lock
 		   20d6a0		   hint
 				0		   hint_lock
 		   20d658		   first_free
 				1		   flags	   (VM_MAP_PAGEABLE)
 				0		   flags_lock  (not asserted, i.e. not locked?)
 			 1644		   timestamp