Subject: Re: kernel stack overflow detection
To: None <cgd@broadcom.com>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 06/10/2002 21:47:20
----Next_Part(Mon_Jun_10_21:47:20_2002_157)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
From: cgd@broadcom.com
Date: 30 May 2002 09:43:14 -0700
> you've given yourself an extra page, so fill it all with the magic
> number. It's possible for a stack overflow to get 'lucky' and miss
> a single magic location.
>
>
> Actually, while you're at it...
>
> You might provide a more generally-useful mechanism for debugging and
> system analysis:
>
> * fill _all_ kernel stack areas _completely_ with a magic number, and
>
> * provide a mechanism to check all of them to see how much has been
> used.
i slightly updated my patch as you suggest. (attached)
btw, kernel stack overflows i saw seems due to
ipsec+nfsclient. is anyone else using them?
---
YAMAMOTO Takashi<yamt@mwd.biglobe.ne.jp>
----Next_Part(Mon_Jun_10_21:47:20_2002_157)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="kstack2.diff"
Index: arch/i386/conf/files.i386
===================================================================
RCS file: /cvs/cvsroot/syssrc/sys/arch/i386/conf/files.i386,v
retrieving revision 1.206
diff -u -p -r1.206 files.i386
--- arch/i386/conf/files.i386 2002/04/18 12:54:12 1.206
+++ arch/i386/conf/files.i386 2002/06/10 10:50:43
@@ -57,6 +57,10 @@ defparam opt_pcibios.h PCIBIOS_IRQS_HINT
# Large page size
defflag LARGEPAGES
+# kernel stack debug
+defflag opt_kstack_safe.h KSTACK_SAFE
+defflag opt_kstack_dr0.h KSTACK_CHECK_DR0
+
file arch/i386/i386/autoconf.c
file arch/i386/i386/bus_machdep.c
file arch/i386/i386/conf.c
Index: arch/i386/i386/pmap.c
===================================================================
RCS file: /cvs/cvsroot/syssrc/sys/arch/i386/i386/pmap.c,v
retrieving revision 1.136
diff -u -p -r1.136 pmap.c
--- arch/i386/i386/pmap.c 2002/03/27 04:47:28 1.136
+++ arch/i386/i386/pmap.c 2002/06/10 10:50:44
@@ -65,6 +65,7 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.1
#include "opt_cputype.h"
#include "opt_user_ldt.h"
#include "opt_largepages.h"
+#include "opt_kstack_dr0.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -1654,6 +1655,11 @@ pmap_ldt_cleanup(p)
}
#endif /* USER_LDT */
+#ifdef KSTACK_CHECK_DR0
+/* XXX should be in cpufunc.h or somewhere */
+void dr0(caddr_t, u_int32_t, u_int32_t, u_int32_t);
+#endif
+
/*
* pmap_activate: activate a process' pmap (fill in %cr3 and LDT info)
*
@@ -1675,6 +1681,16 @@ pmap_activate(p)
lcr3(pcb->pcb_cr3);
if (pcb == curpcb)
lldt(pcb->pcb_ldt_sel);
+
+#ifdef KSTACK_CHECK_DR0
+ /*
+ * setup breakpoint on the top of stack
+ */
+ if (p == &proc0)
+ dr0(0, 0, 0, 0);
+ else
+ dr0(KSTACK_END(p), 1, 3, 1);
+#endif
}
/*
Index: arch/i386/i386/trap.c
===================================================================
RCS file: /cvs/cvsroot/syssrc/sys/arch/i386/i386/trap.c,v
retrieving revision 1.165
diff -u -p -r1.165 trap.c
--- arch/i386/i386/trap.c 2002/02/18 15:58:02 1.165
+++ arch/i386/i386/trap.c 2002/06/10 10:50:44
@@ -86,6 +86,7 @@ __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.1
#include "opt_math_emulate.h"
#include "opt_vm86.h"
#include "opt_cputype.h"
+#include "opt_kstack_dr0.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -155,6 +156,33 @@ int trapdebug = 0;
#define IDTVEC(name) __CONCAT(X, name)
+#ifdef KSTACK_CHECK_DR0
+/* XXX should be in cpufunc.h */
+static __inline u_int
+rdr6(void)
+{
+ u_int val;
+
+ __asm __volatile("movl %%dr6,%0" : "=r" (val));
+ return val;
+}
+
+/* XXX should be in cpufunc.h */
+static __inline void
+ldr6(u_int val)
+{
+
+ __asm __volatile("movl %0,%%dr6" : : "r" (val));
+}
+#endif
+
+#ifdef KSTACK_SAFE
+int kstack_check_dr0_panic;
+#ifdef DDB
+int kstack_check_dr0_debug;
+#endif
+#endif
+
/*
* trap(frame):
* Exception, fault, and trap interface to BSD kernel. This
@@ -202,6 +230,30 @@ trap(frame)
default:
we_re_toast:
+#ifdef KSTACK_CHECK_DR0
+ if (type == T_TRCTRAP) {
+ u_int mask, dr6 = rdr6();
+
+ mask = 1 << 0; /* dr0 */
+ if (dr6 & mask) {
+ /*
+ * XXX should be just panic
+ */
+#ifdef KSTACK_SAFE
+ if (kstack_check_dr0_panic)
+#endif
+ panic("trap on DR0: maybe kernel stack overflow\n");
+ printf("trap on DR0: maybe kernel stack overflow\n");
+ dr6 &= ~mask;
+ ldr6(dr6);
+#ifdef DDB
+ if (kstack_check_dr0_debug)
+ Debugger();
+#endif
+ return;
+ }
+ }
+#endif
#ifdef KGDB
if (kgdb_trap(type, &frame))
return;
Index: arch/i386/include/param.h
===================================================================
RCS file: /cvs/cvsroot/syssrc/sys/arch/i386/include/param.h,v
retrieving revision 1.48
diff -u -p -r1.48 param.h
--- arch/i386/include/param.h 2002/02/26 15:13:23 1.48
+++ arch/i386/include/param.h 2002/06/10 10:50:44
@@ -90,7 +90,17 @@
#define SSIZE 1 /* initial stack size/NBPG */
#define SINCR 1 /* increment of stack/NBPG */
-#define UPAGES 2 /* pages of u-area */
+
+#if defined(_KERNEL_OPT)
+#include "opt_kstack_safe.h"
+#endif
+#define SHAM_UPAGES 2
+#define SHAM_USPACE (SHAM_UPAGES * NBPG)
+#ifdef KSTACK_SAFE
+#define UPAGES (SHAM_UPAGES + 1) /* "real" pages of u-area */
+#else
+#define UPAGES SHAM_UPAGES /* pages of u-area */
+#endif
#define USPACE (UPAGES * NBPG) /* total size of u-area */
#ifndef MSGBUFSIZE
Index: conf/files
===================================================================
RCS file: /cvs/cvsroot/syssrc/sys/conf/files,v
retrieving revision 1.532
diff -u -p -r1.532 files
--- conf/files 2002/06/06 23:54:48 1.532
+++ conf/files 2002/06/10 10:51:04
@@ -138,6 +138,7 @@ defparam opt_kgdb.h KGDB_DEV KGDB_DEVNA
KGDB_DEVADDR KGDB_DEVRATE KGDB_DEVMODE
defflag LOCKDEBUG
defflag SYSCALL_DEBUG
+defflag opt_kstack.h KSTACK_CHECK_MAGIC
# memory (ram) disk options
#
@@ -263,6 +264,13 @@ include "dev/raidframe/files.raidframe"
#
device aic: scsi
file dev/ic/aic6360.c aic
+
+define scsi_low
+file dev/scsipi/scsi_low.c scsi_low
+
+# FutureDomain TMC18C[35]0
+device stg: scsi, scsi_low
+file dev/ic/tmc18c30.c stg
# SMC 93Cx6 Serial EEPROM devices
#
Index: kern/kern_fork.c
===================================================================
RCS file: /cvs/cvsroot/syssrc/sys/kern/kern_fork.c,v
retrieving revision 1.88
diff -u -p -r1.88 kern_fork.c
--- kern/kern_fork.c 2001/12/08 00:35:30 1.88
+++ kern/kern_fork.c 2002/06/10 10:51:17
@@ -81,6 +81,7 @@
__KERNEL_RCSID(0, "$NetBSD: kern_fork.c,v 1.88 2001/12/08 00:35:30 thorpej Exp $");
#include "opt_ktrace.h"
+#include "opt_kstack.h"
#include "opt_multiprocessor.h"
#include <sys/param.h>
@@ -101,6 +102,96 @@ __KERNEL_RCSID(0, "$NetBSD: kern_fork.c,
#include <sys/sched.h>
#include <sys/signalvar.h>
+#ifdef KSTACK_CHECK_MAGIC
+#include <sys/user.h>
+
+#define KSTACK_MAGIC 0xdeadbeaf
+
+void kstack_setup_magic(const struct proc *);
+void kstack_check_magic(const struct proc *);
+
+#define KSTACK_SIZE (SHAM_USPACE - sizeof(struct user))
+
+/* XXX should be per process basis? */
+int kstackleftmin = KSTACK_SIZE;
+int kstackleftthres = KSTACK_SIZE / 8; /* warn if remaining stack is less than this */
+
+void
+kstack_setup_magic(const struct proc *p)
+{
+ u_int32_t *ip;
+ u_int32_t const *end;
+
+ KASSERT(p != 0);
+ KASSERT(p != &proc0);
+
+ /*
+ * fill all the stack with magic number
+ * so that later modification on it can be detected.
+ */
+ ip = (u_int32_t *)(KSTACK_END(p) + SHAM_USPACE - USPACE);
+ end = (u_int32_t *)((caddr_t)KSTACK_END(p) + KSTACK_SIZE / 2); /* XXX */
+ for (; ip < end; ip++) {
+ *ip = KSTACK_MAGIC;
+ }
+}
+
+void
+kstack_check_magic(const struct proc *p)
+{
+ static int dont_check;
+ u_int32_t const *ip, *end;
+ int stackleft;
+
+ KASSERT(p != 0);
+
+ /* don't check proc0 */ /*XXX*/
+ if (p == &proc0)
+ return;
+
+ ip = (u_int32_t *)(KSTACK_END(p) + SHAM_USPACE - USPACE);
+ end = (u_int32_t *)((caddr_t)p->p_addr + USPACE);
+ for (; ip < end; ip++)
+ if (*ip != KSTACK_MAGIC)
+ break;
+
+ stackleft = (caddr_t)ip - KSTACK_END(p);
+ if (kstackleftmin > stackleft) {
+ kstackleftmin = stackleft;
+ if (stackleft < kstackleftthres)
+ printf("warning: kernel stack left %d bytes(pid %u)\n",
+ stackleft, p->p_pid);
+ }
+
+ if (dont_check)
+ return;
+
+ if (stackleft <= 0) {
+#ifndef KSTACK_SAFE
+ panic("magic on the top of kernel stack changed for pid %u: "
+ "maybe kernel stack overflow\n", p->p_pid);
+#else
+ printf("magic on the top of kernel stack changed for pid %u: "
+ "maybe kernel stack overflow\n", p->p_pid);
+
+ /*
+ * no more checks.
+ * if kernel stack once overflow, it's enough bad..
+ */
+ dont_check = 1;
+ printf("kernel stack magic check disabled\n");
+#if 0
+ /*
+ * re-setup magic
+ * XXX this will break stack more..
+ */
+ kstack_setup_magic(p);
+#endif
+#endif
+ }
+}
+#endif /*KSTACK_CHECK_MAGIC*/
+
#include <sys/syscallargs.h>
#include <uvm/uvm_extern.h>
@@ -376,6 +467,9 @@ fork1(struct proc *p1, int flags, int ex
stack, stacksize,
(func != NULL) ? func : child_return,
(arg != NULL) ? arg : p2);
+#ifdef KSTACK_CHECK_MAGIC
+ kstack_setup_magic(p2);
+#endif
/*
* BEGIN PID ALLOCATION.
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/cvsroot/syssrc/sys/kern/kern_synch.c,v
retrieving revision 1.108
diff -u -p -r1.108 kern_synch.c
--- kern/kern_synch.c 2002/05/21 01:38:27 1.108
+++ kern/kern_synch.c 2002/06/10 10:51:17
@@ -82,6 +82,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_synch.c
#include "opt_ddb.h"
#include "opt_ktrace.h"
+#include "opt_kstack.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
@@ -95,6 +96,11 @@ __KERNEL_RCSID(0, "$NetBSD: kern_synch.c
#include <sys/resourcevar.h>
#include <sys/sched.h>
+#ifdef KSTACK_CHECK_MAGIC
+/* XXX should be in proc.h or user.h */
+void kstack_check_magic(const struct proc *);
+#endif
+
#include <uvm/uvm_extern.h>
#ifdef KTRACE
@@ -847,6 +853,10 @@ mi_switch(struct proc *p)
* scheduling flags.
*/
spc->spc_flags &= ~SPCF_SWITCHCLEAR;
+
+#ifdef KSTACK_CHECK_MAGIC
+ kstack_check_magic(p);
+#endif
/*
* Pick a new current process and switch to it. When we
Index: sys/param.h
===================================================================
RCS file: /cvs/cvsroot/syssrc/sys/sys/param.h,v
retrieving revision 1.140
diff -u -p -r1.140 param.h
--- sys/param.h 2002/05/22 03:29:09 1.140
+++ sys/param.h 2002/06/10 10:51:17
@@ -277,6 +277,14 @@
#ifdef _KERNEL
/*
+ * kernel stack paramaters
+ */
+#ifndef SHAM_USPACE
+#define SHAM_USPACE USPACE
+#endif
+#define KSTACK_END(p) ((caddr_t)ALIGN((p)->p_addr + 1) + USPACE - SHAM_USPACE)
+
+/*
* macro to convert from milliseconds to hz without integer overflow
* Default version using only 32bits arithmetics.
* 64bit port can define 64bit version in their <machine/param.h>
Index: uvm/uvm_meter.c
===================================================================
RCS file: /cvs/cvsroot/syssrc/sys/uvm/uvm_meter.c,v
retrieving revision 1.23
diff -u -p -r1.23 uvm_meter.c
--- uvm/uvm_meter.c 2001/12/09 03:07:19 1.23
+++ uvm/uvm_meter.c 2002/06/10 10:51:18
@@ -215,7 +215,7 @@ uvm_sysctl(name, namelen, oldp, oldlenp,
return (sysctl_rdint(oldp, oldlenp, newp, maxslp));
case VM_USPACE:
- return (sysctl_rdint(oldp, oldlenp, newp, USPACE));
+ return (sysctl_rdint(oldp, oldlenp, newp, SHAM_USPACE));
default:
return (EOPNOTSUPP);
----Next_Part(Mon_Jun_10_21:47:20_2002_157)----