Subject: kern/36969: locking cleanup patches for limit structures
To: None <kern-bug-people@netbsd.org, gnats-admin@netbsd.org,>
From: None <dholland@eecs.harvard.edu>
List: netbsd-bugs
Date: 09/11/2007 20:45:00
>Number: 36969
>Category: kern
>Synopsis: locking cleanup patches for limit structures
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: kern-bug-people
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Tue Sep 11 20:45:00 +0000 2007
>Originator: David A. Holland <dholland@eecs.harvard.edu>
>Release: NetBSD 4.99.30 (20070910)
>Organization:
Harvard EECS
>Environment:
System: NetBSD tanaqui 4.99.30 NetBSD 4.99.30 (TANAQUI) #18: Tue Sep 4 19:02:21 EDT 2007 dholland@tanaqui:/usr/src/sys/arch/i386/compile/TANAQUI i386
Architecture: i386
Machine: i386
>Description:
I have, occasionally, been getting this DIAGNOSTIC panic:
panic: pool_get(plimitpl): free list modified: magic=deadbef4; page [...]; item addr [...]
(This then almost invariably leads to a deadlock trying to sync.)
Upon investigation, it turns out that the locking for the plimit
objects held in plimitpl is bodgy and inconsistent. I have reworked
it; patches follow.
>How-To-Repeat:
The panic is not readily repeatable. One may, however, read the code.
>Fix:
This patch is against -current of 20070910.
I haven't tested the compat_irix bit. However, as the currently
existing code there misuses p_refcnt and will panic, I doubt I've made
it any worse.
Index: sys/compat/irix/irix_prctl.c
===================================================================
RCS file: /cvsroot/src/sys/compat/irix/irix_prctl.c,v
retrieving revision 1.37
diff -u -p -r1.37 irix_prctl.c
--- sys/compat/irix/irix_prctl.c 6 Mar 2007 12:43:09 -0000 1.37
+++ sys/compat/irix/irix_prctl.c 11 Sep 2007 01:31:14 -0000
@@ -498,10 +498,8 @@ irix_sproc_child(isc)
*/
if (inh & IRIX_PR_SULIMIT) {
pl = p2->p_limit;
- parent->p_limit->p_refcnt++;
- p2->p_limit = parent->p_limit;
- if(--pl->p_refcnt == 0)
- limfree(pl);
+ p2->p_limit = limshare(parent);
+ limfree(pl);
}
/*
Index: sys/compat/netbsd32/netbsd32_netbsd.c
===================================================================
RCS file: /cvsroot/src/sys/compat/netbsd32/netbsd32_netbsd.c,v
retrieving revision 1.126
diff -u -p -r1.126 netbsd32_netbsd.c
--- sys/compat/netbsd32/netbsd32_netbsd.c 15 Aug 2007 12:07:31 -0000 1.126
+++ sys/compat/netbsd32/netbsd32_netbsd.c 11 Sep 2007 01:31:25 -0000
@@ -2298,13 +2298,7 @@ netbsd32_adjust_limits(struct proc *p)
return;
}
- if (p->p_limit->p_refcnt > 1 &&
- (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
- struct plimit *oldplim;
- oldplim = p->p_limit;
- p->p_limit = limcopy(p);
- limfree(oldplim);
- }
+ limprivatize(p);
for (i = 0; i < __arraycount(val); i++)
p->p_rlimit[lm[i].id] = val[i];
Index: sys/kern/kern_acct.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_acct.c,v
retrieving revision 1.75
diff -u -p -r1.75 kern_acct.c
--- sys/kern/kern_acct.c 9 Jul 2007 21:10:51 -0000 1.75
+++ sys/kern/kern_acct.c 11 Sep 2007 01:34:47 -0000
@@ -387,7 +387,6 @@ acct_process(struct lwp *l)
struct timeval ut, st, tmp;
struct rusage *r;
int t, error = 0;
- struct plimit *oplim = NULL;
struct proc *p = l->l_proc;
mutex_enter(&acct_lock);
@@ -403,10 +402,7 @@ acct_process(struct lwp *l)
* XXX We should think about the CPU limit, too.
*/
mutex_enter(&p->p_mutex);
- if (p->p_limit->p_refcnt > 1) {
- oplim = p->p_limit;
- p->p_limit = limcopy(p);
- }
+ limprivatize(p, 1 /* even when sharing changes */);
p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
mutex_exit(&p->p_mutex);
@@ -467,13 +463,6 @@ acct_process(struct lwp *l)
if (error != 0)
log(LOG_ERR, "Accounting: write failed %d\n", error);
- if (oplim) {
- mutex_enter(&p->p_mutex);
- limfree(p->p_limit);
- p->p_limit = oplim;
- mutex_exit(&p->p_mutex);
- }
-
out:
mutex_exit(&acct_lock);
return (error);
Index: sys/kern/kern_core.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_core.c,v
retrieving revision 1.5
diff -u -p -r1.5 kern_core.c
--- sys/kern/kern_core.c 3 Apr 2007 16:11:31 -0000 1.5
+++ sys/kern/kern_core.c 11 Sep 2007 01:34:47 -0000
@@ -141,12 +141,15 @@ coredump(struct lwp *l, const char *patt
if ((p->p_flag & PK_SUGID) && security_setidcore_dump)
pattern = security_setidcore_path;
+ /* Could avoid taking p_limit->p_lock if we don't use pl_corename */
+ mutex_enter(&p->p_limit->p_lock);
if (pattern == NULL)
pattern = p->p_limit->pl_corename;
if (name == NULL) {
name = PNBUF_GET();
}
error = coredump_buildname(p, name, pattern, MAXPATHLEN);
+ mutex_exit(&p->p_limit->p_lock);
mutex_exit(&p->p_mutex);
mutex_exit(&proclist_lock);
if (error)
Index: sys/kern/kern_fork.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_fork.c,v
retrieving revision 1.142
diff -u -p -r1.142 kern_fork.c
--- sys/kern/kern_fork.c 15 Aug 2007 12:07:33 -0000 1.142
+++ sys/kern/kern_fork.c 11 Sep 2007 01:34:48 -0000
@@ -352,21 +352,13 @@ fork1(struct lwp *l1, int flags, int exi
p2->p_cwdi = cwdinit(p1);
/*
- * If p_limit is still copy-on-write, bump refcnt,
- * otherwise get a copy that won't be modified.
- * (If PL_SHAREMOD is clear, the structure is shared
- * copy-on-write.)
- */
- if (p1->p_limit->p_lflags & PL_SHAREMOD) {
- mutex_enter(&p1->p_mutex);
- p2->p_limit = limcopy(p1);
- mutex_exit(&p1->p_mutex);
- } else {
- mutex_enter(&p1->p_limit->p_lock);
- p1->p_limit->p_refcnt++;
- mutex_exit(&p1->p_limit->p_lock);
- p2->p_limit = p1->p_limit;
- }
+ * Share p_limit copy-on-write.
+ * (If flags grows a FORK_SHARELIMIT for compat_irix, use limshare
+ * for that instead of limsharecow.)
+ */
+ mutex_enter(&p1->p_mutex);
+ p2->p_limit = limsharecow(p1);
+ mutex_exit(&p1->p_mutex);
p2->p_sflag = ((flags & FORK_PPWAIT) ? PS_PPWAIT : 0);
p2->p_lflag = 0;
Index: sys/kern/kern_proc.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_proc.c,v
retrieving revision 1.115
diff -u -p -r1.115 kern_proc.c
--- sys/kern/kern_proc.c 6 Sep 2007 23:58:57 -0000 1.115
+++ sys/kern/kern_proc.c 11 Sep 2007 01:34:49 -0000
@@ -1325,13 +1325,12 @@ proc_crmod_enter(void)
/* Reset what needs to be reset in plimit. */
lim = p->p_limit;
+ mutex_enter(&lim->p_lock);
if (lim->pl_corename != defcorename) {
- if (lim->p_refcnt > 1 &&
- (lim->p_lflags & PL_SHAREMOD) == 0) {
- p->p_limit = limcopy(p);
- limfree(lim);
- lim = p->p_limit;
- }
+ mutex_exit(&lim->p_lock);
+ limprivatize(p, 0);
+ /* refetch lim, it might have changed */
+ lim = p->p_limit;
mutex_enter(&lim->p_lock);
cn = lim->pl_corename;
lim->pl_corename = defcorename;
@@ -1339,6 +1338,9 @@ proc_crmod_enter(void)
if (cn != defcorename)
free(cn, M_TEMP);
}
+ else {
+ mutex_exit(&lim->p_lock);
+ }
}
/*
Index: sys/kern/kern_resource.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_resource.c,v
retrieving revision 1.120
diff -u -p -r1.120 kern_resource.c
--- sys/kern/kern_resource.c 6 Sep 2007 02:03:06 -0000 1.120
+++ sys/kern/kern_resource.c 11 Sep 2007 01:34:49 -0000
@@ -256,7 +256,6 @@ int
dosetrlimit(struct lwp *l, struct proc *p, int which, struct rlimit *limp)
{
struct rlimit *alimp;
- struct plimit *oldplim;
int error;
if ((u_int)which >= RLIM_NLIMITS)
@@ -266,7 +265,7 @@ dosetrlimit(struct lwp *l, struct proc *
return (EINVAL);
alimp = &p->p_rlimit[which];
- /* if we don't change the value, no need to limcopy() */
+ /* if we don't change the value, no need to limprivatize() */
if (limp->rlim_cur == alimp->rlim_cur &&
limp->rlim_max == alimp->rlim_max)
return 0;
@@ -284,13 +283,9 @@ dosetrlimit(struct lwp *l, struct proc *
return (error);
mutex_enter(&p->p_mutex);
- if (p->p_limit->p_refcnt > 1 &&
- (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
- oldplim = p->p_limit;
- p->p_limit = limcopy(p);
- limfree(oldplim);
- alimp = &p->p_rlimit[which];
- }
+ limprivatize(p, 0);
+ /* refetch this in case it's moved */
+ alimp = &p->p_rlimit[which];
switch (which) {
@@ -527,63 +522,201 @@ ruadd(struct rusage *ru, struct rusage *
}
/*
- * Make a copy of the plimit structure.
- * We share these structures copy-on-write after fork,
- * and copy when a limit is changed.
+ * Make a copy of the target process's plimit structure.
+ *
+ * Must hold p->p_mutex, but *not* p->p_limit->p_lock.
*
- * XXXSMP This is atrocious, need to simplify.
+ * We can't hold either of these locks while mallocing, so we have to
+ * do gross things. (The only reason we have p at all is so we can
+ * drop and reacquire p->p_mutex.)
+ *
+ * Note that p is not necessarily curproc.
*/
-struct plimit *
+static struct plimit *
limcopy(struct proc *p)
{
- struct plimit *lim, *newlim;
- char *corename;
- size_t l;
+ struct plimit *oldlim;
+ struct plimit *newlim;
+ char *newcorename;
+ size_t newcorelen;
+ size_t needlen;
KASSERT(mutex_owned(&p->p_mutex));
- mutex_exit(&p->p_mutex);
- newlim = pool_get(&plimit_pool, PR_WAITOK);
+ oldlim = NULL;
+ newlim = NULL;
+ newcorename = NULL;
+ newcorelen = 0;
+
+ for (;;) {
+ /* Figure out what we're copying */
+ oldlim = p->p_limit;
+ mutex_enter(&oldlim->p_lock);
+
+ /* Work out what we need */
+ if (oldlim->pl_corename == defcorename) {
+ needlen = 0;
+ }
+ else {
+ needlen = strlen(oldlim->pl_corename) + 1;
+ }
+
+ /* If we have what we need, stop */
+ if (newlim != NULL && newcorelen == needlen) {
+ break;
+ }
+
+ /* Otherwise, try to get it */
+ mutex_exit(&oldlim->p_lock);
+ mutex_exit(&p->p_mutex);
+
+ if (newlim == NULL) {
+ newlim = pool_get(&plimit_pool, PR_WAITOK);
+ }
+
+ if (newcorename) {
+ free(newcorename, M_TEMP);
+ }
+ if (needlen > 0) {
+ newcorename = malloc(needlen, M_TEMP, M_WAITOK);
+ }
+ else {
+ newcorename = NULL;
+ }
+ newcorelen = needlen;
+
+ mutex_enter(&p->p_mutex);
+ }
+
mutex_init(&newlim->p_lock, MUTEX_DEFAULT, IPL_NONE);
newlim->p_lflags = 0;
newlim->p_refcnt = 1;
- mutex_enter(&p->p_mutex);
+ memcpy(newlim->pl_rlimit, oldlim->pl_rlimit,
+ sizeof(struct rlimit) * RLIM_NLIMITS);
+ if (newcorelen != 0) {
+ strlcpy(newcorename, oldlim->pl_corename, newcorelen);
+ newlim->pl_corename = newcorename;
+ }
+ else {
+ newlim->pl_corename = defcorename;
+ }
- for (;;) {
- lim = p->p_limit;
- mutex_enter(&lim->p_lock);
- if (lim->pl_corename != defcorename) {
- l = strlen(lim->pl_corename) + 1;
+ mutex_exit(&oldlim->p_lock);
- mutex_exit(&lim->p_lock);
- mutex_exit(&p->p_mutex);
- corename = malloc(l, M_TEMP, M_WAITOK);
- mutex_enter(&p->p_mutex);
- mutex_enter(&lim->p_lock);
-
- if (l != strlen(lim->pl_corename) + 1) {
- mutex_exit(&lim->p_lock);
- mutex_exit(&p->p_mutex);
- free(corename, M_TEMP);
- mutex_enter(&p->p_mutex);
- continue;
- }
- } else
- l = 0;
-
- memcpy(newlim->pl_rlimit, lim->pl_rlimit,
- sizeof(struct rlimit) * RLIM_NLIMITS);
- if (l != 0)
- strlcpy(newlim->pl_corename, lim->pl_corename, l);
- else
- newlim->pl_corename = defcorename;
- mutex_exit(&lim->p_lock);
- break;
+ return (newlim);
+}
+
+/*
+ * Share a copy of p->p_limit, including future changes.
+ *
+ * Must hold p->p_mutex.
+ *
+ * Return the new reference.
+ */
+struct plimit *
+limshare(struct proc *p)
+{
+ struct plimit *lim;
+
+ KASSERT(mutex_owned(&p->p_mutex));
+
+ /* Must not affect anyone who has p->p_limit shared copy-on-write. */
+ limprivatize(p, 0);
+
+ lim = p->p_limit;
+ mutex_enter(&lim->p_lock);
+ lim->p_refcnt++;
+ lim->p_lflags |= PL_SHAREMOD;
+ mutex_exit(&lim->p_lock);
+
+ return lim;
+}
+
+/*
+ * Make a logically new plimit from p->p_limit.
+ *
+ * If the plimit structure is being shared for changes (PL_SHAREMOD
+ * set), copy it. Otherwise, share it. (If PL_SHAREMOD is not set,
+ * sharing is copy on write.)
+ *
+ * Must hold p->p_mutex.
+ *
+ * Return the new reference.
+ */
+struct plimit *
+limsharecow(struct proc *p)
+{
+ struct plimit *oldlim, *newlim;
+
+ KASSERT(mutex_owned(&p->p_mutex));
+
+ /*
+ * Because limcopy drops locks, p->p_limit might change under
+ * us. limcopy always copies the latest p_limit, however, so
+ * even if PL_SHAREMOD ceases to be set (which shouldn't
+ * happen anyhow) at worst we end up doing an unnecessary
+ * copy. Note: reload oldlim after limcopy if it becomes needed.
+ */
+
+ oldlim = p->p_limit;
+ mutex_enter(&oldlim->p_lock);
+ if (oldlim->p_lflags & PL_SHAREMOD) {
+ mutex_exit(&oldlim->p_lock);
+ newlim = limcopy(p);
+ }
+ else {
+ oldlim->p_refcnt++;
+ mutex_exit(&oldlim->p_lock);
+ newlim = oldlim;
}
- return (newlim);
+ return newlim;
+}
+
+/*
+ * Give the (current) proc its own private plimit structure.
+ *
+ * We share these structures copy-on-write after fork, and copy when a
+ * limit is changed.
+ *
+ * Must hold p->p_mutex and *not* p->p_limit->p_lock.
+ */
+void
+limprivatize(struct proc *p, int unshare)
+{
+ struct plimit *oldlim, *newlim;
+
+ KASSERT(mutex_owned(&p->p_mutex));
+
+ /*
+ * Because limcopy drops locks, someone else might call
+ * limprivatize behind its back. limcopy always copies the
+ * latest p->p_limit, as protected by p->p_mutex and
+ * p->p_limit->p_lock; so as long as we're careful not to hold
+ * onto a copy of an older p->p_limit, the worst that can
+ * happen is that we make an unnecessary copy.
+ */
+
+ oldlim = p->p_limit;
+ mutex_enter(&oldlim->p_lock);
+ if (oldlim->p_refcnt > 1 &&
+ (unshare || (oldlim->p_lflags & PL_SHAREMOD)==0)) {
+ mutex_exit(&oldlim->p_lock);
+
+ newlim = limcopy(p);
+ /* refetch oldlim; it might have changed */
+ oldlim = p->p_limit;
+ p->p_limit = newlim;
+ limfree(oldlim);
+ }
+ else {
+ mutex_exit(&oldlim->p_lock);
+ }
}
+/*
+ * decref (and optionally release) a plimit.
+ */
void
limfree(struct plimit *lim)
{
@@ -598,6 +731,9 @@ limfree(struct plimit *lim)
if (n < 0)
panic("limfree");
#endif
+ KASSERT(lim != &limit0);
+
+ /* no further locking necessary as we have the only ref */
if (lim->pl_corename != defcorename)
free(lim->pl_corename, M_TEMP);
mutex_destroy(&lim->p_lock);
@@ -694,7 +830,9 @@ sysctl_proc_corename(SYSCTLFN_ARGS)
* let them modify a temporary copy of the core name
*/
node = *rnode;
+ mutex_enter(&ptmp->p_limit->p_lock);
strlcpy(cname, ptmp->p_limit->pl_corename, MAXPATHLEN);
+ mutex_exit(&ptmp->p_limit->p_lock);
node.sysctl_data = cname;
error = sysctl_lookup(SYSCTLFN_CALL(&node));
@@ -702,10 +840,13 @@ sysctl_proc_corename(SYSCTLFN_ARGS)
* if that failed, or they have nothing new to say, or we've
* heard it before...
*/
+ mutex_enter(&ptmp->p_limit->p_lock);
if (error || newp == NULL ||
strcmp(cname, ptmp->p_limit->pl_corename) == 0) {
+ mutex_exit(&ptmp->p_limit->p_lock);
goto done;
}
+ mutex_exit(&ptmp->p_limit->p_lock);
error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CORENAME,
ptmp, cname, NULL, NULL);
@@ -740,15 +881,13 @@ sysctl_proc_corename(SYSCTLFN_ARGS)
strlcpy(tmp, cname, len + 1);
mutex_enter(&ptmp->p_mutex);
+ limprivatize(ptmp, 0);
lim = ptmp->p_limit;
- if (lim->p_refcnt > 1 && (lim->p_lflags & PL_SHAREMOD) == 0) {
- ptmp->p_limit = limcopy(ptmp);
- limfree(lim);
- lim = ptmp->p_limit;
- }
+ mutex_enter(&lim->p_lock);
if (lim->pl_corename != defcorename)
free(lim->pl_corename, M_TEMP);
lim->pl_corename = tmp;
+ mutex_exit(&lim->p_lock);
mutex_exit(&ptmp->p_mutex);
done:
PNBUF_PUT(cname);
Index: sys/sys/resourcevar.h
===================================================================
RCS file: /cvsroot/src/sys/sys/resourcevar.h,v
retrieving revision 1.38
diff -u -p -r1.38 resourcevar.h
--- sys/sys/resourcevar.h 12 Jul 2007 11:05:42 -0000 1.38
+++ sys/sys/resourcevar.h 11 Sep 2007 01:35:28 -0000
@@ -67,6 +67,9 @@ struct pstats {
* ("threads") share modifications, the PL_SHAREMOD flag is set,
* and a copy must be made for the child of a new fork that isn't
* sharing modifications to the limits.
+ *
+ * Because of this sharing, all members are locked by p_lock and never
+ * struct proc's p_mutex.
*/
struct plimit {
struct rlimit pl_rlimit[RLIM_NLIMITS];
@@ -121,7 +124,9 @@ void addupc_intr(struct lwp *, u_long);
void addupc_task(struct lwp *, u_long, u_int);
void calcru(struct proc *, struct timeval *, struct timeval *,
struct timeval *, struct timeval *);
-struct plimit *limcopy(struct proc *);
+struct plimit *limshare(struct proc *);
+struct plimit *limsharecow(struct proc *);
+void limprivatize(struct proc *, int);
void limfree(struct plimit *);
void ruadd(struct rusage *, struct rusage *);
struct pstats *pstatscopy(struct pstats *);