Subject: Pulling a fix from wrstuden-fixsa into NetBSD 4.0.1
To: None <tech-kern@netbsd.org>
From: Bill Stouder-Studenmund <wrstuden@netbsd.org>
List: tech-kern
Date: 09/28/2007 10:31:49
--E0h0CbphJD8hN+Gf
Content-Type: multipart/mixed; boundary="M2Pxvdb9QxnGd/3e"
Content-Disposition: inline
--M2Pxvdb9QxnGd/3e
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable
As some of you have noticed, I've been working on fixing some of the=20
issues with our SA implementation on the wrstuden-fixsa branch. This=20
branch is for fixing SA in context of NetBSD-4.
The hope is that I'll finish it all up in time for 4.1.
There is one fix however that I'd like everyone to think about for NetBSD=
=20
4.0.1. This is the fix that stops us from trying to allocate memory while=
=20
in tsleep.
The solution is to note that we never will have more than one "Blocked"=20
upcall per vp outstanding at any one time. Thus we can pre-allocate an=20
upcall event structure and stash it on the vp. When we block, we grab that=
=20
upcall event structure, set up the event, and go to sleep. When we go to=20
deliver an upcall to user space, before we free it, we check to see if our=
=20
vp is missing an upcall event structure. If it is, we just hang this event=
=20
structure off the vp instead of freeing.
Patch is attached.
I've tested this and it works fine. However I think it needs more testing=
=20
before I suggest pulling it into 4.0.1.
The main observable issue this change will fix is an occasional lost=20
wakeup. This would usually happen when the system's under memory pressure.=
=20
What happens is that we end up sleeping for memory, and whatever we wanted=
=20
to originally sleep for happens before we get the memeory. So we end up=20
waiting for an event that's already happened & thus wait forever.
Take care,
Bill
--M2Pxvdb9QxnGd/3e
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="fix-sa-4.0.diff.txt"
Content-Transfer-Encoding: quoted-printable
Index: src/sys/kern/kern_sa.c
diff -c src/sys/kern/kern_sa.c:1.87 src/sys/kern/kern_sa.c:1.87.4.1
*** src/sys/kern/kern_sa.c:1.87 Wed Nov 1 10:17:58 2006
--- src/sys/kern/kern_sa.c Thu May 17 22:53:05 2007
***************
*** 1,4 ****
! /* $NetBSD: kern_sa.c,v 1.87 2006/11/01 10:17:58 yamt Exp $ */
=20
/*-
* Copyright (c) 2001, 2004, 2005 The NetBSD Foundation, Inc.
--- 1,4 ----
! /* $NetBSD: kern_sa.c,v 1.87.4.1 2007/05/17 22:53:05 wrstuden Exp $ */
=20
/*-
* Copyright (c) 2001, 2004, 2005 The NetBSD Foundation, Inc.
***************
*** 40,46 ****
=20
#include "opt_ktrace.h"
#include "opt_multiprocessor.h"
! __KERNEL_RCSID(0, "$NetBSD: kern_sa.c,v 1.87 2006/11/01 10:17:58 yamt Exp=
$");
=20
#include <sys/param.h>
#include <sys/systm.h>
--- 40,46 ----
=20
#include "opt_ktrace.h"
#include "opt_multiprocessor.h"
! __KERNEL_RCSID(0, "$NetBSD: kern_sa.c,v 1.87.4.1 2007/05/17 22:53:05 wrst=
uden Exp $");
=20
#include <sys/param.h>
#include <sys/systm.h>
***************
*** 151,159 ****
--- 151,162 ----
sa_newsavp(struct sadata *sa)
{
struct sadata_vp *vp, *qvp;
+ struct sadata_upcall *sau;
=20
/* Allocate virtual processor data structure */
vp =3D pool_get(&savp_pool, PR_WAITOK);
+ /* And preallocate an upcall data structure for sleeping */
+ sau =3D sadata_upcall_alloc(1);
/* Initialize. */
memset(vp, 0, sizeof(*vp));
simple_lock_init(&vp->savp_lock);
***************
*** 163,168 ****
--- 166,172 ----
vp->savp_ofaultaddr =3D 0;
LIST_INIT(&vp->savp_lwpcache);
vp->savp_ncached =3D 0;
+ vp->savp_sleeper_upcall =3D sau;
SIMPLEQ_INIT(&vp->savp_upcalls);
=20
simple_lock(&sa->sa_lock);
***************
*** 269,274 ****
--- 273,282 ----
p->p_flag &=3D ~P_SA;
while ((vp =3D SLIST_FIRST(&p->p_sa->sa_vps)) !=3D NULL) {
SLIST_REMOVE_HEAD(&p->p_sa->sa_vps, savp_next);
+ if (vp->savp_sleeper_upcall) {
+ sadata_upcall_free(vp->savp_sleeper_upcall);
+ vp->savp_sleeper_upcall =3D NULL;
+ }
pool_put(&savp_pool, vp);
}
pool_put(&sadata_pool, sa);
***************
*** 504,509 ****
--- 512,518 ----
vaddr_t uaddr;
boolean_t inmem;
int addedconcurrency, error, s;
+ struct sadata_vp *vp;
=20
p =3D l->l_proc;
sa =3D p->p_sa;
***************
*** 527,543 ****
newlwp(l, p, uaddr, inmem, 0, NULL, 0,
child_return, 0, &l2);
l2->l_flag |=3D L_SA;
! l2->l_savp =3D sa_newsavp(sa);
! if (l2->l_savp) {
! l2->l_savp->savp_lwp =3D l2;
cpu_setfunc(l2, sa_switchcall, NULL);
error =3D sa_upcall(l2, SA_UPCALL_NEWPROC,
NULL, NULL, 0, NULL, NULL);
if (error) {
/* free new savp */
! SLIST_REMOVE(&sa->sa_vps, l2->l_savp,
sadata_vp, savp_next);
! pool_put(&savp_pool, l2->l_savp);
}
} else
error =3D 1;
--- 536,557 ----
newlwp(l, p, uaddr, inmem, 0, NULL, 0,
child_return, 0, &l2);
l2->l_flag |=3D L_SA;
! l2->l_savp =3D vp =3D sa_newsavp(sa);
! if (vp) {
! vp->savp_lwp =3D l2;
cpu_setfunc(l2, sa_switchcall, NULL);
error =3D sa_upcall(l2, SA_UPCALL_NEWPROC,
NULL, NULL, 0, NULL, NULL);
if (error) {
/* free new savp */
! SLIST_REMOVE(&sa->sa_vps, vp,
sadata_vp, savp_next);
! if (vp->savp_sleeper_upcall) {
! sadata_upcall_free(
! vp->savp_sleeper_upcall);
! vp->savp_sleeper_upcall =3D NULL;
! }
! pool_put(&savp_pool, vp);
}
} else
error =3D 1;
***************
*** 921,932 ****
*/
=20
void
! sa_switch(struct lwp *l, struct sadata_upcall *sau, int type)
{
struct proc *p =3D l->l_proc;
struct sadata_vp *vp =3D l->l_savp;
struct lwp *l2;
- struct sadata_upcall *freesau =3D NULL;
int s;
=20
DPRINTFN(4,("sa_switch(%d.%d type %d VP %d)\n", p->p_pid, l->l_lid,
--- 935,946 ----
*/
=20
void
! sa_switch(struct lwp *l, int type)
{
struct proc *p =3D l->l_proc;
struct sadata_vp *vp =3D l->l_savp;
+ struct sadata_upcall *sau =3D NULL;
struct lwp *l2;
int s;
=20
DPRINTFN(4,("sa_switch(%d.%d type %d VP %d)\n", p->p_pid, l->l_lid,
***************
*** 936,942 ****
=20
if (p->p_flag & P_WEXIT) {
mi_switch(l, NULL);
- sadata_upcall_free(sau);
return;
}
=20
--- 950,955 ----
***************
*** 956,962 ****
s =3D splsched();
SCHED_UNLOCK(s);
}
- sadata_upcall_free(sau);
return;
} else if (vp->savp_lwp =3D=3D l) {
/*
--- 969,974 ----
***************
*** 964,975 ****
--- 976,993 ----
* a SA_BLOCKED upcall and allocate resources for the
* UNBLOCKED upcall.
*/
+ if (vp->savp_sleeper_upcall) {
+ sau =3D vp->savp_sleeper_upcall;
+ vp->savp_sleeper_upcall =3D NULL;
+ }
=20
if (sau =3D=3D NULL) {
#ifdef DIAGNOSTIC
printf("sa_switch(%d.%d): no upcall data.\n",
p->p_pid, l->l_lid);
#endif
+ panic("Oops! Don't have a sleeper!\n");
+ /* XXXWRS Shouldn't we just kill the app here? */
mi_switch(l, NULL);
return;
}
***************
*** 1016,1022 ****
cpu_setfunc(l2, sa_switchcall, NULL);
sa_putcachelwp(p, l2); /* PHOLD from sa_getcachelwp */
mi_switch(l, NULL);
! sadata_upcall_free(sau);
DPRINTFN(10,("sa_switch(%d.%d) page fault resolved\n",
p->p_pid, l->l_lid));
if (vp->savp_faultaddr =3D=3D vp->savp_ofaultaddr)
--- 1034,1047 ----
cpu_setfunc(l2, sa_switchcall, NULL);
sa_putcachelwp(p, l2); /* PHOLD from sa_getcachelwp */
mi_switch(l, NULL);
! /*
! * WRS Not sure how vp->savp_sleeper_upcall !=3D NULL
! * but be careful none the less
! */
! if (vp->savp_sleeper_upcall =3D=3D NULL)
! vp->savp_sleeper_upcall =3D sau;
! else
! sadata_upcall_free(sau);
DPRINTFN(10,("sa_switch(%d.%d) page fault resolved\n",
p->p_pid, l->l_lid));
if (vp->savp_faultaddr =3D=3D vp->savp_ofaultaddr)
***************
*** 1044,1050 ****
* SA_UNBLOCKED upcall (select and poll cause this
* kind of behavior a lot).
*/
- freesau =3D sau;
l2 =3D NULL;
} else {
/* NOTREACHED */
--- 1069,1074 ----
***************
*** 1054,1060 ****
DPRINTFN(4,("sa_switch(%d.%d) switching to LWP %d.\n",
p->p_pid, l->l_lid, l2 ? l2->l_lid : 0));
mi_switch(l, l2);
- sadata_upcall_free(freesau);
DPRINTFN(4,("sa_switch(%d.%d flag %x) returned.\n",
p->p_pid, l->l_lid, l->l_flag));
KDASSERT(l->l_wchan =3D=3D 0);
--- 1078,1083 ----
***************
*** 1062,1067 ****
--- 1085,1098 ----
SCHED_ASSERT_UNLOCKED();
}
=20
+ /*
+ * sa_switchcall
+ *
+ * We need to pass an upcall to userland. We are now
+ * running on a spare stack and need to allocate a new
+ * one. Also, if we are passed an sa upcall, we need to dispatch
+ * it to the app.
+ */
static void
sa_switchcall(void *arg)
{
***************
*** 1100,1110 ****
SIMPLEQ_INSERT_TAIL(&vp->savp_upcalls, sau, sau_next);
l2->l_flag |=3D L_SA_UPCALL;
} else {
#ifdef DIAGNOSTIC
printf("sa_switchcall(%d.%d flag %x): Not enough stacks.\n",
p->p_pid, l->l_lid, l->l_flag);
#endif
! sadata_upcall_free(sau);
PHOLD(l2);
SCHED_LOCK(s);
sa_putcachelwp(p, l2); /* sets L_SA */
--- 1131,1156 ----
SIMPLEQ_INSERT_TAIL(&vp->savp_upcalls, sau, sau_next);
l2->l_flag |=3D L_SA_UPCALL;
} else {
+ /*
+ * Oops! We're in trouble. The app hasn't
+ * passeed us in any stacks on which to deliver
+ * the upcall.
+ *
+ * WRS: I think this code is wrong. If we can't
+ * get a stack, we are dead. We either need
+ * to block waiting for one (assuming there's a
+ * live vp still in userland so it can hand back
+ * stacks, or we should just kill the process
+ * as we're deadlocked.
+ */
#ifdef DIAGNOSTIC
printf("sa_switchcall(%d.%d flag %x): Not enough stacks.\n",
p->p_pid, l->l_lid, l->l_flag);
#endif
! if (vp->savp_sleeper_upcall =3D=3D NULL)
! vp->savp_sleeper_upcall =3D sau;
! else
! sadata_upcall_free(sau);
PHOLD(l2);
SCHED_LOCK(s);
sa_putcachelwp(p, l2); /* sets L_SA */
***************
*** 1585,1591 ****
}
type =3D sau->sau_type;
=20
! sadata_upcall_free(sau);
=20
DPRINTFN(7,("sa_makeupcalls(%d.%d): type %d\n", p->p_pid,
l->l_lid, type));
--- 1631,1640 ----
}
type =3D sau->sau_type;
=20
! if (vp->savp_sleeper_upcall =3D=3D NULL)
! vp->savp_sleeper_upcall =3D sau;
! else
! sadata_upcall_free(sau);
=20
DPRINTFN(7,("sa_makeupcalls(%d.%d): type %d\n", p->p_pid,
l->l_lid, type));
Index: src/sys/kern/kern_synch.c
diff -c src/sys/kern/kern_synch.c:1.173 src/sys/kern/kern_synch.c:1.173.4.1
*** src/sys/kern/kern_synch.c:1.173 Fri Nov 3 20:46:00 2006
--- src/sys/kern/kern_synch.c Thu May 17 22:53:05 2007
***************
*** 1,4 ****
! /* $NetBSD: kern_synch.c,v 1.173 2006/11/03 20:46:00 ad Exp $ */
=20
/*-
* Copyright (c) 1999, 2000, 2004 The NetBSD Foundation, Inc.
--- 1,4 ----
! /* $NetBSD: kern_synch.c,v 1.173.4.1 2007/05/17 22:53:05 wrstuden Exp $ */
=20
/*-
* Copyright (c) 1999, 2000, 2004 The NetBSD Foundation, Inc.
***************
*** 76,82 ****
*/
=20
#include <sys/cdefs.h>
! __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.173 2006/11/03 20:46:00 ad E=
xp $");
=20
#include "opt_ddb.h"
#include "opt_ktrace.h"
--- 76,82 ----
*/
=20
#include <sys/cdefs.h>
! __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.173.4.1 2007/05/17 22:53:05 =
wrstuden Exp $");
=20
#include "opt_ddb.h"
#include "opt_ktrace.h"
***************
*** 445,451 ****
struct lwp *l =3D curlwp;
struct proc *p =3D l ? l->l_proc : NULL;
struct slpque *qp;
- struct sadata_upcall *sau;
int sig, s;
int catch =3D priority & PCATCH;
int relock =3D (priority & PNORELOCK) =3D=3D 0;
--- 445,450 ----
***************
*** 482,499 ****
ktrcsw(l, 1, 0);
#endif
=20
- /*
- * XXX We need to allocate the sadata_upcall structure here,
- * XXX since we can't sleep while waiting for memory inside
- * XXX sa_upcall(). It would be nice if we could safely
- * XXX allocate the sadata_upcall structure on the stack, here.
- */
- if (l->l_flag & L_SA) {
- sau =3D sadata_upcall_alloc(0);
- } else {
- sau =3D NULL;
- }
-=20
SCHED_LOCK(s);
=20
#ifdef DIAGNOSTIC
--- 481,486 ----
***************
*** 567,573 ****
p->p_stats->p_ru.ru_nvcsw++;
SCHED_ASSERT_LOCKED();
if (l->l_flag & L_SA)
! sa_switch(l, sau, SA_UPCALL_BLOCKED);
else
mi_switch(l, NULL);
=20
--- 554,560 ----
p->p_stats->p_ru.ru_nvcsw++;
SCHED_ASSERT_LOCKED();
if (l->l_flag & L_SA)
! sa_switch(l, SA_UPCALL_BLOCKED);
else
mi_switch(l, NULL);
=20
Index: src/sys/sys/savar.h
diff -c src/sys/sys/savar.h:1.20 src/sys/sys/savar.h:1.20.10.1
*** src/sys/sys/savar.h:1.20 Sun Jun 25 08:12:54 2006
--- src/sys/sys/savar.h Thu May 17 22:53:07 2007
***************
*** 1,4 ****
! /* $NetBSD: savar.h,v 1.20 2006/06/25 08:12:54 yamt Exp $ */
=20
/*-
* Copyright (c) 2001 The NetBSD Foundation, Inc.
--- 1,4 ----
! /* $NetBSD: savar.h,v 1.20.10.1 2007/05/17 22:53:07 wrstuden Exp $ */
=20
/*-
* Copyright (c) 2001 The NetBSD Foundation, Inc.
***************
*** 110,115 ****
--- 110,116 ----
vaddr_t savp_ofaultaddr; /* old page fault address */
LIST_HEAD(, lwp) savp_lwpcache; /* list of available lwps */
int savp_ncached; /* list length */
+ struct sadata_upcall *savp_sleeper_upcall; /* cached upcall data */
SIMPLEQ_HEAD(, sadata_upcall) savp_upcalls; /* pending upcalls */
};
=20
***************
*** 134,140 ****
void sadata_upcall_free(struct sadata_upcall *);
=20
void sa_release(struct proc *);
! void sa_switch(struct lwp *, struct sadata_upcall *, int);
void sa_preempt(struct lwp *);
void sa_yield(struct lwp *);
int sa_upcall(struct lwp *, int, struct lwp *, struct lwp *, size_t, void=
*,
--- 135,141 ----
void sadata_upcall_free(struct sadata_upcall *);
=20
void sa_release(struct proc *);
! void sa_switch(struct lwp *, int);
void sa_preempt(struct lwp *);
void sa_yield(struct lwp *);
int sa_upcall(struct lwp *, int, struct lwp *, struct lwp *, size_t, void=
*,
--M2Pxvdb9QxnGd/3e--
--E0h0CbphJD8hN+Gf
Content-Type: application/pgp-signature
Content-Disposition: inline
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.7 (NetBSD)
iD8DBQFG/TqEWz+3JHUci9cRAs7ZAJ9kPvyiZRYVY+LgRJJhHpV5s23rggCeOWai
ksiEXf7L0Ouz94ldLyaVl+Y=
=FxPB
-----END PGP SIGNATURE-----
--E0h0CbphJD8hN+Gf--