tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: Help for PR kern/46606 is needed
In article <6eefbae491842ca53ddd56690ba31ef3%mail.marples.name@localhost>,
Roy Marples <roy%marples.name@localhost> wrote:
>-=-=-=-=-=-
>
>Hi
>
>On 25/11/2013 10:33, Ryo ONODERA wrote:
>> pulseaudio needs pkgsrc/sysutils/hal, and running hal causes
>> PR kern/46606 kernel panic when the NetBSD system is shutdown.
>> See http://gnats.netbsd.org/46606 (and duplicated bug
>> http://gnats.netbsd.org/47012 ).
>>
>> How to debug this problem?
>> This problem is observed even on NetBSD/amd64 6.99.27
>> of Tue Nov 19 06:16:01 JST 2013.
>
>I have had this for months on i386.
>I am running the attached patch from christos@ which stops the crash,
>but probably Does Bad Things.
>
>The actual error seems to be when hal starts, but only causes a problem
>when hal stops.
>This happens for you at shutdown, because hal is stopped then.
>
>Here's the output from a run:
I think I finally understand what's going on.
>1st start
Creation:
>/usr/src/sys/kern/kern_lwp.c,731: hald[2231]: [uid=0] (0/1)
Setuid:
>/usr/src/sys/kern/kern_prot.c,357: hald[2231]: [uid=0] (1/-1)
>/usr/src/sys/kern/kern_prot.c,361: hald[2231]: [uid=1005] (0/1)
>/usr/src/sys/kern/kern_prot.c,381: hald[2231]: [uid=1005] (1/0)
Setuid after using p->p_cred for the uid of the process:
>/usr/src/sys/kern/kern_prot.c,386: hald[2231]: [uid=1005] (1/0)
>/usr/src/sys/kern/kern_lwp.c,731: hald-runner[1965]: [uid=0] (0/1)
>
>1st stop
Destruction:
XXX: This uses l->l_cred to find the uid of the lwp, and that cred is still
pointing to root?!?!? Didn't we just setuid to 1005 above? All creds
of hald should be pointing to 1005, yet this lwp's cred is still pointing to
root.
>/usr/src/sys/kern/kern_lwp.c,1128: hald[2231]: [uid=0] (1/-1)
Now root has one cred missing! So when hald-runner dies we end up:
>/usr/src/sys/kern/kern_lwp.c,1128: hald-runner[1965]: [uid=0] (0/-1)
with lwp count == -1, as you can see below. We should have crashed at this
point, but my patch comments out the KASSERT!
It's too late now; the damage has been done.
>
>2nd start
>/usr/src/sys/kern/kern_lwp.c,731: hald[1209]: [uid=0] (4294967295/1)
>/usr/src/sys/kern/kern_prot.c,357: hald[1209]: [uid=0] (0/-1)
>/usr/src/sys/kern/kern_prot.c,361: hald[1209]: [uid=1005] (1/1)
>/usr/src/sys/kern/kern_prot.c,381: hald[1209]: [uid=1005] (2/0)
>/usr/src/sys/kern/kern_prot.c,386: hald[1209]: [uid=1005] (2/0)
>/usr/src/sys/kern/kern_lwp.c,731: hald-runner[738]: [uid=0]
>(4294967295/1)
>
>2nd stop
>/usr/src/sys/kern/kern_lwp.c,1128: hald[1209]: [uid=1005] (2/-1)
>/usr/src/sys/kern/kern_lwp.c,1128: hald-runner[738]: [uid=0] (0/-1)
>
>Thanks
>
>Roy
>-=-=-=-=-=-
>Index: sys/kern/kern_lwp.c
>===================================================================
>RCS file: /cvsroot/src/sys/kern/kern_lwp.c,v
>retrieving revision 1.175
>diff -u -p -r1.175 kern_lwp.c
>--- sys/kern/kern_lwp.c 9 Jun 2013 01:13:47 -0000 1.175
>+++ sys/kern/kern_lwp.c 25 Nov 2013 14:31:20 -0000
>@@ -781,6 +781,12 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_
> */
> if (p2->p_nlwps != 0 && p2 != &proc0) {
> uid_t uid = kauth_cred_getuid(l1->l_cred);
>+ if (strncmp(p2->p_comm, "hald", 4) == 0) {
>+ struct uidinfo *uip = uid_find(uid);
>+ printf("%s,%d: %s[%d]: [uid=%d] (%lu/%d)\n", __FILE__,
>+ __LINE__, p2->p_comm, (int)p2->p_pid, (int)uid,
>+ uip->ui_lwpcnt, 1);
>+ }
> int count = chglwpcnt(uid, 1);
> if (__predict_false(count >
> p2->p_rlimit[RLIMIT_NTHR].rlim_cur)) {
>@@ -789,6 +795,13 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_
> KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
> &p2->p_rlimit[RLIMIT_NTHR], KAUTH_ARG(RLIMIT_NTHR))
> != 0) {
>+ if (strncmp(p2->p_comm, "hald", 4) == 0) {
>+ struct uidinfo *uip = uid_find(uid);
>+ printf("%s,%d: %s[%d]: [uid=%d]"
>+ " (%lu/%d)\n", __FILE__, __LINE__,
>+ p2->p_comm, (int)p2->p_pid,
>+ (int)uid, uip->ui_lwpcnt, -1);
>+ }
> (void)chglwpcnt(uid, -1);
> return EAGAIN;
> }
>@@ -1174,8 +1187,16 @@ lwp_free(struct lwp *l, bool recycle, bo
> KASSERT(l != curlwp);
> KASSERT(last || mutex_owned(p->p_lock));
>
>- if (p != &proc0 && p->p_nlwps != 1)
>+ if (p != &proc0 && p->p_nlwps != 1) {
>+ uid_t uid = kauth_cred_getuid(l->l_cred);
>+ if (strncmp(p->p_comm, "hald", 4) == 0) {
>+ struct uidinfo *uip = uid_find(uid);
>+ printf("%s,%d: %s[%d]: [uid=%d] (%lu/%d)\n", __FILE__,
>+ __LINE__, p->p_comm, (int)p->p_pid, (int)uid,
>+ uip->ui_lwpcnt, -1);
>+ }
> (void)chglwpcnt(kauth_cred_getuid(l->l_cred), -1);
>+ }
> /*
> * If this was not the last LWP in the process, then adjust
> * counters and unlock.
>Index: sys/kern/kern_prot.c
>===================================================================
>RCS file: /cvsroot/src/sys/kern/kern_prot.c,v
>retrieving revision 1.116
>diff -u -p -r1.116 kern_prot.c
>--- sys/kern/kern_prot.c 9 Jun 2012 02:55:32 -0000 1.116
>+++ sys/kern/kern_prot.c 25 Nov 2013 14:31:20 -0000
>@@ -299,6 +299,7 @@ do_setresuid(struct lwp *l, uid_t r, uid
> {
> struct proc *p = l->l_proc;
> kauth_cred_t cred, ncred;
>+ uid_t uid;
>
> ncred = kauth_cred_alloc();
>
>@@ -342,14 +343,25 @@ do_setresuid(struct lwp *l, uid_t r, uid
>
> kauth_cred_clone(cred, ncred);
>
>- if (r != -1 && r != kauth_cred_getuid(ncred)) {
>+ uid = kauth_cred_getuid(ncred);
>+ if (r != -1 && r != uid) {
> /* Update count of processes for this user */
>- (void)chgproccnt(kauth_cred_getuid(ncred), -1);
>+ (void)chgproccnt(uid, -1);
> (void)chgproccnt(r, 1);
>
> /* The first lwp of a process is not counted */
> int nlwps = p->p_nlwps - 1;
>- (void)chglwpcnt(kauth_cred_getuid(ncred), -nlwps);
>+ if (strncmp(p->p_comm, "hald", 4) == 0) {
>+ struct uidinfo *uip = uid_find(uid);
>+ printf("%s,%d: %s[%d]: [uid=%d] (%lu/%d)\n", __FILE__,
>+ __LINE__, p->p_comm, (int)p->p_pid, (int)uid,
>+ uip->ui_lwpcnt, -nlwps);
>+ uip = uid_find(r);
>+ printf("%s,%d: %s[%d]: [uid=%d] (%lu/%d)\n", __FILE__,
>+ __LINE__, p->p_comm, (int)p->p_pid, (int)r,
>+ uip->ui_lwpcnt, nlwps);
>+ }
>+ (void)chglwpcnt(uid, -nlwps);
> (void)chglwpcnt(r, nlwps);
>
> kauth_cred_setuid(ncred, r);
>@@ -362,6 +374,19 @@ do_setresuid(struct lwp *l, uid_t r, uid
> /* Broadcast our credentials to the process and other LWPs. */
> proc_crmod_leave(ncred, cred, true);
>
>+ if (strncmp(p->p_comm, "hald", 4) == 0) {
>+ uid = kauth_cred_getuid(p->p_cred);
>+ struct uidinfo *uip = uid_find(uid);
>+ printf("%s,%d: %s[%d]: [uid=%d] (%lu/%d)\n", __FILE__,
>+ __LINE__, p->p_comm, (int)p->p_pid, (int)uid,
>+ uip->ui_lwpcnt, 0);
>+ uid = kauth_cred_getuid(l->l_cred);
>+ uip = uid_find(uid);
>+ printf("%s,%d: %s[%d]: [uid=%d] (%lu/%d)\n", __FILE__,
>+ __LINE__, p->p_comm, (int)p->p_pid, (int)uid,
>+ uip->ui_lwpcnt, 0);
>+ }
>+
> return 0;
> }
>
>Index: sys/kern/kern_uidinfo.c
>===================================================================
>RCS file: /cvsroot/src/sys/kern/kern_uidinfo.c,v
>retrieving revision 1.8
>diff -u -p -r1.8 kern_uidinfo.c
>--- sys/kern/kern_uidinfo.c 10 Mar 2013 17:55:42 -0000 1.8
>+++ sys/kern/kern_uidinfo.c 25 Nov 2013 14:31:26 -0000
>@@ -214,7 +214,9 @@ chglwpcnt(uid_t uid, int diff)
>
> uip = uid_find(uid);
> lwpcnt = atomic_add_long_nv(&uip->ui_lwpcnt, diff);
>- KASSERT(lwpcnt >= 0);
>+ //KASSERT(lwpcnt >= 0);
>+ if (lwpcnt < 0)
>+ printf ("KASSERT! lwpcnt < 0\n");
> return lwpcnt;
> }
>
>-=-=-=-=-=-
Home |
Main Index |
Thread Index |
Old Index