Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/lib/libpthread Deal with a couple of problems with threads b...



details:   https://anonhg.NetBSD.org/src/rev/ee71b25a7925
branches:  trunk
changeset: 972700:ee71b25a7925
user:      ad <ad%NetBSD.org@localhost>
date:      Wed Jun 03 22:10:24 2020 +0000

description:
Deal with a couple of problems with threads being awoken early due to
timeouts or cancellation where:

- The restarting thread calls _lwp_exit() before another thread gets around
  to waking it with _lwp_unpark(), leading to ESRCH (observed by joerg@).
  (I may have removed a similar check mistakenly over the weekend.)

- The restarting thread considers itself gone off the sleep queue but
  at the same time another thread is part way through waking it, and hasn't
  fully completed that operation yet by setting thread->pt_mutexwait = 0.
  I think that could have potentially lead to the list of waiters getting
  messed up given the right circumstances.

diffstat:

 lib/libpthread/pthread.c       |  18 +++++++++-----
 lib/libpthread/pthread_cond.c  |  51 ++++++++++++++++++++++++++++-------------
 lib/libpthread/pthread_mutex.c |  17 ++++++++++++-
 3 files changed, 61 insertions(+), 25 deletions(-)

diffs (196 lines):

diff -r 1dc19abd7270 -r ee71b25a7925 lib/libpthread/pthread.c
--- a/lib/libpthread/pthread.c  Wed Jun 03 19:55:13 2020 +0000
+++ b/lib/libpthread/pthread.c  Wed Jun 03 22:10:24 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pthread.c,v 1.172 2020/06/02 00:29:53 joerg Exp $      */
+/*     $NetBSD: pthread.c,v 1.173 2020/06/03 22:10:24 ad Exp $ */
 
 /*-
  * Copyright (c) 2001, 2002, 2003, 2006, 2007, 2008, 2020
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: pthread.c,v 1.172 2020/06/02 00:29:53 joerg Exp $");
+__RCSID("$NetBSD: pthread.c,v 1.173 2020/06/03 22:10:24 ad Exp $");
 
 #define        __EXPOSE_STACK  1
 
@@ -599,9 +599,12 @@
 }
 
 /*
- * In case the thread is exiting at an inopportune time leaving waiters not
- * awoken (because cancelled, for instance) make sure we have no waiters
- * left.
+ * Wake all deferred waiters hanging off self.
+ *
+ * It's possible for threads to have called _lwp_exit() before we wake them,
+ * because of cancellation and timeout, so ESRCH is tolerated here.  If a
+ * thread exits and its LID is reused, and the a thread receives an wakeup
+ * meant for the previous incarnation of the LID, no harm will be done.
  */
 void
 pthread__clear_waiters(pthread_t self)
@@ -620,7 +623,7 @@
                rv = _lwp_unpark(self->pt_waiters[0], NULL);
                self->pt_waiters[0] = 0;
                self->pt_nwaiters = 0;
-               if (rv != 0) {
+               if (rv != 0 && errno != ESRCH) {
                        pthread__errorfunc(__FILE__, __LINE__, __func__,
                            "_lwp_unpark failed: %d", errno);
                }
@@ -629,7 +632,7 @@
                rv = _lwp_unpark_all(self->pt_waiters, self->pt_nwaiters, NULL);
                self->pt_waiters[0] = 0;
                self->pt_nwaiters = 0;
-               if (rv != 0) {
+               if (rv != 0 && errno != ESRCH) {
                        pthread__errorfunc(__FILE__, __LINE__, __func__,
                            "_lwp_unpark_all failed: %d", errno);
                }
@@ -1195,6 +1198,7 @@
                        switch (rv = errno) {
                        case EINTR:
                        case EALREADY:
+                       case ESRCH:
                                rv = 0;
                                break;
                        case ETIMEDOUT:
diff -r 1dc19abd7270 -r ee71b25a7925 lib/libpthread/pthread_cond.c
--- a/lib/libpthread/pthread_cond.c     Wed Jun 03 19:55:13 2020 +0000
+++ b/lib/libpthread/pthread_cond.c     Wed Jun 03 22:10:24 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pthread_cond.c,v 1.70 2020/06/01 11:44:59 ad Exp $     */
+/*     $NetBSD: pthread_cond.c,v 1.71 2020/06/03 22:10:24 ad Exp $     */
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: pthread_cond.c,v 1.70 2020/06/01 11:44:59 ad Exp $");
+__RCSID("$NetBSD: pthread_cond.c,v 1.71 2020/06/03 22:10:24 ad Exp $");
 
 #include <stdlib.h>
 #include <errno.h>
@@ -112,7 +112,7 @@
                       const struct timespec *abstime)
 {
        pthread_t self, next, waiters;
-       int retval;
+       int retval, cancel;
        clockid_t clkid = pthread_cond_getclock(cond);
 
        if (__predict_false(__uselibcstub))
@@ -126,6 +126,7 @@
            mutex->ptm_owner != NULL);
 
        self = pthread__self();
+       pthread__assert(!self->pt_condwait);
 
        if (__predict_false(self->pt_cancel)) {
                pthread__cancelled();
@@ -165,24 +166,42 @@
                                retval = errno;
                        }
                }
-       } while (self->pt_condwait && !self->pt_cancel && !retval);
+               cancel = self->pt_cancel;
+       } while (self->pt_condwait && !cancel && !retval);
 
        /*
-        * If we have cancelled then exit.  POSIX dictates that
-        * the mutex must be held when we action the cancellation.
+        * If this thread absorbed a wakeup from pthread_cond_signal() and
+        * cannot take the wakeup, we must ensure that another thread does.
+        *
+        * And if awoken early, we may still be on the waiter list and must
+        * remove self.
+        *
+        * In all cases do the wakeup without the mutex held otherwise:
         *
-        * If we absorbed a pthread_cond_signal() and cannot take
-        * the wakeup, we must ensure that another thread does.
-        *
-        * If awoke early, we may still be on the waiter list and
-        * must remove ourself.
+        * - wakeup could be deferred until mutex release
+        * - it would be mixing up two sets of waitpoints
+        */
+       if (__predict_false(self->pt_condwait | cancel | retval)) {
+               pthread_cond_broadcast(cond);
+
+               /*
+                * Might have raced with another thread to do the wakeup. 
+                * In any case there will be a wakeup for sure.  Eat it and
+                * wait for pt_condwait to clear.
+                */
+               do {
+                       (void)_lwp_park(CLOCK_REALTIME, TIMER_ABSTIME, NULL,
+                           0, NULL, NULL);
+               } while (self->pt_condwait);
+       }
+
+       /*
+        * If cancelled then exit.  POSIX dictates that the mutex must be
+        * held if this happens.
         */
        pthread_mutex_lock(mutex);
-       if (__predict_false(self->pt_condwait | self->pt_cancel | retval)) {
-               pthread_cond_broadcast(cond);
-               if (self->pt_cancel) {
-                       pthread__cancelled();
-               }
+       if (cancel) {
+               pthread__cancelled();
        }
 
        return retval;
diff -r 1dc19abd7270 -r ee71b25a7925 lib/libpthread/pthread_mutex.c
--- a/lib/libpthread/pthread_mutex.c    Wed Jun 03 19:55:13 2020 +0000
+++ b/lib/libpthread/pthread_mutex.c    Wed Jun 03 22:10:24 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pthread_mutex.c,v 1.78 2020/06/01 11:44:59 ad Exp $    */
+/*     $NetBSD: pthread_mutex.c,v 1.79 2020/06/03 22:10:24 ad Exp $    */
 
 /*-
  * Copyright (c) 2001, 2003, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -47,7 +47,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: pthread_mutex.c,v 1.78 2020/06/01 11:44:59 ad Exp $");
+__RCSID("$NetBSD: pthread_mutex.c,v 1.79 2020/06/03 22:10:24 ad Exp $");
 
 #include <sys/types.h>
 #include <sys/lwpctl.h>
@@ -278,6 +278,7 @@
        serrno = errno;
 
        pthread__assert(!self->pt_willpark);
+       pthread__assert(!self->pt_mutexwait);
 
        /* Recursive or errorcheck? */
        if (MUTEX_OWNER(owner) == (uintptr_t)self) {
@@ -369,6 +370,18 @@
                        if (error < 0 && errno == ETIMEDOUT) {
                                /* Remove self from waiters list */
                                pthread__mutex_wakeup(self, ptm);
+
+                               /*
+                                * Might have raced with another thread to
+                                * do the wakeup.  In any case there will be
+                                * a wakeup for sure.  Eat it and wait for
+                                * pt_mutexwait to clear.
+                                */
+                               do {
+                                       (void)_lwp_park(CLOCK_REALTIME,
+                                          TIMER_ABSTIME, NULL, 0, NULL, NULL);
+                               } while (self->pt_mutexwait);
+
                                /* Priority protect */
                                if (MUTEX_PROTECT(owner))
                                        (void)_sched_protect(-1);



Home | Main Index | Thread Index | Old Index