Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/kern Fix races in aborted futex waits.



details:   https://anonhg.NetBSD.org/src/rev/08224323f7b4
branches:  trunk
changeset: 971561:08224323f7b4
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Mon Apr 27 23:54:43 2020 +0000

description:
Fix races in aborted futex waits.

- Re-check the wake condition in futex_wait in the event of error.
  => Otherwise, if futex_wait times out in cv_timedwait_sig but
     futex_wake wakes it while cv_timedwait_sig is still trying to
     reacquire fw_lock, the wake would be incorrectly accounted.

- Fold futex_wait_abort into futex_wait so it happens atomically.
  => Otherwise, if futex_wait times out and release fw_lock, then,
     before futex_wait_abort reacquires the lock and removes it from
     the queue, the waiter could be woken by futex_wake.  But once we
     enter futex_wait_abort, the decision to abort is final, so the
     wake would incorrectly accounted.

- In futex_wait_abort, mark each waiter aborting while we do the lock
  dance, and skip over aborting waiters in futex_wake and
  futex_requeue.
  => Otherwise, futex_wake might move it to a new futex while
     futex_wait_abort has released all the locks -- but
     futex_wait_abort still has the old futex, so TAILQ_REMOVE will
     cross the streams and bad things will happen.

- In futex_wait_abort, release the futex we moved the waiter off.
  => Otherwise, we would leak the futex reference acquired by
     futex_func_wait, in the event of aborting.  (For normal wakeups,
     futex_wake releases the reference on our behalf.)

- Consistently use futex_wait_dequeue rather than TAILQ_REMOVE so that
  all changes to fw_futex and the waiter queue are isolated to
  futex_wait_enqueue/dequeue and happen together.

Patch developed with and tested by thorpej@.

diffstat:

 sys/kern/sys_futex.c |  89 ++++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 70 insertions(+), 19 deletions(-)

diffs (209 lines):

diff -r f0f03688705a -r 08224323f7b4 sys/kern/sys_futex.c
--- a/sys/kern/sys_futex.c      Mon Apr 27 23:40:37 2020 +0000
+++ b/sys/kern/sys_futex.c      Mon Apr 27 23:54:43 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: sys_futex.c,v 1.3 2020/04/27 05:28:17 thorpej Exp $    */
+/*     $NetBSD: sys_futex.c,v 1.4 2020/04/27 23:54:43 riastradh Exp $  */
 
 /*-
  * Copyright (c) 2018, 2019, 2020 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_futex.c,v 1.3 2020/04/27 05:28:17 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sys_futex.c,v 1.4 2020/04/27 23:54:43 riastradh Exp $");
 
 /*
  * Futexes
@@ -195,6 +195,7 @@
        TAILQ_ENTRY(futex_wait) fw_entry;       /* queue lock */
        LIST_ENTRY(futex_wait)  fw_abort;       /* queue abortlock */
        int                     fw_bitset;
+       bool                    fw_aborting;    /* fw_lock */
 };
 
 /*
@@ -800,6 +801,7 @@
        cv_init(&fw->fw_cv, "futex");
        fw->fw_futex = NULL;
        fw->fw_bitset = bitset;
+       fw->fw_aborting = false;
 }
 
 /*
@@ -830,6 +832,7 @@
        KASSERT(mutex_owned(&f->fx_qlock));
        KASSERT(mutex_owned(&fw->fw_lock));
        KASSERT(fw->fw_futex == NULL);
+       KASSERT(!fw->fw_aborting);
 
        fw->fw_futex = f;
        TAILQ_INSERT_TAIL(&f->fx_queue, fw, fw_entry);
@@ -858,15 +861,14 @@
  * futex_wait_abort(fw)
  *
  *     Caller is no longer waiting for fw.  Remove it from any queue
- *     if it was on one.
+ *     if it was on one.  Caller must hold fw->fw_lock.
  */
 static void
 futex_wait_abort(struct futex_wait *fw)
 {
        struct futex *f;
 
-       /* Acquire fw_lock so that the content of fw won't change.  */
-       mutex_enter(&fw->fw_lock);
+       KASSERT(mutex_owned(&fw->fw_lock));
 
        /*
         * Grab the futex queue.  It can't go away as long as we hold
@@ -880,12 +882,20 @@
        LIST_INSERT_HEAD(&f->fx_abortlist, fw, fw_abort);
        mutex_exit(&f->fx_abortlock);
 
+       /*
+        * Mark fw as aborting so it won't lose wakeups and won't be
+        * transferred to any other queue.
+        */
+       fw->fw_aborting = true;
+
        /* f is now stable, so we can release fw_lock.  */
        mutex_exit(&fw->fw_lock);
 
        /* Now we can remove fw under the queue lock.  */
        mutex_enter(&f->fx_qlock);
-       TAILQ_REMOVE(&f->fx_queue, fw, fw_entry);
+       mutex_enter(&fw->fw_lock);
+       futex_wait_dequeue(fw, f);
+       mutex_exit(&fw->fw_lock);
        mutex_exit(&f->fx_qlock);
 
        /*
@@ -897,6 +907,20 @@
        if (LIST_EMPTY(&f->fx_abortlist))
                cv_broadcast(&f->fx_abortcv);
        mutex_exit(&f->fx_abortlock);
+
+       /*
+        * Release our reference to the futex now that we are not
+        * waiting for it.
+        */
+       futex_rele(f);
+
+       /*
+        * Reacquire the fw lock as caller expects.  Verify that we're
+        * aborting and no longer associated with a futex.
+        */
+       mutex_enter(&fw->fw_lock);
+       KASSERT(fw->fw_aborting);
+       KASSERT(fw->fw_futex == NULL);
 }
 
 /*
@@ -905,7 +929,8 @@
  *     fw must be a waiter on a futex's queue.  Wait until deadline on
  *     the clock clkid, or forever if deadline is NULL, for a futex
  *     wakeup.  Return 0 on explicit wakeup or destruction of futex,
- *     ETIMEDOUT on timeout, EINTR/ERESTART on signal.
+ *     ETIMEDOUT on timeout, EINTR/ERESTART on signal.  Either way, fw
+ *     will no longer be on a futex queue on return.
  */
 static int
 futex_wait(struct futex_wait *fw, const struct timespec *deadline,
@@ -915,7 +940,18 @@
 
        /* Test and wait under the wait lock.  */
        mutex_enter(&fw->fw_lock);
-       while (fw->fw_bitset && fw->fw_futex != NULL) {
+
+       for (;;) {
+               /* If we're done yet, stop and report success.  */
+               if (fw->fw_bitset == 0 || fw->fw_futex == NULL) {
+                       error = 0;
+                       break;
+               }
+
+               /* If anything went wrong in the last iteration, stop.  */
+               if (error)
+                       break;
+
                /* Not done yet.  Wait.  */
                if (deadline) {
                        struct timespec ts;
@@ -941,13 +977,20 @@
                        /* Wait indefinitely, allowing signals. */
                        error = cv_wait_sig(&fw->fw_cv, &fw->fw_lock);
                }
-               if (error) {
-                       /* Convert EWOULDBLOCK to ETIMEDOUT.  */
-                       if (error == EWOULDBLOCK)
-                               error = ETIMEDOUT;
-                       break;
-               }
        }
+
+       /*
+        * If we were woken up, the waker will have removed fw from the
+        * queue.  But if anything went wrong, we must remove fw from
+        * the queue ourselves.  While here, convert EWOULDBLOCK to
+        * ETIMEDOUT.
+        */
+       if (error) {
+               futex_wait_abort(fw);
+               if (error == EWOULDBLOCK)
+                       error = ETIMEDOUT;
+       }
+
        mutex_exit(&fw->fw_lock);
 
        return error;
@@ -976,12 +1019,17 @@
        TAILQ_FOREACH_SAFE(fw, &f->fx_queue, fw_entry, fw_next) {
                if ((fw->fw_bitset & bitset) == 0)
                        continue;
-               if (nwake-- > 0) {
+               if (nwake > 0) {
                        mutex_enter(&fw->fw_lock);
+                       if (__predict_false(fw->fw_aborting)) {
+                               mutex_exit(&fw->fw_lock);
+                               continue;
+                       }
                        futex_wait_dequeue(fw, f);
                        fw->fw_bitset = 0;
                        cv_broadcast(&fw->fw_cv);
                        mutex_exit(&fw->fw_lock);
+                       nwake--;
                        nwoken++;
                        /*
                         * Drop the futex reference on behalf of the
@@ -1000,11 +1048,16 @@
                TAILQ_FOREACH_SAFE(fw, &f->fx_queue, fw_entry, fw_next) {
                        if ((fw->fw_bitset & bitset) == 0)
                                continue;
-                       if (nrequeue-- > 0) {
+                       if (nrequeue > 0) {
                                mutex_enter(&fw->fw_lock);
+                               if (__predict_false(fw->fw_aborting)) {
+                                       mutex_exit(&fw->fw_lock);
+                                       continue;
+                               }
                                futex_wait_dequeue(fw, f);
                                futex_wait_enqueue(fw, f2);
                                mutex_exit(&fw->fw_lock);
+                               nrequeue--;
                                /*
                                 * Transfer the reference from f to f2.
                                 * As above, we assert that we are not
@@ -1204,10 +1257,8 @@
 
        /* Wait. */
        error = futex_wait(fw, deadline, clkid);
-       if (error) {
-               futex_wait_abort(fw);
+       if (error)
                goto out;
-       }
 
        /* Return 0 on success, error on failure. */
        *retval = 0;



Home | Main Index | Thread Index | Old Index