Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/kern fix a race in kqueue_scan() - when multiple threads...



details:   https://anonhg.NetBSD.org/src/rev/1c8708424488
branches:  trunk
changeset: 980029:1c8708424488
user:      jdolecek <jdolecek%NetBSD.org@localhost>
date:      Wed Jan 20 21:39:09 2021 +0000

description:
fix a race in kqueue_scan() - when multiple threads check the same
kqueue, it could happen other thread seen empty kqueue while kevent
was being checked for re-firing and re-queued

make sure to keep retrying if there are outstanding kevents even
if no kevent is found on first pass through the queue, and only
drop the KN_QUEUED flag and kq_count when actually completely done
with the kevent

change is inspired by the FreeBSD in-flux handling, but without
introducing the reference counting

PR kern/50094 by Christof Meerwald

diffstat:

 sys/kern/kern_event.c |  67 ++++++++++++++++++++++++++++++--------------------
 1 files changed, 40 insertions(+), 27 deletions(-)

diffs (168 lines):

diff -r a45324dc4eba -r 1c8708424488 sys/kern/kern_event.c
--- a/sys/kern/kern_event.c     Wed Jan 20 21:38:44 2021 +0000
+++ b/sys/kern/kern_event.c     Wed Jan 20 21:39:09 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: kern_event.c,v 1.110 2020/12/27 12:45:33 jdolecek Exp $        */
+/*     $NetBSD: kern_event.c,v 1.111 2021/01/20 21:39:09 jdolecek Exp $        */
 
 /*-
  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -59,7 +59,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.110 2020/12/27 12:45:33 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.111 2021/01/20 21:39:09 jdolecek Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -179,6 +179,8 @@
 
 extern const struct filterops sig_filtops;
 
+#define KQ_FLUX_WAKEUP(kq)     cv_broadcast(&kq->kq_cv)
+
 /*
  * Table for for all system-defined filters.
  * These should be listed in the numeric order of the EVFILT_* defines.
@@ -1396,7 +1398,7 @@
        struct timespec ats, sleepts;
        struct knote    *kn, *marker, morker;
        size_t          count, nkev, nevents;
-       int             timeout, error, touch, rv;
+       int             timeout, error, touch, rv, influx;
        filedesc_t      *fdp;
 
        fdp = curlwp->l_fd;
@@ -1450,39 +1452,48 @@
 
        /* mark end of knote list */
        TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
+       influx = 0;
 
        /*
         * Acquire the fdp->fd_lock interlock to avoid races with
         * file creation/destruction from other threads.
         */
+relock:
        mutex_spin_exit(&kq->kq_lock);
        mutex_enter(&fdp->fd_lock);
        mutex_spin_enter(&kq->kq_lock);
 
        while (count != 0) {
                kn = TAILQ_FIRST(&kq->kq_head); /* get next knote */
-               while ((kn->kn_status & KN_MARKER) != 0) {
-                       if (kn == marker) {
-                               /* it's our marker, stop */
-                               TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
-                               if (count < maxevents || (tsp != NULL &&
-                                   (timeout = gettimeleft(&ats,
-                                   &sleepts)) <= 0))
-                                       goto queue_processed;
+
+               if ((kn->kn_status & KN_MARKER) != 0 && kn != marker) {
+                       if (influx) {
+                               influx = 0;
+                               KQ_FLUX_WAKEUP(kq);
+                       }
+                       mutex_exit(&fdp->fd_lock);
+                       (void)cv_wait_sig(&kq->kq_cv, &kq->kq_lock);
+                       goto relock;
+               }
+
+               TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
+               if (kn == marker) {
+                       /* it's our marker, stop */
+                       KQ_FLUX_WAKEUP(kq);
+                       if (count == maxevents) {
                                mutex_exit(&fdp->fd_lock);
                                goto retry;
                        }
-                       /* someone else's marker. */
-                       kn = TAILQ_NEXT(kn, kn_tqe);
+                       break;
                }
+               KASSERT((kn->kn_status & KN_BUSY) == 0);
+
                kq_check(kq);
-               kq->kq_count--;
-               TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
-               kn->kn_status &= ~KN_QUEUED;
                kn->kn_status |= KN_BUSY;
                kq_check(kq);
                if (kn->kn_status & KN_DISABLED) {
-                       kn->kn_status &= ~KN_BUSY;
+                       kq->kq_count--;
+                       kn->kn_status &= ~(KN_QUEUED|KN_BUSY);
                        /* don't want disabled events */
                        continue;
                }
@@ -1495,17 +1506,14 @@
                        rv = (*kn->kn_fop->f_event)(kn, 0);
                        KERNEL_UNLOCK_ONE(NULL);        /* XXXSMP */
                        mutex_spin_enter(&kq->kq_lock);
-                       /* Re-poll if note was re-enqueued. */
-                       if ((kn->kn_status & KN_QUEUED) != 0) {
-                               kn->kn_status &= ~KN_BUSY;
-                               continue;
-                       }
                        if (rv == 0) {
                                /*
                                 * non-ONESHOT event that hasn't
                                 * triggered again, so de-queue.
                                 */
-                               kn->kn_status &= ~(KN_ACTIVE|KN_BUSY);
+                               kn->kn_status &= ~(KN_QUEUED|KN_ACTIVE|KN_BUSY);
+                               kq->kq_count--;
+                               influx = 1;
                                continue;
                        }
                }
@@ -1524,9 +1532,11 @@
                }
                kevp++;
                nkev++;
+               influx = 1;
                if (kn->kn_flags & EV_ONESHOT) {
                        /* delete ONESHOT events after retrieval */
-                       kn->kn_status &= ~KN_BUSY;
+                       kn->kn_status &= ~(KN_QUEUED|KN_BUSY);
+                       kq->kq_count--;
                        mutex_spin_exit(&kq->kq_lock);
                        knote_detach(kn, fdp, true);
                        mutex_enter(&fdp->fd_lock);
@@ -1544,20 +1554,23 @@
                                kn->kn_fflags = 0;
                        }
                        kn->kn_status &= ~(KN_QUEUED|KN_ACTIVE|KN_BUSY);
+                       kq->kq_count--;
                } else if (kn->kn_flags & EV_DISPATCH) {
                        kn->kn_status |= KN_DISABLED;
                        kn->kn_status &= ~(KN_QUEUED|KN_ACTIVE|KN_BUSY);
+                       kq->kq_count--;
                } else {
                        /* add event back on list */
                        kq_check(kq);
-                       kn->kn_status |= KN_QUEUED;
                        kn->kn_status &= ~KN_BUSY;
                        TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
-                       kq->kq_count++;
                        kq_check(kq);
                }
+
                if (nkev == kevcnt) {
                        /* do copyouts in kevcnt chunks */
+                       influx = 0;
+                       KQ_FLUX_WAKEUP(kq);
                        mutex_spin_exit(&kq->kq_lock);
                        mutex_exit(&fdp->fd_lock);
                        error = (*keops->keo_put_events)
@@ -1576,7 +1589,7 @@
                        break;
                }
        }
-queue_processed:
+       KQ_FLUX_WAKEUP(kq);
        mutex_spin_exit(&kq->kq_lock);
        mutex_exit(&fdp->fd_lock);
 



Home | Main Index | Thread Index | Old Index