Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys Minor improvements to select/poll:



details:   https://anonhg.NetBSD.org/src/rev/6088880bdfbc
branches:  trunk
changeset: 1004941:6088880bdfbc
user:      ad <ad%NetBSD.org@localhost>
date:      Thu Nov 21 21:42:30 2019 +0000

description:
Minor improvements to select/poll:

- Increase the maximum number of clusters from 32 to 64 for large systems.
  kcpuset_t could potentially be used here but that's an excursion I don't
  want to go on right now.  uint32_t -> uint64_t is very simple.

- In the case of a non-blocking select/poll, or where we won't block
  because there are events ready to report, stop registering interest in
  the back-end objects early.

- Change the wmesg for poll back to "poll".

diffstat:

 sys/kern/sys_select.c |  80 +++++++++++++++++++++++++++++++++++++-------------
 sys/sys/selinfo.h     |   8 ++--
 2 files changed, 63 insertions(+), 25 deletions(-)

diffs (262 lines):

diff -r 183c96969a67 -r 6088880bdfbc sys/kern/sys_select.c
--- a/sys/kern/sys_select.c     Thu Nov 21 20:51:05 2019 +0000
+++ b/sys/kern/sys_select.c     Thu Nov 21 21:42:30 2019 +0000
@@ -1,7 +1,7 @@
-/*     $NetBSD: sys_select.c,v 1.48 2019/09/20 15:00:47 kamil Exp $    */
+/*     $NetBSD: sys_select.c,v 1.49 2019/11/21 21:42:30 ad Exp $       */
 
 /*-
- * Copyright (c) 2007, 2008, 2009, 2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2007, 2008, 2009, 2010, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -84,7 +84,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.48 2019/09/20 15:00:47 kamil Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.49 2019/11/21 21:42:30 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -104,6 +104,7 @@
 #include <sys/socketvar.h>
 #include <sys/sleepq.h>
 #include <sys/sysctl.h>
+#include <sys/bitops.h>
 
 /* Flags for lwp::l_selflag. */
 #define        SEL_RESET       0       /* awoken, interrupted, or not yet polling */
@@ -111,10 +112,6 @@
 #define        SEL_BLOCKING    2       /* blocking and waiting for event */
 #define        SEL_EVENT       3       /* interrupted, events set directly */
 
-/* Operations: either select() or poll(). */
-#define        SELOP_SELECT    1
-#define        SELOP_POLL      2
-
 /*
  * Per-cluster state for select()/poll().  For a system with fewer
  * than 32 CPUs, this gives us per-CPU clusters.
@@ -125,8 +122,8 @@
 typedef struct selcluster {
        kmutex_t        *sc_lock;
        sleepq_t        sc_sleepq;
+       uint64_t        sc_mask;
        int             sc_ncoll;
-       uint32_t        sc_mask;
 } selcluster_t;
 
 static inline int      selscan(char *, const int, const size_t, register_t *);
@@ -150,6 +147,10 @@
 static selcluster_t    *selcluster[SELCLUSTERS] __read_mostly;
 static int             direct_select __read_mostly = 0;
 
+/* Operations: either select() or poll(). */
+const char             selop_select[] = "select";
+const char             selop_poll[] = "poll";
+
 /*
  * Select system call.
  */
@@ -221,7 +222,7 @@
  * sel_do_scan: common code to perform the scan on descriptors.
  */
 static int
-sel_do_scan(const int op, void *fds, const int nf, const size_t ni,
+sel_do_scan(const char *opname, void *fds, const int nf, const size_t ni,
     struct timespec *ts, sigset_t *mask, register_t *retval)
 {
        lwp_t           * const l = curlwp;
@@ -238,10 +239,17 @@
        if (__predict_false(mask))
                sigsuspendsetup(l, mask);
 
+       /*
+        * We may context switch during or at any time after picking a CPU
+        * and cluster to associate with, but it doesn't matter.  In the
+        * unlikely event we migrate elsewhere all we risk is a little lock
+        * contention; correctness is not sacrificed.
+        */
        sc = curcpu()->ci_data.cpu_selcluster;
        lock = sc->sc_lock;
        l->l_selcluster = sc;
-       if (op == SELOP_SELECT) {
+
+       if (opname == selop_select) {
                l->l_selbits = fds;
                l->l_selni = ni;
        } else {
@@ -261,10 +269,16 @@
                 * and lock activity resulting from fo_poll is enough to
                 * provide an up to date value for new polling activity.
                 */
-               l->l_selflag = SEL_SCANNING;
+               if (ts && (ts->tv_sec | ts->tv_nsec | direct_select) == 0) {
+                       /* Non-blocking: no need for selrecord()/selclear() */
+                       l->l_selflag = SEL_RESET;
+               } else {
+                       l->l_selflag = SEL_SCANNING;
+               }
                ncoll = sc->sc_ncoll;
+               membar_exit();
 
-               if (op == SELOP_SELECT) {
+               if (opname == selop_select) {
                        error = selscan((char *)fds, nf, ni, retval);
                } else {
                        error = pollscan((struct pollfd *)fds, nf, retval);
@@ -301,7 +315,7 @@
                l->l_selflag = SEL_BLOCKING;
                l->l_kpriority = true;
                sleepq_enter(&sc->sc_sleepq, l, lock);
-               sleepq_enqueue(&sc->sc_sleepq, sc, "select", &select_sobj);
+               sleepq_enqueue(&sc->sc_sleepq, sc, opname, &select_sobj);
                error = sleepq_block(timo, true);
                if (error != 0) {
                        break;
@@ -363,7 +377,7 @@
        getbits(ex, 2);
 #undef getbits
 
-       error = sel_do_scan(SELOP_SELECT, bits, nd, ni, ts, mask, retval);
+       error = sel_do_scan(selop_select, bits, nd, ni, ts, mask, retval);
        if (error == 0 && u_in != NULL)
                error = copyout(bits + ni * 3, u_in, ni);
        if (error == 0 && u_ou != NULL)
@@ -382,10 +396,12 @@
        fd_mask *ibitp, *obitp;
        int msk, i, j, fd, n;
        file_t *fp;
+       lwp_t *l;
 
        ibitp = (fd_mask *)(bits + ni * 0);
        obitp = (fd_mask *)(bits + ni * 3);
        n = 0;
+       l = curlwp;
 
        memset(obitp, 0, ni * 3);
        for (msk = 0; msk < 3; msk++) {
@@ -402,8 +418,15 @@
                                 * Setup an argument to selrecord(), which is
                                 * a file descriptor number.
                                 */
-                               curlwp->l_selrec = fd;
+                               l->l_selrec = fd;
                                if ((*fp->f_ops->fo_poll)(fp, sel_flag[msk])) {
+                                       if (!direct_select) {
+                                               /*
+                                                * Have events: do nothing in
+                                                * selrecord().
+                                                */
+                                               l->l_selflag = SEL_RESET;
+                                       }
                                        obits |= (1U << j);
                                        n++;
                                }
@@ -412,7 +435,7 @@
                        if (obits != 0) {
                                if (direct_select) {
                                        kmutex_t *lock;
-                                       lock = curlwp->l_selcluster->sc_lock;
+                                       lock = l->l_selcluster->sc_lock;
                                        mutex_spin_enter(lock);
                                        *obitp |= obits;
                                        mutex_spin_exit(lock);
@@ -527,7 +550,7 @@
        if (error)
                goto fail;
 
-       error = sel_do_scan(SELOP_POLL, fds, nfds, ni, ts, mask, retval);
+       error = sel_do_scan(selop_poll, fds, nfds, ni, ts, mask, retval);
        if (error == 0)
                error = copyout(fds, u_fds, ni);
  fail:
@@ -560,6 +583,10 @@
                        fd_putfile(fds->fd);
                }
                if (revents) {
+                       if (!direct_select)  {
+                               /* Have events: do nothing in selrecord(). */
+                               curlwp->l_selflag = SEL_RESET;
+                       }
                        fds->revents = revents;
                        n++;
                }
@@ -607,7 +634,9 @@
        sc = selector->l_selcluster;
        other = sip->sel_lwp;
 
-       if (other == selector) {
+       if (selector->l_selflag == SEL_RESET) {
+               /* 0. We're not going to block - will poll again if needed. */
+       } else if (other == selector) {
                /* 1. We (selector) already claimed to be the first LWP. */
                KASSERT(sip->sel_cluster == sc);
        } else if (other == NULL) {
@@ -708,7 +737,7 @@
 selnotify(struct selinfo *sip, int events, long knhint)
 {
        selcluster_t *sc;
-       uint32_t mask;
+       uint64_t mask;
        int index, oflag;
        lwp_t *l;
        kmutex_t *lock;
@@ -757,8 +786,8 @@
                 */
                sip->sel_collision = 0;
                do {
-                       index = ffs(mask) - 1;
-                       mask &= ~(1U << index);
+                       index = ffs64(mask) - 1;
+                       mask ^= __BIT(index);
                        sc = selcluster[index];
                        lock = sc->sc_lock;
                        mutex_spin_enter(lock);
@@ -790,6 +819,15 @@
        sc = l->l_selcluster;
        lock = sc->sc_lock;
 
+       /*
+        * If the request was non-blocking, or we found events on the first
+        * descriptor, there will be no need to clear anything - avoid
+        * taking the lock.
+        */
+       if (SLIST_EMPTY(&l->l_selwait)) {
+               return;
+       }
+
        mutex_spin_enter(lock);
        for (sip = SLIST_FIRST(&l->l_selwait); sip != NULL; sip = next) {
                KASSERT(sip->sel_lwp == l);
diff -r 183c96969a67 -r 6088880bdfbc sys/sys/selinfo.h
--- a/sys/sys/selinfo.h Thu Nov 21 20:51:05 2019 +0000
+++ b/sys/sys/selinfo.h Thu Nov 21 21:42:30 2019 +0000
@@ -1,7 +1,7 @@
-/*     $NetBSD: selinfo.h,v 1.8 2010/07/08 12:23:31 rmind Exp $        */
+/*     $NetBSD: selinfo.h,v 1.9 2019/11/21 21:42:30 ad Exp $   */
 
 /*-
- * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2010, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -71,13 +71,13 @@
  * notified when I/O becomes possible.
  */
 struct selinfo {
+       uint64_t        sel_collision;  /* mask of colliding cpus */
        struct klist    sel_klist;      /* knotes attached to this selinfo */
        void            *sel_cluster;   /* current cluster association */
        struct lwp      *sel_lwp;       /* first LWP to be notified */
        uintptr_t       sel_fdinfo;     /* selected descriptor by first LWP */
        SLIST_ENTRY(selinfo) sel_chain; /* entry on LWP's list of selinfo */
-       uint32_t        sel_collision;  /* mask of colliding cpus */
-       uint32_t        sel_reserved[3];/* reserved for future expansion */
+       uintptr_t       sel_reserved[2];/* reserved for future expansion */
 };
 
 #endif /* !_SYS_SELINFO_H_ */



Home | Main Index | Thread Index | Old Index