Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys Implement direct select/poll support, currently effectiv...
details: https://anonhg.NetBSD.org/src/rev/40bfe6044c6f
branches: trunk
changeset: 756186:40bfe6044c6f
user: rmind <rmind%NetBSD.org@localhost>
date: Thu Jul 08 12:23:31 2010 +0000
description:
Implement direct select/poll support, currently effective for socket and
pipe subsystems. Avoids overhead of second selscan() on wake-up, and thus
improves performance on certain workloads (especially when polling on many
file-descriptors). Also, clean-up sys/fd_set.h header and improve macros.
Welcome to 5.99.36!
diffstat:
sys/kern/sys_select.c | 173 +++++++++++++++++++++++++++++++++++++++----------
sys/sys/fd_set.h | 46 ++++++------
sys/sys/lwp.h | 14 ++-
sys/sys/param.h | 4 +-
sys/sys/selinfo.h | 3 +-
sys/sys/types.h | 5 +-
6 files changed, 174 insertions(+), 71 deletions(-)
diffs (truncated from 535 to 300 lines):
diff -r c375cd42239a -r 40bfe6044c6f sys/kern/sys_select.c
--- a/sys/kern/sys_select.c Thu Jul 08 12:09:31 2010 +0000
+++ b/sys/kern/sys_select.c Thu Jul 08 12:23:31 2010 +0000
@@ -1,11 +1,11 @@
-/* $NetBSD: sys_select.c,v 1.22 2010/04/25 15:55:24 ad Exp $ */
+/* $NetBSD: sys_select.c,v 1.23 2010/07/08 12:23:31 rmind Exp $ */
/*-
* Copyright (c) 2007, 2008, 2009, 2010 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
- * by Andrew Doran.
+ * by Andrew Doran and Mindaugas Rasiukevicius.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -84,7 +84,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.22 2010/04/25 15:55:24 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.23 2010/07/08 12:23:31 rmind Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -109,7 +109,12 @@
/* Flags for lwp::l_selflag. */
#define SEL_RESET 0 /* awoken, interrupted, or not yet polling */
#define SEL_SCANNING 1 /* polling descriptors */
-#define SEL_BLOCKING 2 /* about to block on select_cv */
+#define SEL_BLOCKING 2 /* blocking and waiting for event */
+#define SEL_EVENT 3 /* interrupted, events set directly */
+
+/* Operations: either select() or poll(). */
+#define SELOP_SELECT 1
+#define SELOP_POLL 2
/*
* Per-cluster state for select()/poll(). For a system with fewer
@@ -125,10 +130,16 @@
uint32_t sc_mask;
} selcluster_t;
-static inline int selscan(char *, u_int, register_t *);
-static inline int pollscan(struct pollfd *, u_int, register_t *);
+static inline int selscan(char *, const int, const size_t, register_t *);
+static inline int pollscan(struct pollfd *, const int, register_t *);
static void selclear(void);
+static const int sel_flag[] = {
+ POLLRDNORM | POLLHUP | POLLERR,
+ POLLWRNORM | POLLHUP | POLLERR,
+ POLLRDBAND
+};
+
static syncobj_t select_sobj = {
SOBJ_SLEEPQ_FIFO,
sleepq_unsleep,
@@ -137,7 +148,7 @@
syncobj_noowner,
};
-static selcluster_t *selcluster[SELCLUSTERS];
+static selcluster_t *selcluster[SELCLUSTERS] __read_mostly;
/*
* Select system call.
@@ -206,8 +217,8 @@
* sel_do_scan: common code to perform the scan on descriptors.
*/
static int
-sel_do_scan(void *fds, u_int nfds, struct timespec *ts, sigset_t *mask,
- register_t *retval, int selpoll)
+sel_do_scan(const int op, void *fds, const int nf, const size_t ni,
+ struct timespec *ts, sigset_t *mask, register_t *retval)
{
lwp_t * const l = curlwp;
proc_t * const p = l->l_proc;
@@ -237,6 +248,14 @@
lock = sc->sc_lock;
l->l_selcluster = sc;
SLIST_INIT(&l->l_selwait);
+
+ l->l_selret = 0;
+ if (op == SELOP_SELECT) {
+ l->l_selbits = (char *)fds + ni * 3;
+ l->l_selni = ni;
+ } else {
+ l->l_selbits = NULL;
+ }
for (;;) {
int ncoll;
@@ -250,28 +269,51 @@
l->l_selflag = SEL_SCANNING;
ncoll = sc->sc_ncoll;
- if (selpoll) {
- error = selscan((char *)fds, nfds, retval);
+ if (op == SELOP_SELECT) {
+ error = selscan((char *)fds, nf, ni, retval);
} else {
- error = pollscan((struct pollfd *)fds, nfds, retval);
+ error = pollscan((struct pollfd *)fds, nf, retval);
}
-
if (error || *retval)
break;
if (ts && (timo = gettimeleft(ts, &sleepts)) <= 0)
break;
+ /*
+ * Acquire the lock and perform the (re)checks. Note: if a
+ * collision has occurred, then our state does not matter,
+ * as we must perform a re-scan. Therefore, check it first.
+ */
+state_check:
mutex_spin_enter(lock);
- if (l->l_selflag != SEL_SCANNING || sc->sc_ncoll != ncoll) {
+ if (__predict_false(sc->sc_ncoll != ncoll)) {
+ /* Collision: perform re-scan. */
mutex_spin_exit(lock);
continue;
}
+ if (__predict_true(l->l_selflag == SEL_EVENT)) {
+ /* Events occurred; they are set directly. */
+ mutex_spin_exit(lock);
+ KASSERT(l->l_selret != 0);
+ *retval = l->l_selret;
+ break;
+ }
+ if (__predict_true(l->l_selflag == SEL_RESET)) {
+ /* Events occurred, but a re-scan is requested. */
+ mutex_spin_exit(lock);
+ continue;
+ }
+ KASSERT(l->l_selflag == SEL_SCANNING);
+ /* Nothing happened, therefore sleep. */
l->l_selflag = SEL_BLOCKING;
l->l_kpriority = true;
sleepq_enter(&sc->sc_sleepq, l, lock);
sleepq_enqueue(&sc->sc_sleepq, sc, "select", &select_sobj);
error = sleepq_block(timo, true);
- if (error != 0)
+ if (error != 0) {
break;
+ }
+ /* Awoken: need to check the state. */
+ goto state_check;
}
selclear();
@@ -326,7 +368,7 @@
getbits(ex, 2);
#undef getbits
- error = sel_do_scan(bits, nd, ts, mask, retval, 1);
+ error = sel_do_scan(SELOP_SELECT, bits, nd, ni, ts, mask, retval);
if (error == 0 && u_in != NULL)
error = copyout(bits + ni * 3, u_in, ni);
if (error == 0 && u_ou != NULL)
@@ -340,30 +382,32 @@
}
static inline int
-selscan(char *bits, u_int nfd, register_t *retval)
+selscan(char *bits, const int nfd, const size_t ni, register_t *retval)
{
- static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
- POLLWRNORM | POLLHUP | POLLERR,
- POLLRDBAND };
fd_mask *ibitp, *obitp;
- int msk, i, j, fd, ni, n;
- fd_mask ibits, obits;
+ int msk, i, j, fd, n;
file_t *fp;
- ni = howmany(nfd, NFDBITS) * sizeof(fd_mask);
ibitp = (fd_mask *)(bits + ni * 0);
obitp = (fd_mask *)(bits + ni * 3);
n = 0;
for (msk = 0; msk < 3; msk++) {
for (i = 0; i < nfd; i += NFDBITS) {
+ fd_mask ibits, obits;
+
ibits = *ibitp++;
obits = 0;
while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
ibits &= ~(1 << j);
if ((fp = fd_getfile(fd)) == NULL)
return (EBADF);
- if ((*fp->f_ops->fo_poll)(fp, flag[msk])) {
+ /*
+ * Setup an argument to selrecord(), which is
+ * a file descriptor number.
+ */
+ curlwp->l_selrec = fd;
+ if ((*fp->f_ops->fo_poll)(fp, sel_flag[msk])) {
obits |= (1 << j);
n++;
}
@@ -464,7 +508,7 @@
if (error)
goto fail;
- error = sel_do_scan(fds, nfds, ts, mask, retval, 0);
+ error = sel_do_scan(SELOP_POLL, fds, nfds, ni, ts, mask, retval);
if (error == 0)
error = copyout(fds, u_fds, ni);
fail:
@@ -474,12 +518,11 @@
}
static inline int
-pollscan(struct pollfd *fds, u_int nfd, register_t *retval)
+pollscan(struct pollfd *fds, const int nfd, register_t *retval)
{
- int i, n;
file_t *fp;
+ int i, n = 0;
- n = 0;
for (i = 0; i < nfd; i++, fds++) {
if (fds->fd < 0) {
fds->revents = 0;
@@ -487,6 +530,12 @@
fds->revents = POLLNVAL;
n++;
} else {
+ /*
+ * Perform poll: registers select request or returns
+ * the events which are set. Setup an argument for
+ * selrecord(), which is a pointer to struct pollfd.
+ */
+ curlwp->l_selrec = (uintptr_t)fds;
fds->revents = (*fp->f_ops->fo_poll)(fp,
fds->events | POLLERR | POLLHUP);
if (fds->revents != 0)
@@ -498,7 +547,6 @@
return (0);
}
-/*ARGSUSED*/
int
seltrue(dev_t dev, int events, lwp_t *l)
{
@@ -539,28 +587,73 @@
other = sip->sel_lwp;
if (other == selector) {
- /* `selector' has already claimed it. */
+ /* 1. We (selector) already claimed to be the first LWP. */
KASSERT(sip->sel_cluster == sc);
} else if (other == NULL) {
/*
- * First named waiter, although there may be unnamed
- * waiters (collisions). Issue a memory barrier to
- * ensure that we access sel_lwp (above) before other
- * fields - this guards against a call to selclear().
+ * 2. No first LWP, therefore we (selector) are the first.
+ *
+ * There may be unnamed waiters (collisions). Issue a memory
+ * barrier to ensure that we access sel_lwp (above) before
+ * other fields - this guards against a call to selclear().
*/
membar_enter();
sip->sel_lwp = selector;
SLIST_INSERT_HEAD(&selector->l_selwait, sip, sel_chain);
+ /* Copy the argument, which is for selnotify(). */
+ sip->sel_fdinfo = selector->l_selrec;
/* Replace selinfo's lock with the chosen cluster's lock. */
sip->sel_cluster = sc;
} else {
- /* Multiple waiters: record a collision. */
+ /* 3. Multiple waiters: record a collision. */
sip->sel_collision |= sc->sc_mask;
KASSERT(sip->sel_cluster != NULL);
}
}
/*
+ * sel_setevents: a helper function for selnotify(), to set the events
+ * for an LWP sleeping in selcommon() or pollcommon().
+static inline void
+sel_setevents(lwp_t *l, struct selinfo *sip, const int events)
+{
+ const int oflag = l->l_selflag;
+
+ /*
+ * If we require re-scan or it was required by somebody else,
+ * then just (re)set SEL_RESET and return.
+ */
+ if (__predict_false(events == 0 || oflag == SEL_RESET)) {
+ l->l_selflag = SEL_RESET;
+ return;
+ }
+ /*
+ * Direct set. Note: select state of LWP is locked. First,
Home |
Main Index |
Thread Index |
Old Index