Subject: Re: Alternative approach for interface events
To: tech-net@netbsd.org, jdolecek@netbsd.org
From: Pavel Cahyna <pavel.cahyna@st.cuni.cz>
List: tech-net
Date: 09/21/2004 01:44:22
On Mon, 20 Sep 2004 21:23:54 +0000, Pavel Cahyna wrote:
> (Currently knote(9) can only add events to a kqueue opened from
> userspace, but extending this API to be able to run arbitrary kernel
> functions instead would probably be possible. I have an (untested)
> patch which should do exactly this.)
Here is the patch. It has been compile-tested, and all the kqueue
regression tests complete successfully. "options KCONT" is needed.

It renames kcont_run to kcont_run_all, adds a new kcont_run function
that runs only one kcont (I believe this is a reasonable change to the
kcont API), and adds a kn_kcont field to struct knote. Set this field
to any kcont you want (but always create it by calling kcont() with
env_arg set to this knote), and it will be run when knote(9) is called
on this knote. The kqueue system has been changed to set kn_kcont to a
kcont that enqueues the knote onto its kqueue.
Index: kern/kern_event.c
===================================================================
RCS file: /home/pavel/cvs/src/sys/kern/kern_event.c,v
retrieving revision 1.19
retrieving revision 1.19.1500.4
diff -u -r1.19 -r1.19.1500.4
--- kern/kern_event.c 14 Feb 2004 11:56:28 -0000 1.19
+++ kern/kern_event.c 20 Sep 2004 22:00:33 -0000 1.19.1500.4
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_event.c,v 1.19 2004/02/14 11:56:28 jdolecek Exp $ */
+/* $NetBSD: kern_event.c,v 1.19.1500.4 2004/09/20 22:00:33 pavel Exp $ */
/*-
* Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
* All rights reserved.
@@ -28,7 +28,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.19 2004/02/14 11:56:28 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.19.1500.4 2004/09/20 22:00:33 pavel Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -53,6 +53,7 @@
#include <sys/filedesc.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>
+#include <sys/kcont.h>
static int kqueue_scan(struct file *fp, size_t maxevents,
struct kevent *ulistp, const struct timespec *timeout,
@@ -113,13 +114,16 @@
#define KNOTE_ACTIVATE(kn) \
do { \
kn->kn_status |= KN_ACTIVE; \
- if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) \
- knote_enqueue(kn); \
+ if ((kn->kn_status & KN_DISABLED) == 0) \
+ kcont_run(kn->kn_kcont, NULL, 0, KC_IPL_IMMED); \
} while(0)
#define KN_HASHSIZE 64 /* XXX should be tunable */
#define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask))
+#define KN_KC_ASSERT(kn) KASSERT((kn->kn_kcont->kc_fn == kq_func) && \
+ (kn->kn_kcont->kc_env_arg == kn))
+
extern const struct filterops sig_filtops;
/*
@@ -151,6 +155,15 @@
static int user_kfilterc; /* current offset */
static int user_kfiltermaxc; /* max size so far */
+static void
+kq_func(void * obj, void * env, int status)
+{
+ struct knote * kn = env;
+ KN_KC_ASSERT(kn);
+ if ((kn->kn_status & KN_QUEUED) == 0)
+ knote_enqueue(kn);
+}
+
/*
* kqueue_init:
*
@@ -819,6 +832,8 @@
kev->fflags = 0;
kev->data = 0;
kn->kn_kevent = *kev;
+ kn->kn_kcont = kcont(pool_get(&kc_pool, PR_WAITOK),
+ kq_func, kn, KC_IPL_IMMED);
knote_attach(kn, fdp);
if ((error = kfilter->filtops->f_attach(kn)) != 0) {
@@ -836,6 +851,7 @@
kn->kn_sfflags = kev->fflags;
kn->kn_sdata = kev->data;
kn->kn_kevent.udata = kev->udata;
+ KN_KC_ASSERT(kn);
}
s = splsched();
@@ -861,9 +877,8 @@
if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
s = splsched();
kn->kn_status &= ~KN_DISABLED;
- if ((kn->kn_status & KN_ACTIVE) &&
- ((kn->kn_status & KN_QUEUED) == 0))
- knote_enqueue(kn);
+ if (kn->kn_status & KN_ACTIVE)
+ kcont_run(kn->kn_kcont, NULL, 0, KC_IPL_IMMED);
splx(s);
}
@@ -1193,6 +1208,8 @@
if (kq == kn->kn_kq) {
kn->kn_fop->f_detach(kn);
FILE_UNUSE(kn->kn_fp, p);
+ KN_KC_ASSERT(kn);
+ pool_put(&kc_pool, kn->kn_kcont);
pool_put(&knote_pool, kn);
*knp = kn0;
} else {
@@ -1210,6 +1227,8 @@
if (kq == kn->kn_kq) {
kn->kn_fop->f_detach(kn);
/* XXX non-fd release of kn->kn_ptr */
+ KN_KC_ASSERT(kn);
+ pool_put(&kc_pool, kn->kn_kcont);
pool_put(&knote_pool, kn);
*knp = kn0;
} else {
@@ -1384,6 +1403,8 @@
knote_dequeue(kn);
if (kn->kn_fop->f_isfd)
FILE_UNUSE(kn->kn_fp, p);
+ KN_KC_ASSERT(kn);
+ pool_put(&kc_pool, kn->kn_kcont);
pool_put(&knote_pool, kn);
}
Index: kern/kern_kcont.c
===================================================================
RCS file: /home/pavel/cvs/src/sys/kern/kern_kcont.c,v
retrieving revision 1.9
retrieving revision 1.9.1500.3
diff -u -r1.9 -r1.9.1500.3
--- kern/kern_kcont.c 27 Mar 2004 00:42:38 -0000 1.9
+++ kern/kern_kcont.c 20 Sep 2004 21:54:32 -0000 1.9.1500.3
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_kcont.c,v 1.9 2004/03/27 00:42:38 jonathan Exp $ */
+/* $NetBSD: kern_kcont.c,v 1.9.1500.3 2004/09/20 21:54:32 pavel Exp $ */
/*
* Copyright 2003 Jonathan Stone.
@@ -37,7 +37,7 @@
/*
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_kcont.c,v 1.9 2004/03/27 00:42:38 jonathan Exp $ ");
+__KERNEL_RCSID(0, "$NetBSD: kern_kcont.c,v 1.9.1500.3 2004/09/20 21:54:32 pavel Exp $ ");
#include <sys/types.h>
#include <sys/param.h>
@@ -86,7 +86,7 @@
/*
* Pool allocator structure.
*/
-static struct pool kc_pool;
+struct pool kc_pool;
/*
* Process-context continuation queue.
@@ -269,6 +269,48 @@
kcont_enqueue_atomic(kcq, kc);
}
+/*
+ * Call (or hand off) one continuation function. If the
+ * caller-provided IPL is the same as the requested IPL, deliver the
+ * callback. If the caller-provided IPL is higher than the requested
+ * callback IPL, re-enqueue the continuation to a lower-priority
+ * queue.
+ */
+void
+kcont_run(struct kc *kc, void *obj, int status, int curipl)
+{
+
+ /* If execution of kc was already deferred, restore context. */
+ if (kc->kc_flags & KC_DEFERRED) {
+ KASSERT(obj == NULL);
+ obj = kc->kc_obj;
+ status = kc->kc_status;
+ }
+
+ /* Check whether to execute now or to defer. */
+ if (kc->kc_ipl == KC_IPL_IMMED || curipl <= kc->kc_ipl) {
+ int saved_flags = kc->kc_flags; /* XXX see below */
+
+ /* Satisfy our raison d'e^tre */
+ (*kc->kc_fn)(obj, kc->kc_env_arg, status);
+
+ /*
+ * We must not touch (*kc) after calling
+ * (*kc->kc_fn), unless we were specifically
+ * asked to free it. The memory for (*kc) may
+ * be a sub-field of some other object (for example,
+ * of kc->kc_env_arg) and (*kc_fn)() may already
+ * have freed it by the time we get here. So save
+ * kc->kc_flags (above) and use that saved copy
+ * to test for auto-free.
+ */
+ if (saved_flags & KC_AUTOFREE)
+ pool_put(&kc_pool, kc);
+ } else {
+ kcont_defer(kc, obj, status);
+ }
+}
/*
* Run through a list of continuations, calling (or handing off)
@@ -279,42 +321,12 @@
* callback IPL, re-enqueue the continuation to a lower-priority queue.
*/
void
-kcont_run(kcq_t *kcq, void *obj, int status, int curipl)
+kcont_run_all(kcq_t *kcq, void *obj, int status, int curipl)
{
struct kc *kc;
- while ((kc = kcont_dequeue_atomic(kcq)) != NULL) {
-
- /* If execution of kc was already deferred, restore context. */
- if (kc->kc_flags & KC_DEFERRED) {
- KASSERT(obj == NULL);
- obj = kc->kc_obj;
- status = kc->kc_status;
- }
-
- /* Check whether to execute now or to defer. */
- if (kc->kc_ipl == KC_IPL_IMMED || curipl <= kc->kc_ipl) {
- int saved_flags = kc->kc_flags; /* XXX see below */
-
- /* Satisfy our raison d'e^tre */
- (*kc->kc_fn)(obj, kc->kc_env_arg, status);
-
- /*
- * We must not touch (*kc) after calling
- * (*kc->kc_fn), unless we were specifically
- * asked to free it. The memory for (*kc) may
- * be a sub-field of some other object (for example,
- * of kc->kc_env_arg) and (*kc_fn)() may already
- * have freed it by the time we get here. So save
- * kc->kc_flags (above) and use that saved copy
- * to test for auto-free.
- */
- if (saved_flags & KC_AUTOFREE)
- pool_put(&kc_pool, kc);
- } else {
- kcont_defer(kc, obj, status);
- }
- }
+ while ((kc = kcont_dequeue_atomic(kcq)) != NULL)
+ kcont_run(kc, obj, status, curipl);
}
#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
@@ -325,21 +337,21 @@
kcont_run_softclock(void *arg)
{
- kcont_run((struct kcqueue *)arg, NULL, 0, KC_IPL_DEFER_SOFTCLOCK);
+ kcont_run_all((struct kcqueue *)arg, NULL, 0, KC_IPL_DEFER_SOFTCLOCK);
}
static void
kcont_run_softnet(void *arg)
{
- kcont_run((struct kcqueue *)arg, NULL, 0, KC_IPL_DEFER_SOFTNET);
+ kcont_run_all((struct kcqueue *)arg, NULL, 0, KC_IPL_DEFER_SOFTNET);
}
static void
kcont_run_softserial(void *arg)
{
- kcont_run((struct kcqueue *)arg, NULL, 0, KC_IPL_DEFER_SOFTSERIAL);
+ kcont_run_all((struct kcqueue *)arg, NULL, 0, KC_IPL_DEFER_SOFTSERIAL);
}
#endif /* __HAVE_GENERIC_SOFT_INTERRUPTS */
@@ -366,7 +378,7 @@
status = ltsleep(&kcq_process_ctxt, PCATCH, "kcont", hz, NULL);
if (status != 0 && status != EWOULDBLOCK)
break;
- kcont_run(&kcq_process_ctxt, NULL, 0, KC_IPL_DEFER_PROCESS);
+ kcont_run_all(&kcq_process_ctxt, NULL, 0, KC_IPL_DEFER_PROCESS);
}
kthread_exit(0);
}
Index: sys/event.h
===================================================================
RCS file: /home/pavel/cvs/src/sys/sys/event.h,v
retrieving revision 1.12
retrieving revision 1.12.1500.1
diff -u -r1.12 -r1.12.1500.1
--- sys/event.h 22 Feb 2004 17:45:26 -0000 1.12
+++ sys/event.h 20 Sep 2004 13:12:46 -0000 1.12.1500.1
@@ -1,4 +1,4 @@
-/* $NetBSD: event.h,v 1.12 2004/02/22 17:45:26 jdolecek Exp $ */
+/* $NetBSD: event.h,v 1.12.1500.1 2004/09/20 13:12:46 pavel Exp $ */
/*-
* Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
* All rights reserved.
@@ -184,6 +184,7 @@
} kn_ptr;
const struct filterops *kn_fop;
void *kn_hook;
+ struct kc *kn_kcont;
#define KN_ACTIVE 0x01 /* event has been triggered */
#define KN_QUEUED 0x02 /* event is on queue */
Index: sys/kcont.h
===================================================================
RCS file: /home/pavel/cvs/src/sys/sys/kcont.h,v
retrieving revision 1.3
retrieving revision 1.3.1500.4
diff -u -r1.3 -r1.3.1500.4
--- sys/kcont.h 25 Mar 2004 23:02:58 -0000 1.3
+++ sys/kcont.h 20 Sep 2004 21:54:32 -0000 1.3.1500.4
@@ -1,4 +1,4 @@
-/* $NetBSD: kcont.h,v 1.3 2004/03/25 23:02:58 enami Exp $ */
+/* $NetBSD: kcont.h,v 1.3.1500.4 2004/09/20 21:54:32 pavel Exp $ */
/*
* Copyright (c) 2003 The NetBSD Foundation, Inc.
@@ -59,6 +59,8 @@
* additional state beyond the "slept-upon" object.
*/
+extern struct pool kc_pool;
+
typedef struct kc {
SIMPLEQ_ENTRY(kc) kc_next; /* next kc in object's callback list */
@@ -150,6 +152,14 @@
*/
void kcont_enqueue(kcq_t * /*kcqueue*/, struct kc * /*kc*/);
+/*
+ * Execute (or defer) one continuation, either running the
+ * continuation with the given object and status; or saving the object
+ * and status and deferring the kcont to a software-interrupt priority
+ * kc_queue.
+ */
+void kcont_run(kc_t * /*kc*/, void * /*obj*/,
+ int /*status*/, int /*cur_ipl*/);
/*
* Execute (or defer) all continuations on an object's kcqueue
@@ -158,7 +168,7 @@
* with the given object and status; or saving the object and status
* and deferring the kconts to a software-interrupt priority kc_queue.
*/
-void kcont_run(kcq_t * /*kcq*/, void * /*obj*/,
+void kcont_run_all(kcq_t * /*kcq*/, void * /*obj*/,
int /*status*/, int /*cur_ipl*/);