Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/thorpej-futex]: src/sys Native implementation of the Linux eventfd(2) API.
details: https://anonhg.NetBSD.org/src/rev/541dafed8180
branches: thorpej-futex
changeset: 961121:541dafed8180
user: thorpej <thorpej%NetBSD.org@localhost>
date: Mon Dec 14 16:00:51 2020 +0000
description:
Native implementation of the Linux eventfd(2) API.
diffstat:
sys/kern/files.kern | 3 +-
sys/kern/sys_eventfd.c | 583 +++++++++++++++++++++++++++++++++++++++++++++++
sys/kern/syscalls.master | 4 +-
sys/sys/Makefile | 4 +-
sys/sys/eventfd.h | 57 ++++
sys/sys/file.h | 7 +-
6 files changed, 651 insertions(+), 7 deletions(-)
diffs (truncated from 739 to 300 lines):
diff -r 6d070aa67c57 -r 541dafed8180 sys/kern/files.kern
--- a/sys/kern/files.kern Mon Dec 14 14:37:44 2020 +0000
+++ b/sys/kern/files.kern Mon Dec 14 16:00:51 2020 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: files.kern,v 1.53.2.1 2020/12/14 14:38:13 thorpej Exp $
+# $NetBSD: files.kern,v 1.53.2.2 2020/12/14 16:00:51 thorpej Exp $
#
# kernel sources
@@ -157,6 +157,7 @@
file kern/subr_xcall.c kern
file kern/sys_aio.c aio
file kern/sys_descrip.c kern
+file kern/sys_eventfd.c kern
file kern/sys_futex.c kern
file kern/sys_generic.c kern
file kern/sys_getrandom.c kern
diff -r 6d070aa67c57 -r 541dafed8180 sys/kern/sys_eventfd.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/kern/sys_eventfd.c Mon Dec 14 16:00:51 2020 +0000
@@ -0,0 +1,583 @@
+/* $NetBSD: sys_eventfd.c,v 1.1.2.1 2020/12/14 16:00:51 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: sys_eventfd.c,v 1.1.2.1 2020/12/14 16:00:51 thorpej Exp $");
+
+/*
+ * eventfd
+ *
+ * Eventfd objects present a simple counting object associated with a
+ * file descriptor. Writes and reads to this file descriptor increment
+ * and decrement the count, respectively. When the count is non-zero,
+ * the descriptor is considered "readable", and when less than the max
+ * value (EVENTFD_MAXVAL), is considered "writable".
+ *
+ * This implementation is API compatible with the Linux eventfd(2)
+ * interface.
+ */
+
+#include <sys/types.h>
+#include <sys/condvar.h>
+#include <sys/eventfd.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/kauth.h>
+#include <sys/mutex.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/select.h>
+#include <sys/stat.h>
+#include <sys/syscallargs.h>
+#include <sys/uio.h>
+
+struct eventfd { /* state of one eventfd object; reached via fp->f_eventfd */
+ kmutex_t efd_lock; /* taken by the read path; interlock for the condvars below */
+ kcondvar_t efd_read_wait; /* blocked readers sleep here */
+ kcondvar_t efd_write_wait; /* blocked writers sleep here */
+ kcondvar_t efd_restart_wait; /* would-be sleepers wait here while efd_restarting is set */
+ struct selinfo efd_read_sel; /* notified with POLLIN | POLLRDNORM */
+ struct selinfo efd_write_sel; /* notified with POLLOUT | POLLWRNORM */
+ eventfd_t efd_val; /* the counter */
+ int64_t efd_nwaiters; /* LWPs asleep on efd_read_wait / efd_write_wait */
+ bool efd_restarting; /* restart drain in progress; last waiter out clears it */
+ bool efd_has_read_waiters; /* set before a reader sleeps; cleared by eventfd_wake() */
+ bool efd_has_write_waiters; /* set before a writer sleeps; cleared by eventfd_wake() */
+ bool efd_is_semaphore; /* EFD_SEMAPHORE given: a read returns 1 and decrements by 1 */
+
+ /*
+ * Information kept for stat(2).
+ */
+ struct timespec efd_btime; /* time created */
+ struct timespec efd_mtime; /* last write */
+ struct timespec efd_atime; /* last read */
+};
+
+#define EVENTFD_MAXVAL (UINT64_MAX - 1) /* max counter value; below it the fd is "writable" */
+
+/*
+ * eventfd_create:
+ *
+ *	Allocate and initialize a new eventfd object.
+ *
+ *	val is the initial counter value.  Of the bits in flags only
+ *	EFD_SEMAPHORE is looked at here; EFD_CLOEXEC and EFD_NONBLOCK
+ *	are left for the caller to deal with.
+ *
+ *	The allocation uses KM_SLEEP and the result is returned without
+ *	a NULL check.
+ */
+static struct eventfd *
+eventfd_create(unsigned int const val, int const flags)
+{
+	struct eventfd *efd;
+
+	efd = kmem_zalloc(sizeof(*efd), KM_SLEEP);
+
+	/* Counter state and creation timestamp. */
+	efd->efd_val = val;
+	efd->efd_is_semaphore = (flags & EFD_SEMAPHORE) != 0;
+	getnanotime(&efd->efd_btime);
+
+	/* Lock, sleep queues and select/poll records. */
+	mutex_init(&efd->efd_lock, MUTEX_DEFAULT, IPL_NONE);
+	cv_init(&efd->efd_read_wait, "efdread");
+	cv_init(&efd->efd_write_wait, "efdwrite");
+	cv_init(&efd->efd_restart_wait, "efdrstrt");
+	selinit(&efd->efd_read_sel);
+	selinit(&efd->efd_write_sel);
+
+	return efd;
+}
+
+/*
+ * eventfd_destroy:
+ *
+ * Destroy an eventfd object.
+ *
+ * Tears down the condvars, selinfo records and mutex that
+ * eventfd_create() set up. The object must be idle: no waiters,
+ * no restart in progress and no waiter hints left set (all
+ * asserted below).
+ *
+ * NOTE(review): the storage obtained with kmem_zalloc() in
+ * eventfd_create() is not released here; presumably the caller
+ * frees it -- confirm, otherwise the object leaks.
+ */
+static void
+eventfd_destroy(struct eventfd * const efd)
+{
+
+ KASSERT(efd->efd_nwaiters == 0);
+ KASSERT(efd->efd_restarting == false);
+ KASSERT(efd->efd_has_read_waiters == false);
+ KASSERT(efd->efd_has_write_waiters == false);
+
+ cv_destroy(&efd->efd_read_wait);
+ cv_destroy(&efd->efd_write_wait);
+ cv_destroy(&efd->efd_restart_wait);
+
+ seldestroy(&efd->efd_read_sel);
+ seldestroy(&efd->efd_write_sel);
+
+ mutex_destroy(&efd->efd_lock); /* last: the condvars above use it as their interlock */
+}
+
+/*
+ * eventfd_wait:
+ *
+ *	Block on an eventfd.  Handles non-blocking, as well as
+ *	the restart cases.
+ *
+ *	Must be called with efd_lock held; the lock is released while
+ *	asleep and held again on return.  Sleeps at most once, so the
+ *	caller has to re-check its condition and call again if needed.
+ *
+ *	fflag is the file's f_flag; is_write selects whether the caller
+ *	waits as a writer (true) or as a reader (false).
+ *
+ *	Returns 0 after a wakeup, EAGAIN if FNONBLOCK is set, ERESTART
+ *	if a restart was triggered during the sleep and no other error
+ *	occurred, or the error from cv_wait_sig() (e.g. interrupted by
+ *	a signal).
+ */
+static int
+eventfd_wait(struct eventfd * const efd, int const fflag, bool const is_write)
+{
+	kcondvar_t *waitcv;
+	int error;
+
+	if (fflag & FNONBLOCK) {
+		return EAGAIN;
+	}
+
+	/*
+	 * We're going to block.  If there is a restart in-progress,
+	 * wait for that to complete first.
+	 */
+	while (efd->efd_restarting) {
+		cv_wait(&efd->efd_restart_wait, &efd->efd_lock);
+	}
+
+	if (is_write) {
+		efd->efd_has_write_waiters = true;
+		waitcv = &efd->efd_write_wait;
+	} else {
+		efd->efd_has_read_waiters = true;
+		waitcv = &efd->efd_read_wait;
+	}
+
+	efd->efd_nwaiters++;
+	KASSERT(efd->efd_nwaiters > 0);
+	error = cv_wait_sig(waitcv, &efd->efd_lock);
+	efd->efd_nwaiters--;
+	KASSERT(efd->efd_nwaiters >= 0);
+
+	/*
+	 * eventfd_wake() clears the has-waiters hints only when it
+	 * broadcasts.  If we left the sleep for another reason (e.g.
+	 * a signal), the hint we set above is still standing.  Once
+	 * nobody is asleep on either condvar, drop both hints so they
+	 * cannot go stale and trip the assertions in eventfd_destroy().
+	 */
+	if (efd->efd_nwaiters == 0) {
+		efd->efd_has_read_waiters = false;
+		efd->efd_has_write_waiters = false;
+	}
+
+	/*
+	 * If a restart was triggered while we were asleep, we need
+	 * to return ERESTART if no other error was returned.  If we
+	 * are the last waiter coming out of the restart drain, clear
+	 * the condition.
+	 */
+	if (efd->efd_restarting) {
+		if (error == 0) {
+			error = ERESTART;
+		}
+		if (efd->efd_nwaiters == 0) {
+			efd->efd_restarting = false;
+			cv_broadcast(&efd->efd_restart_wait);
+		}
+	}
+
+	return error;
+}
+
+/*
+ * eventfd_wake:
+ *
+ *	Wake LWPs blocked on an eventfd.
+ *
+ *	is_write describes the operation that just happened: after a
+ *	write (true) the readers are woken and POLLIN | POLLRDNORM is
+ *	posted on the read selinfo; after a read (false) the writers
+ *	are woken and POLLOUT | POLLWRNORM is posted on the write
+ *	selinfo.  The condvar is only broadcast when the matching
+ *	has-waiters hint is set, and the hint is cleared in the process.
+ */
+static void
+eventfd_wake(struct eventfd * const efd, bool const is_write)
+{
+	kcondvar_t *cv;
+	struct selinfo *sip;
+	bool *hint;
+	int events;
+
+	/* Pick the side opposite to the operation that just completed. */
+	if (is_write) {
+		cv = &efd->efd_read_wait;
+		hint = &efd->efd_has_read_waiters;
+		sip = &efd->efd_read_sel;
+		events = POLLIN | POLLRDNORM;
+	} else {
+		cv = &efd->efd_write_wait;
+		hint = &efd->efd_has_write_waiters;
+		sip = &efd->efd_write_sel;
+		events = POLLOUT | POLLWRNORM;
+	}
+
+	if (*hint) {
+		*hint = false;
+		cv_broadcast(cv);
+	}
+
+	selnotify(sip, events, NOTE_SUBMIT);
+}
+
+/*
+ * eventfd file operations
+ *
+ * eventfd_fop_read:
+ *
+ * Read the counter of the eventfd behind fp into uio.
+ *
+ * Fails with EINVAL if uio has room for less than one eventfd_t.
+ * While the counter is zero the caller waits in eventfd_wait()
+ * (EAGAIN when FNONBLOCK is set in fp->f_flag); any error from
+ * there is returned as-is. In semaphore mode the value read is 1
+ * and the counter drops by one; otherwise the whole counter is
+ * read and reset to zero. The access time is updated and the
+ * write side is woken before the value is copied out.
+ *
+ * Returns 0 or the error from eventfd_wait() / uiomove().
+ * offset, cred and flags are not used here.
+ */
+
+static int
+eventfd_fop_read(file_t * const fp, off_t * const offset,
+ struct uio * const uio, kauth_cred_t const cred, int const flags)
+{
+ struct eventfd * const efd = fp->f_eventfd;
+ int const fflag = fp->f_flag;
+ eventfd_t return_value;
+ int error;
+
+ if (uio->uio_resid < sizeof(eventfd_t)) { /* need room for a whole counter value */
+ return EINVAL;
+ }
+
+ mutex_enter(&efd->efd_lock);
+
+ while (efd->efd_val == 0) { /* re-check after every wakeup */
+ if ((error = eventfd_wait(efd, fflag, false)) != 0) {
+ mutex_exit(&efd->efd_lock);
+ return error;
+ }
+ }
+
+ if (efd->efd_is_semaphore) {
+ return_value = 1;
+ efd->efd_val--;
+ } else {
+ return_value = efd->efd_val;
+ efd->efd_val = 0;
+ }
+
+ getnanotime(&efd->efd_atime);
+ eventfd_wake(efd, false); /* counter went down: wake the write side */
+
+ /* XXX Should we unlock before the uiomove()? */
+
+ error = uiomove(&return_value, sizeof(return_value), uio);
+
+ /*
+ * XXX Should we restore eventfd state if uiomove() fails?
+ * As written, the counter has already been consumed and the
+ * write side woken by the time a uiomove() error is returned.
+ */
+
+ mutex_exit(&efd->efd_lock);
+
+ return error;
+}
+
+static int
+eventfd_fop_write(file_t * const fp, off_t * const offset,
+ struct uio * const uio, kauth_cred_t const cred, int const flags)
+{
+ struct eventfd * const efd = fp->f_eventfd;
+ int const fflag = fp->f_flag;
+ eventfd_t write_value;
Home |
Main Index |
Thread Index |
Old Index