Subject: kernel semaphores
To: None <tech-kern@netbsd.org>
From: Simon J. Gerraty <sjg@crufty.net>
List: tech-kern
Date: 01/07/2002 01:37:49
I originally wrote kern_sem.c et al to provide interlocking for IPI's
on sparc mp, which now have their own light weight interlocking.

Does anyone see any need for kernel semaphores in other areas?
and if so, care to review the implementation below.  It caters for 
multiple resource at a time allocations, and provides sleep and spin
waits.   The hard bits are done using the existing locking primitives.

--sjg


--- /dev/null	Mon Jan  7 01:32:24 2002
+++ sys/sys/ksem.h	Tue Mar  6 10:11:15 2001
@@ -0,0 +1,68 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2001 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Simon Gerraty.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the NetBSD
+ *	Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_KSEM_H_
+# define _SYS_KSEM_H_
+
+#include <sys/lock.h>
+
+#ifdef _KERNEL
+typedef struct sema_s {
+	struct simplelock sem_interlock;
+	const char *sem_wmesg;
+	int	sem_flags;
+	int	sem_sleepers;
+	int	sem_count;
+} sema_t;
+
+#define SEMAF_VALID (1<<0)		/* initialized */
+#define SEMAF_DEBUG (1<<1)
+
+void	sema_init(sema_t *sp, int cnt, const char *wmesg, int flags);
+void	sema_clear(sema_t *sp);		/* shutdown */
+void	sema_setflags(sema_t *sp, int flags);
+int	sema_signal(sema_t *sp);	/* V() */
+int	sema_signal_n(sema_t *sp, int n); /* V() * n */
+void	sema_spinwait(sema_t *sp, int n); /* spin version of P() * n */
+void	sema_wait(sema_t *sp);		/* sleep version of P() */
+void	sema_wait_n(sema_t *sp, int n);	/* sleep version of P() * n */
+
+#define sema_signal(sp) sema_signal_n((sp), 1)
+
+#endif
+#endif
--- /dev/null	Mon Jan  7 01:32:24 2002
+++ sys/kern/kern_sem.c	Mon Jan  7 01:26:09 2002
@@ -0,0 +1,303 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2001 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Simon Gerraty.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the NetBSD
+ *	Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/ksem.h>
+
+#define SEMADEBUG
+#if defined(LOCKDEBUG) && !defined(SEMADEBUG)
+# define SEMADEBUG
+#endif
+
+#ifdef SEMADEBUG
+int sema_debug = 1;
+#endif
+
+/*
+ * XXX not at all sure we should be using spl* here.
+ * so make it easy to undo.
+ */
+//#define SEMASPL splhigh
+#ifdef SEMASPL
+# define SEMA_SPL(x) x
+#else
+# define SEMA_SPL(x)
+#endif
+
+void
+sema_init (sema_t *sp, int cnt, const char *wmesg, int flags)
+{
+	simple_lock_init(&sp->sem_interlock);
+	sp->sem_wmesg = wmesg;
+	sp->sem_count = cnt;
+	sp->sem_sleepers = 0;
+	sp->sem_flags = SEMAF_VALID|flags;
+#ifdef SEMADEBUG
+	if (sema_debug < 0 || (sema_debug && (flags & SEMAF_DEBUG))) {
+		printf("\n{%d}sema_init(%p, %d, %s)\n", cpu_number(),
+		       sp, cnt, wmesg ?: "");
+		if (sema_debug < 0)
+			sema_debug = 0;
+	}
+#endif
+}
+
+void
+sema_clear (sema_t *sp)
+{
+	SEMA_SPL(int s);
+	
+	if ((sp->sem_flags & SEMAF_VALID)) {
+#ifdef SEMADEBUG
+		if (sema_debug && (sp->sem_flags & SEMAF_DEBUG))
+			printf("\n{%d}sema_clear(%p) count==%d, sleepers==%d\n",
+			       cpu_number(),
+			       sp, sp->sem_count, sp->sem_sleepers);
+#endif
+		SEMA_SPL(s = SEMASPL());
+		simple_lock(&sp->sem_interlock);
+		sp->sem_count = sp->sem_flags = 0;
+		/*
+		 * Its important that no-one be sleeping on the semaphore
+		 * since after this, no one will ever wake them up.
+		 */
+		while (sp->sem_sleepers > 0) {
+#ifdef DIAGNOSTIC
+			panic("sema_clear: sleepers > 0");
+#endif
+			sp->sem_sleepers--;
+			wakeup_one(sp);
+		}
+		simple_unlock(&sp->sem_interlock);
+		SEMA_SPL(splx(s));
+	}
+}
+
+void
+sema_setflags (sema_t *sp, int flags)
+{
+#ifdef SEMADEBUG
+	if (sema_debug && (sp->sem_flags & SEMAF_DEBUG))
+		printf("\n{%d}sema_setflags(%p, %d)\n", cpu_number(),
+		       sp, flags);
+#endif
+	if ((flags & SEMAF_VALID) == 0) {
+		sema_clear(sp);
+	}
+	sp->sem_flags = flags;
+}
+
+
+/*
+ * This is the normal wait routine.
+ * If decrementing the count takes it below 0 we
+ * sleep until sema_signal() ups to to 0 and awakens us.
+ * We don't make sema_wait() just sema_wait_n() with n=1 since
+ * this is a little more efficient.
+ */
+void
+sema_wait (sema_t *sp)
+{
+	struct proc *p = curproc;
+	SEMA_SPL(int s);
+	
+	if (!(sp->sem_flags & SEMAF_VALID) || panicstr != NULL) {
+		return;
+	}
+	SEMA_SPL(s = SEMASPL());
+	simple_lock(&sp->sem_interlock);
+	sp->sem_count--;
+
+#ifdef SEMADEBUG
+	if (sema_debug && (sp->sem_flags & SEMAF_DEBUG))
+		printf("\n{%d}sema_wait(%p) == %d\n",
+		       cpu_number(),
+		       sp, sp->sem_count);
+#endif
+	if (sp->sem_count < 0) {
+		sp->sem_sleepers++;
+		ltsleep(sp, p->p_priority|PNORELOCK,
+			sp->sem_wmesg ?: "sema",
+			0, &sp->sem_interlock);
+		sp->sem_sleepers--;
+		SEMA_SPL(splx(s));
+		return;
+	}
+	simple_unlock(&sp->sem_interlock);
+	SEMA_SPL(splx(s));
+}
+
+
+/*
+ * Like sema_wait() but allows reserving n resources at once.
+ * We need to loop around the sleep call, to ensure that we hold the
+ * interlock and that there are still "n" resources available.
+ * Unlike sema_wait() we have ltsleep() re-aquire the interlock when
+ * we wakeup.
+ */
+void
+sema_wait_n (sema_t *sp, int n)
+{
+	struct proc *p = curproc;
+	SEMA_SPL(int s);
+	
+	if (!(sp->sem_flags & SEMAF_VALID) || panicstr != NULL) {
+		return;
+	}
+	SEMA_SPL(s = SEMASPL());
+	simple_lock(&sp->sem_interlock);
+
+#ifdef SEMADEBUG
+	if (sema_debug && (sp->sem_flags & SEMAF_DEBUG))
+		printf("\n{%d}sema_wait_n(%p,%d) == %d\n",
+		       cpu_number(),
+		       sp, n, sp->sem_count);
+#endif
+	/*
+	 * If there are less than "n" available, sleep until that changes.
+	 */
+	if (sp->sem_count - n < 0) {
+		do {
+			sp->sem_sleepers++;
+			ltsleep(sp, p->p_priority,
+				sp->sem_wmesg ?: "seman",
+				0, &sp->sem_interlock);
+			sp->sem_sleepers--;
+			/*
+			 * There is no guarantee that there are "n" available
+			 * yet, so we may have to sleep again.
+			 */
+		} while (panicstr == NULL &&
+			 (sp->sem_flags & SEMAF_VALID) &&
+			 sp->sem_count - n < 0);
+	}
+	sp->sem_count -= n;		/* got them */
+	simple_unlock(&sp->sem_interlock);
+	SEMA_SPL(splx(s));
+}
+
+/*
+ * Spinning version of sema_wait_n().
+ */
+void
+sema_spinwait (sema_t *sp, int n)
+{
+	SEMA_SPL(int s);
+
+	if (!(sp->sem_flags & SEMAF_VALID) || panicstr != NULL) {
+		return;
+	}
+	SEMA_SPL(s = SEMASPL());
+	simple_lock(&sp->sem_interlock);
+
+#ifdef SEMADEBUG
+	if (sema_debug && (sp->sem_flags & SEMAF_DEBUG))
+		printf("\n{%d}sema_spinwait(%p,%d) == %d\n",
+		       cpu_number(),
+		       sp, n, sp->sem_count);
+#endif
+	if (sp->sem_count - n < 0) {
+		do {
+			simple_unlock(&sp->sem_interlock);
+			SEMA_SPL(splx(s));
+
+			while (panicstr == NULL &&
+			       (sp->sem_flags & SEMAF_VALID) &&
+			       sp->sem_count - n < 0)
+				continue; /* spin */
+			if (!(sp->sem_flags & SEMAF_VALID) ||
+			    panicstr != NULL) {
+				return;
+			}
+			
+			SEMA_SPL(s = SEMASPL());
+			simple_lock(&sp->sem_interlock);
+		} while (panicstr == NULL && sp->sem_count - n < 0);
+	}
+	sp->sem_count -= n;		/* got them */
+	simple_unlock(&sp->sem_interlock);
+	SEMA_SPL(splx(s));
+}
+
+
+/*
+ * sema_signal() is just this routine with n=1
+ * If we find count>=0 we wake up at most "n" sleepers.
+ */
+int
+sema_signal_n (sema_t *sp, int n)
+{
+	int rc = 0;
+	int i;
+	SEMA_SPL(int s);
+	
+	if (panicstr == NULL && (sp->sem_flags & SEMAF_VALID)) {
+		SEMA_SPL(s = SEMASPL());
+		simple_lock(&sp->sem_interlock);
+		sp->sem_count += n;
+
+#ifdef SEMADEBUG
+		if (sema_debug && (sp->sem_flags & SEMAF_DEBUG))
+			printf("\n{%d}sema_signal(%p,%d) == %d\n",
+			       cpu_number(),
+			       sp, n, sp->sem_count);
+#endif
+		if ((rc = sp->sem_count) >= 0 && sp->sem_sleepers > 0) {
+			/*
+			 * Wakeup at most n sleepers.
+			 */
+			for (i = 0; i < n && sp->sem_count - i >= 0; ++i) {
+#ifdef SEMADEBUG
+				if (sema_debug &&
+				    (sp->sem_flags & SEMAF_DEBUG))
+					printf("\n{%d}sema_signal: wakeup %d\n",
+					       cpu_number(), i);
+#endif
+
+				wakeup_one(sp);
+			}
+		}
+		simple_unlock(&sp->sem_interlock);
+		SEMA_SPL(splx(s));
+	}
+	return rc;
+}
+
+
Index: sys/conf/files
===================================================================
RCS file: /cvsroot/syssrc/sys/conf/files,v
retrieving revision 1.486
diff -u -p -r1.486 files
--- sys/conf/files	2001/12/17 15:40:43	1.486
+++ sys/conf/files	2002/01/07 09:36:22
@@ -1056,6 +1056,7 @@ file	kern/kern_physio.c
 file	kern/kern_proc.c
 file	kern/kern_prot.c
 file	kern/kern_resource.c
+file	kern/kern_sem.c
 file	kern/kern_sig.c
 file	kern/kern_subr.c
 file	kern/kern_synch.c