Current-Users archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Proposed pthread_atfork() internal changes



Some of you might have seen a discussion on source-changes-d
about pthread_atfork() (and arc4random() though that is not
relevant here).

pthread_atfork is called in constructors for libraries, incl
libc, so isn't allowed to use malloc(), which might not have
been initialised yet.   It used to use malloc() which broke
some systems.   Christos fixed that a week ago, but only
probably - that is, the current code allows for 16 pthread_atfork
callback functions to be registered (each call can register
1, 2 or 3 of them), and then reverts to malloc after that.

That's probably enough, but only probably (it really depends
upon how many calls to pthread_atfork() happen to be made
before malloc() is ready to be used).

An earlier attempt to fix this used mmap(), but not very
intelligently (the problems that version caused were just
a simple error in the mmap() call, not anything that was
difficult to fix) - it used one mmap'd page for every
callback function registered, which was kind of overkill.

The version proposed here goes back to using mmap() but
fits as many callback functions in each mapped page as
it can.   It retains the statically allocated block of
16 data structs, but only as pacifier for some absurd
code which believes that setting rlimit(RLIM_AS) to 0
is a sane thing to do (and which therefore causes mmap()
to fail).   That (failing) is allowed of pthread_atfork()
but not generally desireable.

Eliminating the static allocation is trivial if desired.

Below (not attached, just included) you will find a
complete cvs diff of the proposed change.   Because
that is somewhat hard to read (diff kind of finds
blank lines and similar which it detects as unchanged,
and otherwise makes something of a mess of this particular
change) I am also including the guts of the new algorithm,
the part with the significant changes (but that alone is
not enough, global decls, include files, etc, are not
included in that, just the two functions (which replace 3)
which have the executable code changes.  The rest should
be easy enough to spot in the diff.

If anyone has any comments or suggestions, please make
them soon, before this code gets comitted (it builds OK,
etc., I have been using it in my builds for a week now).

kre

First the code as it appears, the diff follows.

/*
 * Nb: nothing allocated by this allocator is ever freed.
 * (there is no API to free anything, and no need for one)
 *
 * The code relies upon this.
 */
static struct atfork_callback *
af_alloc(unsigned int blocks)
{
	struct atfork_callback *result;

	if (__predict_false(blocks == 0))
		return NULL;

	if (__predict_true(atfork_storage == NULL)) {
		for (size_t i = 0; i < __arraycount(atfork_builtin); i++) {
			if (atfork_builtin[i].fn == NULL) {
				if (i + blocks <= __arraycount(atfork_builtin))
					return &atfork_builtin[i];
				else
					break;
			}
		}
	}

	if (__predict_false(atfork_storage == NULL ||
	    cb_used(atfork_storage) + blocks > cb_ents(atfork_storage))) {
		if (__predict_false(hw_pagesize == 0)) {
			size_t len = sizeof(hw_pagesize);

			if (sysctl(hw_pagesize_sysctl, 2, &hw_pagesize,
			    &len, NULL, 0) != 0)
				return NULL;
			if (len != sizeof(hw_pagesize))
				return NULL;
			if (hw_pagesize == 0 || (hw_pagesize & 0xFF) != 0)
				return NULL;
		}
		atfork_storage = mmap(0, hw_pagesize, PROT_READ|PROT_WRITE,
		    MAP_PRIVATE | MAP_ANON, -1, 0);
		if (__predict_false(atfork_storage == NULL))
			return NULL;
		cb_used(atfork_storage) = 1;
		cb_ents(atfork_storage) =
		    (uint16_t)(hw_pagesize / sizeof(struct atfork_cb_block));
		if (__predict_false(cb_ents(atfork_storage) < blocks + 1))
			return NULL;
	}

	result = cb_blocks(atfork_storage) + cb_used(atfork_storage);
	cb_used(atfork_storage) += blocks;

	return result;
}

int
pthread_atfork(void (*prepare)(void), void (*parent)(void),
    void (*child)(void))
{
	struct atfork_callback *newprepare, *newparent, *newchild;
	sigset_t mask, omask;
	int error;

	sigfillset(&mask);
	thr_sigsetmask(SIG_SETMASK, &mask, &omask);

	mutex_lock(&atfork_lock);

	/*
	 * Note here that we either get all the blocks
	 * we need, in one call, or we get NULL.
	 *
	 * Note also that a NULL return is not an error
	 * if no blocks were required (all args == NULL)
	 */
	newprepare = af_alloc((prepare != NULL) +
	    (parent != NULL) + (child != NULL));

	error = ENOMEM;		/* in case of "goto out" */

	newparent = newprepare;
	if (prepare != NULL) {
		if (__predict_false(newprepare == NULL))
			goto out;
		newprepare->fn = prepare;
		newparent++;
	}

	newchild = newparent;
	if (parent != NULL) {
		if (__predict_false(newparent == NULL))
			goto out;
		newparent->fn = parent;
		newchild++;
	}

	if (child != NULL) {
		if (__predict_false(newchild == NULL))
			goto out;
		newchild->fn = child;
	}

	/*
	 * The order in which the functions are called is specified as
	 * LIFO for the prepare handler and FIFO for the others; insert
	 * at the head and tail as appropriate so that SIMPLEQ_FOREACH()
	 * produces the right order.
	 */
	if (prepare)
		SIMPLEQ_INSERT_HEAD(&prepareq, newprepare, next);
	if (parent)
		SIMPLEQ_INSERT_TAIL(&parentq, newparent, next);
	if (child)
		SIMPLEQ_INSERT_TAIL(&childq, newchild, next);

	error = 0;

 out:;
	mutex_unlock(&atfork_lock);
	thr_sigsetmask(SIG_SETMASK, &omask, NULL);
	return error;
}

Index: pthread_atfork.c
===================================================================
RCS file: /cvsroot/src/lib/libc/gen/pthread_atfork.c,v
retrieving revision 1.26
diff -u -r1.26 pthread_atfork.c
--- pthread_atfork.c	4 Mar 2025 16:40:46 -0000	1.26
+++ pthread_atfork.c	10 Mar 2025 05:17:59 -0000
@@ -39,7 +39,12 @@
 #include <errno.h>
 #include <stdlib.h>
 #include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/param.h>
 #include <sys/queue.h>
+#include <sys/sysctl.h>
+
 #include "extern.h"
 #include "reentrant.h"
 
@@ -59,6 +64,21 @@
 	void (*fn)(void);
 };
 
+struct atfork_cb_header {
+	uint16_t	entries;
+	uint16_t	used;
+};
+
+struct atfork_cb_block {
+	union {
+		struct atfork_callback block;
+		struct atfork_cb_header hdr;
+	} u;
+};
+
+#define	cb_blocks(bp)	(&(bp)->u.block)
+#define	cb_ents(bp)	(bp)->u.hdr.entries
+#define	cb_used(bp)	(bp)->u.hdr.used
 
 /*
  * We need to keep a cache for of at least 6, one for prepare, one for parent,
@@ -70,6 +90,10 @@
  * space for 16 since it is just 256 bytes.
  */
 static struct atfork_callback atfork_builtin[16];
+static struct atfork_cb_block *atfork_storage = NULL;
+static int hw_pagesize = 0;
+
+static const int hw_pagesize_sysctl[2] = { CTL_HW, HW_PAGESIZE };
 
 /*
  * Hypothetically, we could protect the queues with a rwlock which is
@@ -86,27 +110,59 @@
 static struct atfork_callback_q parentq = SIMPLEQ_HEAD_INITIALIZER(parentq);
 static struct atfork_callback_q childq = SIMPLEQ_HEAD_INITIALIZER(childq);
 
+/*
+ * Nb: nothing allocated by this allocator is ever freed.
+ * (there is no API to free anything, and no need for one)
+ *
+ * The code relies upon this.
+ */
 static struct atfork_callback *
-af_alloc(void)
+af_alloc(unsigned int blocks)
 {
+	struct atfork_callback *result;
+
+	if (__predict_false(blocks == 0))
+		return NULL;
 
-	for (size_t i = 0; i < __arraycount(atfork_builtin); i++) {
-		if (atfork_builtin[i].fn == NULL)
-			return &atfork_builtin[i];
+	if (__predict_true(atfork_storage == NULL)) {
+		for (size_t i = 0; i < __arraycount(atfork_builtin); i++) {
+			if (atfork_builtin[i].fn == NULL) {
+				if (i + blocks <= __arraycount(atfork_builtin))
+					return &atfork_builtin[i];
+				else
+					break;
+			}
+		}
 	}
 
-	return malloc(sizeof(atfork_builtin));
-}
+	if (__predict_false(atfork_storage == NULL ||
+	    cb_used(atfork_storage) + blocks > cb_ents(atfork_storage))) {
+		if (__predict_false(hw_pagesize == 0)) {
+			size_t len = sizeof(hw_pagesize);
+
+			if (sysctl(hw_pagesize_sysctl, 2, &hw_pagesize,
+			    &len, NULL, 0) != 0)
+				return NULL;
+			if (len != sizeof(hw_pagesize))
+				return NULL;
+			if (hw_pagesize == 0 || (hw_pagesize & 0xFF) != 0)
+				return NULL;
+		}
+		atfork_storage = mmap(0, hw_pagesize, PROT_READ|PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANON, -1, 0);
+		if (__predict_false(atfork_storage == NULL))
+			return NULL;
+		cb_used(atfork_storage) = 1;
+		cb_ents(atfork_storage) =
+		    (uint16_t)(hw_pagesize / sizeof(struct atfork_cb_block));
+		if (__predict_false(cb_ents(atfork_storage) < blocks + 1))
+			return NULL;
+	}
 
-static void
-af_free(struct atfork_callback *af)
-{
+	result = cb_blocks(atfork_storage) + cb_used(atfork_storage);
+	cb_used(atfork_storage) += blocks;
 
-	if (af >= atfork_builtin
-	    && af < atfork_builtin + __arraycount(atfork_builtin))
-		af->fn = NULL;
-	else
-		free(af);
+	return result;
 }
 
 int
@@ -117,42 +173,42 @@
 	sigset_t mask, omask;
 	int error;
 
-	newprepare = newparent = newchild = NULL;
-
 	sigfillset(&mask);
 	thr_sigsetmask(SIG_SETMASK, &mask, &omask);
 
 	mutex_lock(&atfork_lock);
+
+	/*
+	 * Note here that we either get all the blocks
+	 * we need, in one call, or we get NULL.
+	 *
+	 * Note also that a NULL return is not an error
+	 * if no blocks were required (all args == NULL)
+	 */
+	newprepare = af_alloc((prepare != NULL) +
+	    (parent != NULL) + (child != NULL));
+
+	error = ENOMEM;		/* in case of "goto out" */
+
+	newparent = newprepare;
 	if (prepare != NULL) {
-		newprepare = af_alloc();
-		if (newprepare == NULL) {
-			error = ENOMEM;
+		if (__predict_false(newprepare == NULL))
 			goto out;
-		}
 		newprepare->fn = prepare;
+		newparent++;
 	}
 
+	newchild = newparent;
 	if (parent != NULL) {
-		newparent = af_alloc();
-		if (newparent == NULL) {
-			if (newprepare != NULL)
-				af_free(newprepare);
-			error = ENOMEM;
+		if (__predict_false(newparent == NULL))
 			goto out;
-		}
 		newparent->fn = parent;
+		newchild++;
 	}
 
 	if (child != NULL) {
-		newchild = af_alloc();
-		if (newchild == NULL) {
-			if (newprepare != NULL)
-				af_free(newprepare);
-			if (newparent != NULL)
-				af_free(newparent);
-			error = ENOMEM;
+		if (__predict_false(newchild == NULL))
 			goto out;
-		}
 		newchild->fn = child;
 	}
 
@@ -168,9 +224,11 @@
 		SIMPLEQ_INSERT_TAIL(&parentq, newparent, next);
 	if (child)
 		SIMPLEQ_INSERT_TAIL(&childq, newchild, next);
+
 	error = 0;
 
-out:	mutex_unlock(&atfork_lock);
+ out:;
+	mutex_unlock(&atfork_lock);
 	thr_sigsetmask(SIG_SETMASK, &omask, NULL);
 	return error;
 }



Home | Main Index | Thread Index | Old Index