Source-Changes-HG archive
[src/trunk]: src/sys two changes to improve scalability:
details: https://anonhg.NetBSD.org/src/rev/390e3ef48f7e
branches: trunk
changeset: 555269:390e3ef48f7e
user: chs <chs%NetBSD.org@localhost>
date: Thu Nov 13 02:44:01 2003 +0000
description:
two changes to improve scalability:
(1) split the single list of pages allocated to a pool into three lists:
completely full, partially full, and completely empty.
there is no longer any need to traverse any list looking for a
certain type of page.
(2) replace the 8-element hash table for out-of-page page headers
with a splay tree.
these two changes (together with the recent enhancements to the wait code)
give us linear scaling for a fork+exit microbenchmark.
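The core of change (1): each page header now lives on exactly one of three lists, chosen by how many of its items are in use, so list membership alone encodes the page's state and no list ever needs to be scanned. A minimal sketch of that organization (the type and field names below are illustrative, not the actual declarations from the diff):

	#include <sys/queue.h>

	struct page_hdr {
		LIST_ENTRY(page_hdr) link;	/* linkage on exactly one list */
		unsigned int nmissing;		/* items currently allocated */
		unsigned int itemsperpage;	/* total items the page holds */
	};

	LIST_HEAD(pagelist, page_hdr);

	struct pool_sketch {
		struct pagelist emptypages;	/* nmissing == 0 */
		struct pagelist fullpages;	/* nmissing == itemsperpage */
		struct pagelist partpages;	/* 0 < nmissing < itemsperpage */
	};

Allocation takes from a partially-full page first and falls back to an empty one; freeing items moves a page back toward the empty list.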
diffstat:
sys/kern/subr_pool.c | 410 ++++++++++++++++++++++++++------------------------
sys/sys/pool.h | 17 +-
sys/uvm/uvm_map.c | 8 +-
3 files changed, 232 insertions(+), 203 deletions(-)
diffs (truncated from 791 to 300 lines):
diff -r a76015bb29d0 -r 390e3ef48f7e sys/kern/subr_pool.c
--- a/sys/kern/subr_pool.c Thu Nov 13 02:34:24 2003 +0000
+++ b/sys/kern/subr_pool.c Thu Nov 13 02:44:01 2003 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: subr_pool.c,v 1.87 2003/04/09 18:22:13 thorpej Exp $ */
+/* $NetBSD: subr_pool.c,v 1.88 2003/11/13 02:44:01 chs Exp $ */
/*-
* Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
@@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.87 2003/04/09 18:22:13 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.88 2003/11/13 02:44:01 chs Exp $");
#include "opt_pool.h"
#include "opt_poollog.h"
@@ -59,12 +59,14 @@
/*
* Pool resource management utility.
*
- * Memory is allocated in pages which are split into pieces according
- * to the pool item size. Each page is kept on a list headed by `pr_pagelist'
- * in the pool structure and the individual pool items are on a linked list
- * headed by `ph_itemlist' in each page header. The memory for building
- * the page list is either taken from the allocated pages themselves (for
- * small pool items) or taken from an internal pool of page headers (`phpool').
+ * Memory is allocated in pages which are split into pieces according to
+ * the pool item size. Each page is kept on one of three lists in the
+ * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
+ * for empty, full and partially-full pages respectively. The individual
+ * pool items are on a linked list headed by `ph_itemlist' in each page
+ * header. The memory for building the page list is either taken from
+ * the allocated pages themselves (for small pool items) or taken from
+ * an internal pool of page headers (`phpool').
*/
/* List of all pools */
@@ -89,16 +91,15 @@
struct pool_item_header {
/* Page headers */
- TAILQ_ENTRY(pool_item_header)
+ LIST_ENTRY(pool_item_header)
ph_pagelist; /* pool page list */
TAILQ_HEAD(,pool_item) ph_itemlist; /* chunk list for this page */
- LIST_ENTRY(pool_item_header)
- ph_hashlist; /* Off-page page headers */
+ SPLAY_ENTRY(pool_item_header)
+ ph_node; /* Off-page page headers */
unsigned int ph_nmissing; /* # of chunks in use */
caddr_t ph_page; /* this page's address */
struct timeval ph_time; /* last referenced */
};
-TAILQ_HEAD(pool_pagelist,pool_item_header);
struct pool_item {
#ifdef DIAGNOSTIC
@@ -109,10 +110,6 @@
TAILQ_ENTRY(pool_item) pi_list;
};
-#define PR_HASH_INDEX(pp,addr) \
- (((u_long)(addr) >> (pp)->pr_alloc->pa_pageshift) & \
- (PR_HASHTABSIZE - 1))
-
#define POOL_NEEDS_CATCHUP(pp) \
((pp)->pr_nitems < (pp)->pr_minitems)
@@ -150,13 +147,19 @@
static int pool_catchup(struct pool *);
static void pool_prime_page(struct pool *, caddr_t,
struct pool_item_header *);
+static void pool_update_curpage(struct pool *);
void *pool_allocator_alloc(struct pool *, int);
void pool_allocator_free(struct pool *, void *);
+static void pool_print_pagelist(struct pool_pagelist *,
+ void (*)(const char *, ...));
static void pool_print1(struct pool *, const char *,
void (*)(const char *, ...));
+static int pool_chk_page(struct pool *, const char *,
+ struct pool_item_header *);
+
/*
* Pool log entry. An array of these is allocated in pool_init().
*/
@@ -275,24 +278,34 @@
#define pr_enter_check(pp, pr)
#endif /* POOL_DIAGNOSTIC */
+static __inline int
+phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
+{
+ if (a->ph_page < b->ph_page)
+ return (-1);
+ else if (a->ph_page > b->ph_page)
+ return (1);
+ else
+ return (0);
+}
+
+SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
+SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
+
/*
* Return the pool page header based on page address.
*/
static __inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, caddr_t page)
{
- struct pool_item_header *ph;
+ struct pool_item_header *ph, tmp;
if ((pp->pr_roflags & PR_PHINPAGE) != 0)
return ((struct pool_item_header *)(page + pp->pr_phoffset));
- for (ph = LIST_FIRST(&pp->pr_hashtab[PR_HASH_INDEX(pp, page)]);
- ph != NULL;
- ph = LIST_NEXT(ph, ph_hashlist)) {
- if (ph->ph_page == page)
- return (ph);
- }
- return (NULL);
+ tmp.ph_page = page;
+ ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);
+ return ph;
}
/*
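The SPLAY_* macros above come from <sys/tree.h>. For readers who have not used them: you embed a SPLAY_ENTRY in the element type, supply a three-way comparator, and let SPLAY_PROTOTYPE/SPLAY_GENERATE emit the tree operations. A self-contained sketch of the same lookup pattern used by pr_find_pagehead() (names here are illustrative, not the pool code itself):

	#include <sys/types.h>
	#include <sys/tree.h>

	struct node {
		SPLAY_ENTRY(node) entry;	/* tree linkage */
		caddr_t key;			/* e.g. a page address */
	};

	static int
	node_cmp(struct node *a, struct node *b)
	{

		if (a->key < b->key)
			return (-1);
		if (a->key > b->key)
			return (1);
		return (0);
	}

	SPLAY_HEAD(nodetree, node);
	SPLAY_PROTOTYPE(nodetree, node, entry, node_cmp);
	SPLAY_GENERATE(nodetree, node, entry, node_cmp);

	/*
	 * Lookup by key: fill in a dummy element on the stack,
	 * as the pool code does with `tmp' above.
	 */
	static struct node *
	node_lookup(struct nodetree *head, caddr_t key)
	{
		struct node tmp;

		tmp.key = key;
		return (SPLAY_FIND(nodetree, head, &tmp));
	}

Note that SPLAY_FIND rotates the found node to the root, so repeated lookups of recently-used pages (the common case for a pool) stay cheap.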
@@ -322,13 +335,13 @@
/*
* Unlink a page from the pool and release it (or queue it for release).
*/
- TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
+ LIST_REMOVE(ph, ph_pagelist);
if (pq) {
- TAILQ_INSERT_HEAD(pq, ph, ph_pagelist);
+ LIST_INSERT_HEAD(pq, ph, ph_pagelist);
} else {
pool_allocator_free(pp, ph->ph_page);
if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
- LIST_REMOVE(ph, ph_hashlist);
+ SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
s = splvm();
pool_put(&phpool, ph);
splx(s);
@@ -337,18 +350,7 @@
pp->pr_npages--;
pp->pr_npagefree++;
- if (pp->pr_curpage == ph) {
- /*
- * Find a new non-empty page header, if any.
- * Start search from the page head, to increase the
- * chance for "high water" pages to be freed.
- */
- TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist)
- if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
- break;
-
- pp->pr_curpage = ph;
- }
+ pool_update_curpage(pp);
}
/*
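The truncated diff does not reach the body of the new pool_update_curpage(). From its call sites and the comments below ("select a new current page, so we use a partially-full page if one is available"), a plausible reading is the following sketch; this is an inference from the surrounding code, not the committed function:

	static void
	pool_update_curpage(struct pool *pp)
	{

		pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
		if (pp->pr_curpage == NULL)
			pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
	}

Whatever the exact body, the point of the change is visible at the call site: the old code walked pr_pagelist looking for a non-empty page, while the replacement can take the head of a list in constant time.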
@@ -361,7 +363,7 @@
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
const char *wchan, struct pool_allocator *palloc)
{
- int off, slack, i;
+ int off, slack;
#ifdef POOL_DIAGNOSTIC
/*
@@ -425,7 +427,9 @@
/*
* Initialize the pool structure.
*/
- TAILQ_INIT(&pp->pr_pagelist);
+ LIST_INIT(&pp->pr_emptypages);
+ LIST_INIT(&pp->pr_fullpages);
+ LIST_INIT(&pp->pr_partpages);
TAILQ_INIT(&pp->pr_cachelist);
pp->pr_curpage = NULL;
pp->pr_npages = 0;
@@ -465,9 +469,7 @@
/* The page header will be taken from our page header pool */
pp->pr_phoffset = 0;
off = palloc->pa_pagesz;
- for (i = 0; i < PR_HASHTABSIZE; i++) {
- LIST_INIT(&pp->pr_hashtab[i]);
- }
+ SPLAY_INIT(&pp->pr_phtree);
}
/*
@@ -570,8 +572,10 @@
#endif
/* Remove all pages */
- while ((ph = TAILQ_FIRST(&pp->pr_pagelist)) != NULL)
+ while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
pr_rmpage(pp, ph, NULL);
+ KASSERT(LIST_EMPTY(&pp->pr_fullpages));
+ KASSERT(LIST_EMPTY(&pp->pr_partpages));
/* Remove from global pool list */
simple_lock(&pool_head_slock);
@@ -600,7 +604,7 @@
pp->pr_drain_hook_arg = arg;
}
-static __inline struct pool_item_header *
+static struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
struct pool_item_header *ph;
@@ -773,7 +777,6 @@
/* Start the allocation process over. */
goto startover;
}
-
if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
pr_leave(pp);
simple_unlock(&pp->pr_slock);
@@ -814,9 +817,16 @@
panic("pool_get: nidle inconsistent");
#endif
pp->pr_nidle--;
+
+ /*
+ * This page was previously empty. Move it to the list of
+ * partially-full pages. This page is already curpage.
+ */
+ LIST_REMOVE(ph, ph_pagelist);
+ LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
}
ph->ph_nmissing++;
- if (TAILQ_FIRST(&ph->ph_itemlist) == NULL) {
+ if (TAILQ_EMPTY(&ph->ph_itemlist)) {
#ifdef DIAGNOSTIC
if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
pr_leave(pp);
@@ -826,23 +836,12 @@
}
#endif
/*
- * Find a new non-empty page header, if any.
- * Start search from the page head, to increase
- * the chance for "high water" pages to be freed.
- *
- * Migrate empty pages to the end of the list. This
- * will speed the update of curpage as pages become
- * idle. Empty pages intermingled with idle pages
- * is no big deal. As soon as a page becomes un-empty,
- * it will move back to the head of the list.
+ * This page is now full. Move it to the full list
+ * and select a new current page.
*/
- TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
- TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
- TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist)
- if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
- break;
-
- pp->pr_curpage = ph;
+ LIST_REMOVE(ph, ph_pagelist);
+ LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
+ pool_update_curpage(pp);
}
pp->pr_nget++;
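The hunks above are the page-state transitions in pool_get(): taking the first item moves a page from the empty to the partial list, and taking the last free item moves it from partial to full. LIST_REMOVE from <sys/queue.h> needs only the element, not the list head, which is what lets the code unlink a header without knowing which list currently holds it. Reusing the hypothetical types from the sketch near the top of this page, the whole transition logic could be expressed as one helper:

	/*
	 * Hypothetical helper (not in the diff): put a page header on
	 * the list matching its current fill state.
	 */
	static void
	page_relist(struct pool_sketch *pp, struct page_hdr *ph)
	{

		LIST_REMOVE(ph, link);		/* works from any list */
		if (ph->nmissing == 0)
			LIST_INSERT_HEAD(&pp->emptypages, ph, link);
		else if (ph->nmissing == ph->itemsperpage)
			LIST_INSERT_HEAD(&pp->fullpages, ph, link);
		else
			LIST_INSERT_HEAD(&pp->partpages, ph, link);
	}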
@@ -935,18 +934,15 @@
}
/*
- * If this page is now complete, do one of two things:
+ * If this page is now empty, do one of two things:
*
- * (1) If we have more pages than the page high water
- * mark, free the page back to the system.
+ * (1) If we have more pages than the page high water mark,
+ * free the page back to the system.
*
- * (2) Move it to the end of the page list, so that
- * we minimize our chances of fragmenting the
- * pool. Idle pages migrate to the end (along with
- * completely empty pages, so that we find un-empty
- * pages more quickly when we update curpage) of the
- * list so they can be more easily swept up by
- * the pagedaemon when pages are scarce.
+ * (2) Otherwise, move the page to the empty page list.
+ *
+ * Either way, select a new current page (so we use a partially-full
+ * page if one is available).