Subject: pool(9) revisited
To: None <tech-kern@NetBSD.ORG>
From: Paul Kranenburg <pk@cs.few.eur.nl>
List: tech-kern
Date: 07/01/1998 17:53:44
And here's a draft implementation.
Some notes:
- probably need to keep pages sorted by # of free items in them
- linear search for the page a returned item belongs to;
  resort to hashing? (rough sketch below)
- a single threshold for the canonical size of a pool will
  likely make it behave suboptimally under heavy load.
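For the page lookup, a small hash table over the page address might do.
A rough sketch, assuming new members `pr_hashtab' (an array of bucket
heads) and `pr_pageshift' (log2 of the page size) in struct pool, plus a
`pi_hashlist' entry in struct pool_item -- none of which exist in the
draft below; pages would also have to be entered into and removed from
the table in pool_prime_page(), pool_put() and pool_rm_page():

#define	PR_HASHSIZE	32
#define	PR_HASH(pp, page) \
	((((u_long)(page)) >> (pp)->pr_pageshift) & (PR_HASHSIZE - 1))

static __inline__ struct pool_item *
pr_find_pagehead(pp, page)
	struct pool *pp;
	caddr_t page;
{
	struct pool_item *ph;

	/* Scan only the hash bucket this page address maps to */
	for (ph = pp->pr_hashtab[PR_HASH(pp, page)].tqh_first; ph != NULL;
	     ph = ph->pi_hashlist.tqe_next) {
		if (ph->pi_page == page)
			return (ph);
	}
	return (NULL);
}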
Suggestions welcome.
------------------------------
/* $NetBSD: pool.h,v 1.3 1998/02/19 23:51:48 pk Exp $ */
/*-
* Copyright (c) 1997 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Paul Kranenburg.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _SYS_POOL_H_
#define _SYS_POOL_H_
#include <sys/queue.h>
typedef struct pool {
TAILQ_HEAD(,pool_item) pr_pagelist; /* Allocated pages */
struct pool_item *pr_curpage;
unsigned int pr_npages; /* # of pages allocated */
unsigned int pr_hiwat; /* nominal # of pages in pool */
unsigned int pr_pagesz; /* page size, must be 2^n */
unsigned long pr_pagemask; /* abbrev. of above */
unsigned int pr_itemsperpage;/* # items that fit in a page */
unsigned int pr_size; /* Size of item */
void *(*pr_alloc) __P((unsigned long, int, int));
void (*pr_free) __P((void *, unsigned long, int));
int pr_mtype; /* memory allocator tag */
char *pr_wchan; /* tsleep(9) identifier */
unsigned int pr_flags;
#define PR_MALLOCOK	1	/* dynamic page allocation permitted */
#define PR_WAITOK	2	/* may sleep for resources */
#define PR_WANTED	4	/* a sleeper waits in pool_get() */
#define PR_STATIC	8	/* pages come from caller-supplied storage */
#define PR_FREEHEADER	16	/* pool structure itself was malloc'ed */
#define PR_URGENT	32	/* panic rather than fail an allocation */
struct simplelock pr_lock;
/*
* Instrumentation
*/
unsigned long pr_nalloc;
unsigned long pr_nfree;
unsigned long pr_npagealloc;
unsigned long pr_npagefree;
} *pool_handle_t;
pool_handle_t pool_create __P((size_t, int, char *, size_t,
void *(*)__P((unsigned long, int, int)),
void (*)__P((void *, unsigned long, int)),
int));
void pool_init __P((struct pool *, size_t, int, char *, size_t,
void *(*)__P((unsigned long, int, int)),
void (*)__P((void *, unsigned long, int)),
int));
void pool_destroy __P((pool_handle_t));
void *pool_get __P((pool_handle_t, int));
void pool_put __P((pool_handle_t, void *));
int pool_prime __P((pool_handle_t, int, caddr_t));
void pool_sethiwat __P((pool_handle_t, int));
void pool_print __P((pool_handle_t, char *));
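/*
 * Storage estimates for pools fed from static memory: room for
 * `nitems' aligned items, optionally preceded by the pool structure
 * itself. Note that pool_prime() consumes static storage in
 * pr_pagesz-sized chunks, so a static arena should span a whole
 * number of pages.
 */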
#define POOL_ITEM_STORAGE_SIZE(size, nitems) (ALIGN(size) * nitems)
#define POOL_STORAGE_SIZE(size, nitems) \
(ALIGN(sizeof(struct pool)) + POOL_ITEM_STORAGE_SIZE(size, nitems))
#endif /* _SYS_POOL_H_ */
------------------------------
/* $NetBSD: subr_pool.c,v 1.2 1998/02/19 23:52:14 pk Exp $ */
/*-
* Copyright (c) 1997 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Paul Kranenburg.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/pool.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#if defined(UVM)
#include <uvm/uvm.h>
#endif
/*
* Pool resource management utility.
*
* Memory is allocated in pages which are split into pieces according
* to the pool item size. Each page is kept on a list headed by `pr_pagelist'
* in the pool structure and the individual pool items are on a linked list
* headed by `pi_buckethead' in each page header. The memory for building
* the page list is taken from the allocated pages themselves.
*
*/
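/*
 * A minimal usage sketch (`struct foo', `foopool' and `fp' are made
 * up for illustration):
 *
 *	struct pool foopool;
 *	struct foo *fp;
 *
 *	pool_init(&foopool, sizeof(struct foo), 0, "foopl", 0,
 *	    NULL, NULL, M_DEVBUF);
 *	fp = (struct foo *)pool_get(&foopool, PR_WAITOK);
 *	...
 *	pool_put(&foopool, fp);
 */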
struct pool_item {
/* Page headers */
TAILQ_ENTRY(pool_item) pi_pagelist; /* pool page list */
TAILQ_HEAD(,pool_item) pi_buckethead; /* chunk list for this page */
int pi_nmissing; /* # of chunks in use */
int pi_flag;
#define PI_KEEPHEAD 1
/* XXX - flag could move to pool header.. */
caddr_t pi_page; /* this page's address */
/* Other entries use only this list entry */
TAILQ_ENTRY(pool_item) pi_bucketlist;
};
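/*
 * A page is "depleted" when its header chunks must stay resident
 * (PI_KEEPHEAD) and its bucket list is empty, i.e. the page has
 * nothing left to hand out.
 */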
#define PH_PAGE_DEPLETED(ph) \
	(((ph)->pi_flag & PI_KEEPHEAD) && (ph)->pi_buckethead.tqh_first == NULL)
static struct pool_item *pr_find_pagehead __P((struct pool *, caddr_t));
static void pool_rm_page __P((struct pool *, struct pool_item *));
static int pool_prime_page __P((struct pool *, caddr_t));
static void *pool_page_alloc __P((unsigned long, int, int));
static void pool_page_free __P((void *, unsigned long, int));
static __inline__ struct pool_item *
pr_find_pagehead(pp, page)
struct pool *pp;
caddr_t page;
{
struct pool_item *ph;
/* XXX - linear search... */
for (ph = pp->pr_pagelist.tqh_first; ph != NULL;
ph = ph->pi_pagelist.tqe_next) {
if (ph->pi_page == page)
return (ph);
}
return (NULL);
}
static __inline__ void
pool_rm_page(pp, ph)
struct pool *pp;
struct pool_item *ph;
{
/*
* Unlink a page from the pool and release it.
*/
pp->pr_npagefree++;
TAILQ_REMOVE(&pp->pr_pagelist, ph, pi_pagelist);
(*pp->pr_free)(ph->pi_page, pp->pr_pagesz, pp->pr_mtype);
pp->pr_npages--;
if (pp->pr_curpage == ph) {
/*
* Find a new non-empty page header, if any.
* Start search from the page head, to increase the
* chance for "high water" pages to be freed.
*/
for (ph = pp->pr_pagelist.tqh_first; ph != NULL;
ph = ph->pi_pagelist.tqe_next)
if (!PH_PAGE_DEPLETED(ph))
break;
pp->pr_curpage = ph;
}
}
struct pool *
pool_create(size, nitems, wchan, pagesz, alloc, release, mtype)
size_t size;
int nitems;
char *wchan;
size_t pagesz;
void *(*alloc) __P((unsigned long, int, int));
void (*release) __P((void *, unsigned long, int));
int mtype;
{
struct pool *pp;
pp = (struct pool *)malloc(sizeof(*pp), mtype, M_NOWAIT);
if (pp == NULL)
return (NULL);
pool_init(pp, size, PR_FREEHEADER, wchan, pagesz,
alloc, release, mtype);
if (nitems != 0) {
if (pool_prime(pp, nitems, NULL) != 0) {
pool_destroy(pp);
return (NULL);
}
}
return (pp);
}
void
pool_init(pp, size, flags, wchan, pagesz, alloc, release, mtype)
struct pool *pp;
size_t size;
int flags;
char *wchan;
size_t pagesz;
void *(*alloc) __P((unsigned long, int, int));
void (*release) __P((void *, unsigned long, int));
int mtype;
{
if (!powerof2(pagesz))
panic("pool_init: page size invalid (%lx)\n", (u_long)pagesz);
if (alloc == NULL)
alloc = pool_page_alloc;
if (release == NULL)
release = pool_page_free;
if (pagesz == 0)
pagesz = NBPG;
/*
* Initialize the pool structure.
*/
TAILQ_INIT(&pp->pr_pagelist);
pp->pr_curpage = NULL;
pp->pr_npages = 0;
pp->pr_hiwat = 0;
pp->pr_flags = flags;
pp->pr_size = ALIGN(size);
pp->pr_wchan = wchan;
pp->pr_mtype = mtype;
pp->pr_alloc = alloc;
pp->pr_free = release;
pp->pr_pagesz = pagesz;
pp->pr_pagemask = ~(pagesz - 1);
pp->pr_itemsperpage = pagesz / pp->pr_size;
pp->pr_nalloc = 0;
pp->pr_nfree = 0;
pp->pr_npagealloc = 0;
pp->pr_npagefree = 0;
simple_lock_init(&pp->pr_lock);
return;
}
/*
 * De-commission a pool resource.
*/
void
pool_destroy(pp)
struct pool *pp;
{
struct pool_item *ph;
#ifdef DIAGNOSTIC
if (pp->pr_nalloc - pp->pr_nfree != 0)
panic("pool_destroy: pool busy: still out: %lu\n",
pp->pr_nalloc - pp->pr_nfree);
#endif
if ((pp->pr_flags & PR_STATIC) == 0)
while ((ph = pp->pr_pagelist.tqh_first) != NULL)
pool_rm_page(pp, ph);
if (pp->pr_flags & PR_FREEHEADER)
free(pp, pp->pr_mtype);
}
/*
* Grab an item from the pool; must be called at appropriate spl level
*/
void *
pool_get(pp, flags)
struct pool *pp;
int flags;
{
struct pool_item *pi, *ph;
#ifdef DIAGNOSTIC
if ((pp->pr_flags & PR_STATIC) && (flags & PR_MALLOCOK))
panic("pool_get: static");
#endif
simple_lock(&pp->pr_lock);
/*
* The convention we use is that if `curpage' is not NULL, then
* it points at a non-empty bucket. In particular, `curpage'
* never points at a page header which has PI_KEEPHEAD set and
* has no items in its bucket.
*/
again:
if ((ph = pp->pr_curpage) == NULL) {
void *v = (*pp->pr_alloc)(pp->pr_pagesz, flags, pp->pr_mtype);
if (v == NULL) {
if (flags & PR_URGENT)
panic("pool_get: urgent");
if ((flags & PR_WAITOK) == 0) {
simple_unlock(&pp->pr_lock);
return (NULL);
}
pp->pr_flags |= PR_WANTED;
simple_unlock(&pp->pr_lock);
tsleep((caddr_t)pp, PSWP, pp->pr_wchan, 0);
simple_lock(&pp->pr_lock);
} else {
pp->pr_npagealloc++;
pool_prime_page(pp, v);
}
goto again;
}
if ((pi = ph->pi_buckethead.tqh_first) == NULL) {
#ifdef DIAGNOSTIC
if (pp->pr_size < sizeof(struct pool_item))
panic("pool_get: must keep page header for item size %u", pp->pr_size);
#endif
/*
* Last item in bucket is gone; remove from page list
*/
TAILQ_REMOVE(&pp->pr_pagelist, ph, pi_pagelist);
pi = ph;
ph = NULL;
} else {
/*
* Remove from bucket list.
*/
TAILQ_REMOVE(&ph->pi_buckethead, pi, pi_bucketlist);
ph->pi_nmissing++;
if (PH_PAGE_DEPLETED(ph))
ph = NULL;
}
if (ph == NULL) {
/*
* Find a new non-empty page header, if any.
* Start search from the page head, to increase the
* chance for "high water" pages to be freed.
*/
for (ph = pp->pr_pagelist.tqh_first; ph != NULL;
ph = ph->pi_pagelist.tqe_next)
if (!PH_PAGE_DEPLETED(ph))
break;
pp->pr_curpage = ph;
}
pp->pr_nalloc++;
simple_unlock(&pp->pr_lock);
return ((void *)pi);
}
/*
* Return resource to the pool; must be called at appropriate spl level
*/
void
pool_put(pp, v)
struct pool *pp;
void *v;
{
struct pool_item *pi = v;
struct pool_item *ph;
caddr_t page;
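	/*
	 * Find the containing page by masking off the offset bits;
	 * this assumes pages are naturally aligned to pr_pagesz.
	 */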
page = (caddr_t)((u_long)v & pp->pr_pagemask);
simple_lock(&pp->pr_lock);
pp->pr_nfree++;
if ((ph = pr_find_pagehead(pp, page)) == NULL) {
/*
* Make this returned item the bucket header.
*/
#ifdef DIAGNOSTIC
if (pp->pr_size < sizeof(struct pool_item))
panic("pool_put: should have page header for item size %u", pp->pr_size);
#endif
ph = pi;
TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, pi_pagelist);
TAILQ_INIT(&ph->pi_buckethead);
ph->pi_nmissing = pp->pr_itemsperpage - 1;
ph->pi_page = page;
ph->pi_flag = 0;
} else {
/*
* Return to bucket.
*/
TAILQ_INSERT_TAIL(&ph->pi_buckethead, pi, pi_bucketlist);
ph->pi_nmissing--;
}
/* Cancel "pool empty" condition if it exists */
if (pp->pr_curpage == NULL)
pp->pr_curpage = ph;
if (pp->pr_flags & PR_WANTED) {
pp->pr_flags &= ~PR_WANTED;
wakeup((caddr_t)pp);
simple_unlock(&pp->pr_lock);
return;
}
if (ph->pi_nmissing == 0 && pp->pr_npages > pp->pr_hiwat) {
/*
* The pool is over the high water mark and this page
* is now complete, so release it.
*/
#ifdef DIAGNOSTIC
if (pp->pr_flags & PR_STATIC) {
/* can't happen because hiwat > freecount */
panic("pool_put: static");
}
#endif
pool_rm_page(pp, ph);
}
simple_unlock(&pp->pr_lock);
}
/*
* Add N items to the pool.
*/
int
pool_prime(pp, n, storage)
struct pool *pp;
int n;
caddr_t storage;
{
caddr_t cp;
#ifdef DIAGNOSTIC
	if (storage && !(pp->pr_flags & PR_STATIC))
		panic("pool_prime: storage provided for non-static pool");
/* !storage && static caught below */
#endif
simple_lock(&pp->pr_lock);
while (n > 0) {
if (pp->pr_flags & PR_STATIC) {
cp = storage;
storage += pp->pr_pagesz;
} else {
cp = (*pp->pr_alloc)(pp->pr_pagesz, 0, pp->pr_mtype);
}
if (cp == NULL) {
simple_unlock(&pp->pr_lock);
return (ENOMEM);
}
pool_prime_page(pp, cp);
pp->pr_hiwat++;
n -= pp->pr_itemsperpage;
}
simple_unlock(&pp->pr_lock);
return (0);
}
/*
* Add a page worth of items to the pool
*/
int
pool_prime_page(pp, storage)
struct pool *pp;
caddr_t storage;
{
struct pool_item *pi, *ph;
caddr_t cp = storage;
int n, m;
ph = (struct pool_item *)cp;
/*
* First, determine how many chunks we need for our
* page header. If that is more than one, we need
* to hold on to the list head.
*/
	m = howmany(sizeof(struct pool_item), pp->pr_size);
ph->pi_flag = (m > 1) ? PI_KEEPHEAD : 0;
/*
* Insert page header.
*/
TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, pi_pagelist);
TAILQ_INIT(&ph->pi_buckethead);
ph->pi_page = storage;
ph->pi_nmissing = 0;
/*
* Insert remaining chunks on the bucket list.
*/
cp = (caddr_t)(cp + m * pp->pr_size);
n = pp->pr_itemsperpage - m;
while (n--) {
pi = (struct pool_item *)cp;
cp = (caddr_t)(cp + pp->pr_size);
/* Insert on page list */
TAILQ_INSERT_TAIL(&ph->pi_buckethead, pi, pi_bucketlist);
}
/*
* If the pool was depleted, point at the new page.
*/
if (pp->pr_curpage == NULL)
pp->pr_curpage = ph;
pp->pr_npages++;
return (0);
}
void
pool_sethiwat(pp, n)
pool_handle_t pp;
int n;
{
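	/*
	 * Round n up to a whole number of pages; e.g. with 16 items
	 * per page, pool_sethiwat(pp, 40) yields pr_hiwat == 3.
	 */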
	pp->pr_hiwat = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}
/*
* Default page allocator
*/
static void *
pool_page_alloc(sz, flags, mtype)
unsigned long sz;
int flags;
int mtype;
{
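	/*
	 * Note: `flags' and `mtype' are ignored here; the default
	 * allocator always does a no-wait allocation from kmem_map.
	 */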
vm_offset_t va;
#if defined(UVM)
va = uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object,
(vm_size_t)sz, UVM_KMF_NOWAIT);
#else
va = kmem_malloc(kmem_map, (vm_size_t)sz, 0);
#endif
return ((void *)va);
}
static void
pool_page_free(v, sz, mtype)
void *v;
unsigned long sz;
int mtype;
{
#if defined(UVM)
uvm_km_free(kmem_map, (vm_offset_t)v, sz);
#else
kmem_free(kmem_map, (vm_offset_t)v, sz);
#endif
}
void
pool_print(pp, label)
struct pool *pp;
char *label;
{
if (label != NULL)
printf("%s: ", label);
printf("pool %s: nalloc %lu nfree %lu npagealloc %lu npagefree %lu\n"
" npages %u hiwat %u itemsperpage %u\n",
pp->pr_wchan,
pp->pr_nalloc,
pp->pr_nfree,
pp->pr_npagealloc,
pp->pr_npagefree,
pp->pr_npages,
pp->pr_hiwat,
pp->pr_itemsperpage);
}