Subject: Some locking issues in subr_pool.c
To: NetBSD Kernel Technical Discussion List <tech-kern@netbsd.org>
From: Jason Thorpe <thorpej@shagadelic.org>
List: tech-kern
Date: 04/29/2005 11:59:17
Folks...
I noticed some locking issues in subr_pool.c the other day:
- The locking protocol for pr_rmpage() was hideous: it was sometimes
called with the pool locked, sometimes not, and in the "not" case it
modified fields that are normally protected by the lock.
- Fixing the above requires always deferring the freeing of a pool page
to the back-end allocator until the pool lock has been dropped, because
the pool must not be locked when the pool_allocator is called (the lock
ordering is pool_allocator -> pool; see pool_allocator_free()). This
was a mostly mechanical change; the resulting calling pattern is
sketched below.
- While making that mechanical change, I noticed that pool_reclaim()
violates the pool_cache -> pool lock ordering. I didn't see a good way
to avoid that problem, so pool_cache_reclaim() has to use a trylock and
bail out if that fails. This is not a huge deal, since we already
trylock the pool at the beginning of pool_reclaim(), and this is only
opportunistic memory reclamation anyway.
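
To make the new calling convention concrete, here is roughly what
pool_put() ends up looking like with this change (just a sketch of
what is in the patch below):

	struct pool_pagelist pq;

	LIST_INIT(&pq);

	simple_lock(&pp->pr_slock);
	pool_do_put(pp, v, &pq);	/* pr_rmpage() now only queues pages on pq */
	simple_unlock(&pp->pr_slock);

	/* The pool is unlocked here, so calling the back-end allocator is safe. */
	if (! LIST_EMPTY(&pq))
		pr_pagelist_free(pp, &pq);

And the lock-ordering issue in pool_reclaim() is handled by having
pool_cache_reclaim() start with:

	if (simple_lock_try(&pc->pc_slock) == 0)
		return;		/* couldn't get the cache lock; just skip it */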
The following patch should fix these problems. I have not even tried
to compile it yet, but I would like to get comments on it.
-- thorpej
[Attachment: pool-locking-patch.txt]
--- subr_pool.c 2005-04-27 16:12:13.000000000 -0700
+++ subr_pool.c.new 2005-04-29 07:17:09.000000000 -0700
@@ -174,7 +174,7 @@ struct pool_item {
/* The cache group pool. */
static struct pool pcgpool;
-static void pool_cache_reclaim(struct pool_cache *);
+static void pool_cache_reclaim(struct pool_cache *, struct pool_pagelist *);
static int pool_catchup(struct pool *);
static void pool_prime_page(struct pool *, caddr_t,
@@ -384,6 +384,23 @@ pr_find_pagehead(struct pool *pp, caddr_
return ph;
}
+static void
+pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq)
+{
+ struct pool_item_header *ph;
+ int s;
+
+ while ((ph = LIST_FIRST(pq)) != NULL) {
+ LIST_REMOVE(ph, ph_pagelist);
+ pool_allocator_free(pp, ph->ph_page);
+ if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
+ s = splvm();
+ pool_put(pp->pr_phpool, ph);
+ splx(s);
+ }
+ }
+}
+
/*
* Remove a page from the pool.
*/
@@ -393,7 +410,7 @@ pr_rmpage(struct pool *pp, struct pool_i
{
int s;
- LOCK_ASSERT(!simple_lock_held(&pp->pr_slock) || pq != NULL);
+ LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
/*
* If the page was idle, decrement the idle page count.
@@ -411,21 +428,13 @@ pr_rmpage(struct pool *pp, struct pool_i
pp->pr_nitems -= pp->pr_itemsperpage;
/*
- * Unlink a page from the pool and release it (or queue it for release).
+ * Unlink the page from the pool and queue it for release.
*/
LIST_REMOVE(ph, ph_pagelist);
if ((pp->pr_roflags & PR_PHINPAGE) == 0)
SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
- if (pq) {
- LIST_INSERT_HEAD(pq, ph, ph_pagelist);
- } else {
- pool_allocator_free(pp, ph->ph_page);
- if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
- s = splvm();
- pool_put(pp->pr_phpool, ph);
- splx(s);
- }
- }
+ LIST_INSERT_HEAD(pq, ph, ph_pagelist);
+
pp->pr_npages--;
pp->pr_npagefree++;
@@ -696,6 +705,7 @@ pool_init(struct pool *pp, size_t size,
void
pool_destroy(struct pool *pp)
{
+ struct pool_pagelist pq;
struct pool_item_header *ph;
struct pool_cache *pc;
int s;
@@ -707,7 +717,13 @@ pool_destroy(struct pool *pp)
simple_unlock(&pp->pr_alloc->pa_slock);
splx(s);
- /* Destroy all caches for this pool. */
+ /*
+ * Destroy all caches for this pool.
+ * XXXJRT We don't lock here, because the locking order is
+ * pool_cache -> pool. Maybe it doesn't matter because the
+ * pool is being destroyed anyway. Or maybe we should panic
+ * here if the pool_caches are not already gone.
+ */
while ((pc = TAILQ_FIRST(&pp->pr_cachelist)) != NULL)
pool_cache_destroy(pc);
@@ -720,8 +736,14 @@ pool_destroy(struct pool *pp)
#endif
/* Remove all pages */
+ LIST_INIT(&pq);
+ s = splvm();
+ simple_lock(&pp->pr_slock);
while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
- pr_rmpage(pp, ph, NULL);
+ pr_rmpage(pp, ph, &pq);
+ simple_unlock(&pp->pr_slock);
+ splx(s);
+ pr_pagelist_free(pp, &pq);
KASSERT(LIST_EMPTY(&pp->pr_fullpages));
KASSERT(LIST_EMPTY(&pp->pr_partpages));
@@ -1037,7 +1059,7 @@ pool_get(struct pool *pp, int flags)
* Internal version of pool_put(). Pool is already locked/entered.
*/
static void
-pool_do_put(struct pool *pp, void *v)
+pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq)
{
struct pool_item *pi = v;
struct pool_item_header *ph;
@@ -1125,9 +1147,7 @@ pool_do_put(struct pool *pp, void *v)
if (pp->pr_npages > pp->pr_minpages &&
(pp->pr_npages > pp->pr_maxpages ||
(pp->pr_alloc->pa_flags & PA_WANT) != 0)) {
- simple_unlock(&pp->pr_slock);
- pr_rmpage(pp, ph, NULL);
- simple_lock(&pp->pr_slock);
+ pr_rmpage(pp, ph, pq);
} else {
LIST_REMOVE(ph, ph_pagelist);
LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
@@ -1165,16 +1185,22 @@ pool_do_put(struct pool *pp, void *v)
void
_pool_put(struct pool *pp, void *v, const char *file, long line)
{
+ struct pool_pagelist pq;
+
+ LIST_INIT(&pq);
simple_lock(&pp->pr_slock);
pr_enter(pp, file, line);
pr_log(pp, v, PRLOG_PUT, file, line);
- pool_do_put(pp, v);
+ pool_do_put(pp, v, &pq);
pr_leave(pp);
simple_unlock(&pp->pr_slock);
+
+ if (! LIST_EMPTY(&pq))
+ pr_pagelist_free(pp, &pq);
}
#undef pool_put
#endif /* POOL_DIAGNOSTIC */
@@ -1182,12 +1208,18 @@ _pool_put(struct pool *pp, void *v, cons
void
pool_put(struct pool *pp, void *v)
{
+ struct pool_pagelist pq;
+
+ LIST_INIT(&pq);
simple_lock(&pp->pr_slock);
- pool_do_put(pp, v);
+ pool_do_put(pp, v, &pq);
simple_unlock(&pp->pr_slock);
+
+ if (! LIST_EMPTY(&pq))
+ pr_pagelist_free(pp, &pq);
}
#ifdef POOL_DIAGNOSTIC
@@ -1469,7 +1501,7 @@ pool_reclaim(struct pool *pp)
* Reclaim items from the pool's caches.
*/
TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist)
- pool_cache_reclaim(pc);
+ pool_cache_reclaim(pc, &pq);
s = splclock();
curtime = mono_time;
@@ -1503,17 +1535,7 @@ pool_reclaim(struct pool *pp)
if (LIST_EMPTY(&pq))
return (0);
- while ((ph = LIST_FIRST(&pq)) != NULL) {
- LIST_REMOVE(ph, ph_pagelist);
- pool_allocator_free(pp, ph->ph_page);
- if (pp->pr_roflags & PR_PHINPAGE) {
- continue;
- }
- s = splvm();
- pool_put(pp->pr_phpool, ph);
- splx(s);
- }
-
+ pr_pagelist_free(pp, &pq);
return (1);
}
@@ -2024,19 +2046,20 @@ pool_cache_destruct_object(struct pool_c
}
/*
- * pool_cache_do_invalidate:
+ * pool_cache_invalidate:
*
- * This internal function implements pool_cache_invalidate() and
- * pool_cache_reclaim().
+ * Invalidate a pool cache (destruct and release all of the
+ * cached objects).
*/
-static void
-pool_cache_do_invalidate(struct pool_cache *pc, int free_groups,
- void (*putit)(struct pool *, void *))
+void
+pool_cache_invalidate(struct pool_cache *pc)
{
struct pool_cache_group *pcg, *npcg;
void *object;
int s;
+ simple_lock(&pc->pc_slock);
+
for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
pcg = npcg) {
npcg = TAILQ_NEXT(pcg, pcg_list);
@@ -2047,32 +2070,15 @@ pool_cache_do_invalidate(struct pool_cac
pc->pc_allocfrom = NULL;
if (pc->pc_dtor != NULL)
(*pc->pc_dtor)(pc->pc_arg, object);
- (*putit)(pc->pc_pool, object);
- }
- if (free_groups) {
- pc->pc_ngroups--;
- TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
- if (pc->pc_freeto == pcg)
- pc->pc_freeto = NULL;
- s = splvm();
- pool_put(&pcgpool, pcg);
- splx(s);
+ /*
+ * XXXJRT Could improve this by calling
+ * pool_do_put() and freeing the entire
+ * page list at the end.
+ */
+ pool_put(pc->pc_pool, object);
}
}
-}
-/*
- * pool_cache_invalidate:
- *
- * Invalidate a pool cache (destruct and release all of the
- * cached objects).
- */
-void
-pool_cache_invalidate(struct pool_cache *pc)
-{
-
- simple_lock(&pc->pc_slock);
- pool_cache_do_invalidate(pc, 0, pool_put);
simple_unlock(&pc->pc_slock);
}
@@ -2082,11 +2088,42 @@ pool_cache_invalidate(struct pool_cache
* Reclaim a pool cache for pool_reclaim().
*/
static void
-pool_cache_reclaim(struct pool_cache *pc)
+pool_cache_reclaim(struct pool_cache *pc, struct pool_pagelist *pq)
{
+ struct pool_cache_group *pcg, *npcg;
+ void *object;
+ int s;
+
+ /*
+ * We're locking in the wrong order (normally pool_cache -> pool,
+ * but the pool is already locked when we get here), so we have
+ * to use trylock. If we can't lock the pool_cache, it's not really
+ * a big deal here.
+ */
+ if (simple_lock_try(&pc->pc_slock) == 0)
+ return;
+
+ for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
+ pcg = npcg) {
+ npcg = TAILQ_NEXT(pcg, pcg_list);
+ while (pcg->pcg_avail != 0) {
+ pc->pc_nitems--;
+ object = pcg_get(pcg, NULL);
+ if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
+ pc->pc_allocfrom = NULL;
+ if (pc->pc_dtor != NULL)
+ (*pc->pc_dtor)(pc->pc_arg, object);
+ pool_do_put(pc->pc_pool, object, pq);
+ }
+ pc->pc_ngroups--;
+ TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
+ if (pc->pc_freeto == pcg)
+ pc->pc_freeto = NULL;
+ s = splvm();
+ pool_put(&pcgpool, pcg);
+ splx(s);
+ }
- simple_lock(&pc->pc_slock);
- pool_cache_do_invalidate(pc, 1, pool_do_put);
simple_unlock(&pc->pc_slock);
}