Subject: Re: kern/33185: kva shortage problems
To: None <kern-bug-people@netbsd.org, gnats-admin@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: netbsd-bugs
Date: 04/06/2006 11:30:03
The following reply was made to PR kern/33185; it has been noted by GNATS.
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
To: gnats-bugs@netbsd.org
Cc: tech-kern@netbsd.org
Subject: Re: kern/33185: kva shortage problems
Date: Thu, 06 Apr 2006 20:26:42 +0900
--NextPart-20060406202103-1948600
Content-Type: Text/Plain; charset=us-ascii
> 1. lack of kva reclamation mechanism.
> http://mail-index.NetBSD.org/tech-kern/2005/12/17/0028.html
the attached diff is an attempt to fix this part.
it basically moves the wait points for kva into vm_map: kva consumers
register reclaim callbacks on their backing map, and uvm_map runs them
round-robin before sleeping for va.
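for illustration, here is roughly how a subsystem would hook into the
new interface.  (this is not part of the diff; the "foo" names are
made up and the includes are approximate.  it just mirrors what the
uipc_socket.c part of the patch does.)

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callback.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_map.h>

static struct callback_entry foo_kva_reclaimerentry;

static int
foo_kva_reclaim_callback(struct callback_entry *ce, void *obj, void *arg)
{

	KASSERT(ce == &foo_kva_reclaimerentry);

	/* give back whatever cached kva we can (hypothetical helper). */
	foo_drain_kva_cache();

	if (!vm_map_starved_p(kernel_map)) {
		/* freed enough; don't bother the rest of the chain. */
		return CALLBACK_CHAIN_ABORT;
	}
	return CALLBACK_CHAIN_CONTINUE;
}

void
foo_init(void)
{

	callback_register(&vm_map_to_kernel(kernel_map)->vmk_reclaim_callback,
	    &foo_kva_reclaimerentry, NULL, foo_kva_reclaim_callback);
}

pools don't need to do this by hand; they get it via pa_backingmap or
pa_backingmapptr, as in the subr_pool.c part of the diff.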
comments?
YAMAMOTO Takashi
--NextPart-20060406202103-1948600
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="a.diff"
Index: sys/callback.h
===================================================================
--- sys/callback.h (revision 0)
+++ sys/callback.h (revision 0)
@@ -0,0 +1,57 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c)2006 YAMAMOTO Takashi,
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _SYS_CALLBACK_H_
+#define _SYS_CALLBACK_H_
+
+struct callback_entry {
+ TAILQ_ENTRY(callback_entry) ce_q;
+ int (*ce_func)(struct callback_entry *, void *, void *);
+ void *ce_obj;
+};
+
+struct callback_head {
+ struct simplelock ch_lock;
+ TAILQ_HEAD(, callback_entry) ch_q;
+ struct callback_entry *ch_next;
+ int ch_nentries;
+ int ch_running;
+ int ch_flags;
+};
+
+/* return values of ce_func */
+#define CALLBACK_CHAIN_CONTINUE 0
+#define CALLBACK_CHAIN_ABORT 1
+
+int callback_run_roundrobin(struct callback_head *, void *);
+void callback_register(struct callback_head *, struct callback_entry *,
+ void *, int (*)(struct callback_entry *, void *, void *));
+void callback_unregister(struct callback_head *, struct callback_entry *);
+void callback_head_init(struct callback_head *);
+
+#endif /* !_SYS_CALLBACK_H_ */
Index: sys/pool.h
===================================================================
--- sys/pool.h (revision 1574)
+++ sys/pool.h (working copy)
@@ -53,6 +53,9 @@
#include <sys/queue.h>
#include <sys/time.h>
#include <sys/tree.h>
+#if defined(_KERNEL)
+#include <sys/callback.h>
+#endif /* defined(_KERNEL) */
#endif
#define PCG_NOBJECTS 16
@@ -108,9 +111,17 @@ struct pool_allocator {
TAILQ_HEAD(, pool) pa_list; /* list of pools using this allocator */
int pa_flags;
#define PA_INITIALIZED 0x01
-#define PA_WANT 0x02 /* wakeup any sleeping pools on free */
int pa_pagemask;
int pa_pageshift;
+ struct vm_map *pa_backingmap;
+#if defined(_KERNEL)
+ struct {
+ struct vm_map **i_backingmapptr;
+ SLIST_ENTRY(pool_allocator) i_q;
+ } pa_init;
+#define pa_q pa_init.i_q
+#define pa_backingmapptr pa_init.i_backingmapptr
+#endif /* defined(_KERNEL) */
};
LIST_HEAD(pool_pagelist,pool_item_header);
@@ -205,6 +216,8 @@ struct pool {
const char *pr_entered_file; /* reentrancy check */
long pr_entered_line;
+
+ struct callback_entry pr_reclaimerentry;
};
#endif /* __POOL_EXPOSE */
Index: kern/subr_pool.c
===================================================================
--- kern/subr_pool.c (revision 1589)
+++ kern/subr_pool.c (working copy)
@@ -82,12 +82,16 @@ static struct pool phpool[PHPOOL_MAX];
static struct pool psppool;
#endif
+static SLIST_HEAD(, pool_allocator) pa_deferinitq =
+ SLIST_HEAD_INITIALIZER(pa_deferinitq);
+
static void *pool_page_alloc_meta(struct pool *, int);
static void pool_page_free_meta(struct pool *, void *);
/* allocator for pool metadata */
static struct pool_allocator pool_allocator_meta = {
- pool_page_alloc_meta, pool_page_free_meta
+ pool_page_alloc_meta, pool_page_free_meta,
+ .pa_backingmapptr = &kmem_map,
};
/* # of seconds to retain page after last use */
@@ -184,8 +188,8 @@ static void pool_prime_page(struct pool
static void pool_update_curpage(struct pool *);
static int pool_grow(struct pool *, int);
-void *pool_allocator_alloc(struct pool *, int);
-void pool_allocator_free(struct pool *, void *);
+static void *pool_allocator_alloc(struct pool *, int);
+static void pool_allocator_free(struct pool *, void *);
static void pool_print_pagelist(struct pool *, struct pool_pagelist *,
void (*)(const char *, ...));
@@ -443,12 +447,106 @@ pr_rmpage(struct pool *pp, struct pool_i
pool_update_curpage(pp);
}
+static boolean_t
+pa_starved_p(struct pool_allocator *pa)
+{
+
+ if (pa->pa_backingmap != NULL) {
+ return vm_map_starved_p(pa->pa_backingmap);
+ }
+ return FALSE;
+}
+
+static int
+pool_reclaim_callback(struct callback_entry *ce, void *obj, void *arg)
+{
+ struct pool *pp = obj;
+ struct pool_allocator *pa = pp->pr_alloc;
+#if 1
+ unsigned long oidle = pp->pr_nidle;
+ unsigned long nidle;
+#endif
+
+ KASSERT(&pp->pr_reclaimerentry == ce);
+
+ pool_reclaim(pp);
+
+#if 1
+ nidle = pp->pr_nidle;
+ if (nidle != oidle) {
+ printf("%s: '%s' %lu -> %lu\n",
+ __func__, pp->pr_wchan, oidle, nidle);
+ }
+#endif
+
+ if (!pa_starved_p(pa)) {
+ return CALLBACK_CHAIN_ABORT;
+ }
+ return CALLBACK_CHAIN_CONTINUE;
+}
+
+static void
+pool_reclaim_register(struct pool *pp)
+{
+ struct vm_map *map = pp->pr_alloc->pa_backingmap;
+ int s;
+
+ if (map == NULL) {
+#if 1
+ if (pp->pr_alloc->pa_backingmapptr == NULL) {
+ printf("%s: pool %p '%s' doesn't have backing map\n",
+ __func__, pp, pp->pr_wchan);
+ }
+#endif
+ return;
+ }
+
+ s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */
+ callback_register(&vm_map_to_kernel(map)->vmk_reclaim_callback,
+ &pp->pr_reclaimerentry, pp, pool_reclaim_callback);
+ splx(s);
+}
+
+static void
+pool_reclaim_unregister(struct pool *pp)
+{
+ struct vm_map *map = pp->pr_alloc->pa_backingmap;
+ int s;
+
+ if (map == NULL) {
+ return;
+ }
+
+ s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */
+ callback_unregister(&vm_map_to_kernel(map)->vmk_reclaim_callback,
+ &pp->pr_reclaimerentry);
+ splx(s);
+}
+
+static void
+pa_reclaim_register(struct pool_allocator *pa)
+{
+ struct vm_map *map = *pa->pa_backingmapptr;
+ struct pool *pp;
+
+ KASSERT(pa->pa_backingmap == NULL);
+ if (map == NULL) {
+ SLIST_INSERT_HEAD(&pa_deferinitq, pa, pa_q);
+ return;
+ }
+ pa->pa_backingmap = map;
+ TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
+ pool_reclaim_register(pp);
+ }
+}
+
/*
* Initialize all the pools listed in the "pools" link set.
*/
void
-link_pool_init(void)
+link_pool_init(void) /* XXX rename */
{
+ struct pool_allocator *pa;
__link_set_decl(pools, struct link_pool_init);
struct link_pool_init * const *pi;
@@ -456,6 +554,14 @@ link_pool_init(void)
pool_init((*pi)->pp, (*pi)->size, (*pi)->align,
(*pi)->align_offset, (*pi)->flags, (*pi)->wchan,
(*pi)->palloc);
+
+ /* XXX XXX */
+ while ((pa = SLIST_FIRST(&pa_deferinitq)) != NULL) {
+ KASSERT(pa->pa_backingmapptr != NULL);
+ KASSERT(*pa->pa_backingmapptr != NULL);
+ SLIST_REMOVE_HEAD(&pa_deferinitq, pa_q);
+ pa_reclaim_register(pa);
+ }
}
/*
@@ -502,6 +608,10 @@ pool_init(struct pool *pp, size_t size,
simple_lock_init(&palloc->pa_slock);
palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
+
+ if (palloc->pa_backingmapptr != NULL) {
+ pa_reclaim_register(palloc);
+ }
palloc->pa_flags |= PA_INITIALIZED;
}
@@ -683,6 +793,7 @@ pool_init(struct pool *pp, size_t size,
TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
simple_unlock(&palloc->pa_slock);
splx(s);
+ pool_reclaim_register(pp);
}
/*
@@ -703,6 +814,7 @@ pool_destroy(struct pool *pp)
simple_unlock(&pool_head_slock);
/* Remove this pool from its allocator's list of pools. */
+ pool_reclaim_unregister(pp); /* XXX can sleep */
s = splvm();
simple_lock(&pp->pr_alloc->pa_slock);
TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
@@ -897,24 +1009,10 @@ pool_get(struct pool *pp, int flags)
if (pp->pr_curpage != NULL)
goto startover;
- if ((flags & PR_WAITOK) == 0) {
- pp->pr_nfail++;
- pr_leave(pp);
- simple_unlock(&pp->pr_slock);
- return (NULL);
- }
-
- /*
- * Wait for items to be returned to this pool.
- *
- * wake up once a second and try again,
- * as the check in pool_cache_put_paddr() is racy.
- */
- pp->pr_flags |= PR_WANTED;
- /* PA_WANTED is already set on the allocator. */
+ pp->pr_nfail++;
pr_leave(pp);
- ltsleep(pp, PSWP, pp->pr_wchan, hz, &pp->pr_slock);
- pr_enter(pp, file, line);
+ simple_unlock(&pp->pr_slock);
+ return (NULL);
}
/* Start the allocation process over. */
@@ -1114,7 +1212,7 @@ pool_do_put(struct pool *pp, void *v, st
pp->pr_nidle++;
if (pp->pr_npages > pp->pr_minpages &&
(pp->pr_npages > pp->pr_maxpages ||
- (pp->pr_alloc->pa_flags & PA_WANT) != 0)) {
+ pa_starved_p(pp->pr_alloc))) {
pr_rmpage(pp, ph, pq);
} else {
LIST_REMOVE(ph, ph_pagelist);
@@ -1483,7 +1581,8 @@ pool_reclaim(struct pool *pp)
KASSERT(ph->ph_nmissing == 0);
timersub(&curtime, &ph->ph_time, &diff);
- if (diff.tv_sec < pool_inactive_time)
+ if (diff.tv_sec < pool_inactive_time
+ && !pa_starved_p(pp->pr_alloc))
continue;
/*
@@ -2166,10 +2265,12 @@ void pool_page_free(struct pool *, void
#ifdef POOL_SUBPAGE
struct pool_allocator pool_allocator_kmem_fullpage = {
pool_page_alloc, pool_page_free, 0,
+ .pa_backingmapptr = &kmem_map,
};
#else
struct pool_allocator pool_allocator_kmem = {
pool_page_alloc, pool_page_free, 0,
+ .pa_backingmapptr = &kmem_map,
};
#endif
@@ -2179,10 +2280,12 @@ void pool_page_free_nointr(struct pool *
#ifdef POOL_SUBPAGE
struct pool_allocator pool_allocator_nointr_fullpage = {
pool_page_alloc_nointr, pool_page_free_nointr, 0,
+ .pa_backingmapptr = &kernel_map,
};
#else
struct pool_allocator pool_allocator_nointr = {
pool_page_alloc_nointr, pool_page_free_nointr, 0,
+ .pa_backingmapptr = &kernel_map,
};
#endif
@@ -2192,6 +2295,7 @@ void pool_subpage_free(struct pool *, vo
struct pool_allocator pool_allocator_kmem = {
pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE,
+ .pa_backingmapptr = &kmem_map,
};
void *pool_subpage_alloc_nointr(struct pool *, int);
@@ -2199,125 +2303,41 @@ void pool_subpage_free_nointr(struct poo
struct pool_allocator pool_allocator_nointr = {
pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE,
+ .pa_backingmapptr = &kmem_map,
};
#endif /* POOL_SUBPAGE */
-/*
- * We have at least three different resources for the same allocation and
- * each resource can be depleted. First, we have the ready elements in the
- * pool. Then we have the resource (typically a vm_map) for this allocator.
- * Finally, we have physical memory. Waiting for any of these can be
- * unnecessary when any other is freed, but the kernel doesn't support
- * sleeping on multiple wait channels, so we have to employ another strategy.
- *
- * The caller sleeps on the pool (so that it can be awakened when an item
- * is returned to the pool), but we set PA_WANT on the allocator. When a
- * page is returned to the allocator and PA_WANT is set, pool_allocator_free
- * will wake up all sleeping pools belonging to this allocator.
- *
- * XXX Thundering herd.
- */
-void *
-pool_allocator_alloc(struct pool *org, int flags)
+static void *
+pool_allocator_alloc(struct pool *pp, int flags)
{
- struct pool_allocator *pa = org->pr_alloc;
- struct pool *pp, *start;
- int s, freed;
+ struct pool_allocator *pa = pp->pr_alloc;
void *res;
- LOCK_ASSERT(!simple_lock_held(&org->pr_slock));
+ LOCK_ASSERT(!simple_lock_held(&pp->pr_slock));
- do {
- if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
- return (res);
- if ((flags & PR_WAITOK) == 0) {
- /*
- * We only run the drain hookhere if PR_NOWAIT.
- * In other cases, the hook will be run in
- * pool_reclaim().
- */
- if (org->pr_drain_hook != NULL) {
- (*org->pr_drain_hook)(org->pr_drain_hook_arg,
- flags);
- if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
- return (res);
- }
- break;
- }
-
+ res = (*pa->pa_alloc)(pp, flags);
+ if (res == NULL && (flags & PR_WAITOK) == 0) {
/*
- * Drain all pools, that use this allocator.
- * We do this to reclaim VA space.
- * pa_alloc is responsible for waiting for
- * physical memory.
- *
- * XXX We risk looping forever if start if someone
- * calls pool_destroy on "start". But there is no
- * other way to have potentially sleeping pool_reclaim,
- * non-sleeping locks on pool_allocator, and some
- * stirring of drained pools in the allocator.
- *
- * XXX Maybe we should use pool_head_slock for locking
- * the allocators?
+ * We only run the drain hook here if PR_NOWAIT.
+ * In other cases, the hook will be run in
+ * pool_reclaim().
*/
- freed = 0;
-
- s = splvm();
- simple_lock(&pa->pa_slock);
- pp = start = TAILQ_FIRST(&pa->pa_list);
- do {
- TAILQ_REMOVE(&pa->pa_list, pp, pr_alloc_list);
- TAILQ_INSERT_TAIL(&pa->pa_list, pp, pr_alloc_list);
- simple_unlock(&pa->pa_slock);
- freed = pool_reclaim(pp);
- simple_lock(&pa->pa_slock);
- } while ((pp = TAILQ_FIRST(&pa->pa_list)) != start &&
- freed == 0);
-
- if (freed == 0) {
- /*
- * We set PA_WANT here, the caller will most likely
- * sleep waiting for pages (if not, this won't hurt
- * that much), and there is no way to set this in
- * the caller without violating locking order.
- */
- pa->pa_flags |= PA_WANT;
+ if (pp->pr_drain_hook != NULL) {
+ (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags);
+ res = (*pa->pa_alloc)(pp, flags);
}
- simple_unlock(&pa->pa_slock);
- splx(s);
- } while (freed);
- return (NULL);
+ }
+ return res;
}
-void
+static void
pool_allocator_free(struct pool *pp, void *v)
{
struct pool_allocator *pa = pp->pr_alloc;
- int s;
LOCK_ASSERT(!simple_lock_held(&pp->pr_slock));
(*pa->pa_free)(pp, v);
-
- s = splvm();
- simple_lock(&pa->pa_slock);
- if ((pa->pa_flags & PA_WANT) == 0) {
- simple_unlock(&pa->pa_slock);
- splx(s);
- return;
- }
-
- TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
- simple_lock(&pp->pr_slock);
- if ((pp->pr_flags & PR_WANTED) != 0) {
- pp->pr_flags &= ~PR_WANTED;
- wakeup(pp);
- }
- simple_unlock(&pp->pr_slock);
- }
- pa->pa_flags &= ~PA_WANT;
- simple_unlock(&pa->pa_slock);
- splx(s);
}
void *
Index: kern/vfs_bio.c
===================================================================
--- kern/vfs_bio.c (revision 1587)
+++ kern/vfs_bio.c (working copy)
@@ -177,9 +177,9 @@ struct simplelock bqueue_slock = SIMPLEL
/*
* Buffer pool for I/O buffers.
- * Access to this pool must be protected with splbio().
*/
-static POOL_INIT(bufpool, sizeof(struct buf), 0, 0, 0, "bufpl", NULL);
+static POOL_INIT(bufpool, sizeof(struct buf), 0, 0, 0, "bufpl",
+ &pool_allocator_nointr);
/* XXX - somewhat gross.. */
@@ -375,8 +375,7 @@ bufinit(void)
if (bufmem_valimit != 0) {
vaddr_t minaddr = 0, maxaddr;
buf_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
- bufmem_valimit, VM_MAP_PAGEABLE,
- FALSE, 0);
+ bufmem_valimit, 0, FALSE, 0);
if (buf_map == NULL)
panic("bufinit: cannot allocate submap");
} else
@@ -393,6 +392,7 @@ bufinit(void)
use_std = 1;
#endif
+ bufmempool_allocator.pa_backingmap = buf_map;
for (i = 0; i < NMEMPOOLS; i++) {
struct pool_allocator *pa;
struct pool *pp = &bmempools[i];
@@ -985,13 +985,13 @@ already_queued:
/* Allow disk interrupts. */
simple_unlock(&bp->b_interlock);
simple_unlock(&bqueue_slock);
+ splx(s);
if (bp->b_bufsize <= 0) {
#ifdef DEBUG
memset((char *)bp, 0, sizeof(*bp));
#endif
pool_put(&bufpool, bp);
}
- splx(s);
}
/*
Index: kern/uipc_socket.c
===================================================================
--- kern/uipc_socket.c (revision 1590)
+++ kern/uipc_socket.c (working copy)
@@ -126,16 +126,8 @@ EVCNT_ATTACH_STATIC(sosend_kvalimit);
#endif /* SOSEND_COUNTERS */
-void
-soinit(void)
-{
+static struct callback_entry sokva_reclaimerentry;
- /* Set the initial adjusted socket buffer size. */
- if (sb_max_set(sb_max))
- panic("bad initial sb_max value: %lu", sb_max);
-
-}
-
#ifdef SOSEND_NO_LOAN
int use_sosend_loan = 0;
#else
@@ -438,6 +430,32 @@ sosend_loan(struct socket *so, struct ui
}
return (space);
+}
+
+static int
+sokva_reclaim_callback(struct callback_entry *ce, void *obj, void *arg)
+{
+
+ KASSERT(ce == &sokva_reclaimerentry);
+ KASSERT(obj == NULL);
+
+ sodopendfree();
+ if (!vm_map_starved_p(kernel_map)) {
+ return CALLBACK_CHAIN_ABORT;
+ }
+ return CALLBACK_CHAIN_CONTINUE;
+}
+
+void
+soinit(void)
+{
+
+ /* Set the initial adjusted socket buffer size. */
+ if (sb_max_set(sb_max))
+ panic("bad initial sb_max value: %lu", sb_max);
+
+ callback_register(&vm_map_to_kernel(kernel_map)->vmk_reclaim_callback,
+ &sokva_reclaimerentry, NULL, sokva_reclaim_callback);
}
/*
Index: kern/uipc_mbuf.c
===================================================================
--- kern/uipc_mbuf.c (revision 1585)
+++ kern/uipc_mbuf.c (working copy)
@@ -154,6 +154,7 @@ mbinit(void)
KASSERT(sizeof(struct _m_ext) <= MHLEN);
KASSERT(sizeof(struct mbuf) == MSIZE);
+ mclpool_allocator.pa_backingmap = mb_map;
pool_init(&mbpool, msize, 0, 0, 0, "mbpl", NULL);
pool_init(&mclpool, mclbytes, 0, 0, 0, "mclpl", &mclpool_allocator);
Index: kern/subr_callback.c
===================================================================
--- kern/subr_callback.c (revision 0)
+++ kern/subr_callback.c (revision 0)
@@ -0,0 +1,139 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c)2006 YAMAMOTO Takashi,
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/callback.h>
+
+#define CH_WANT 1
+
+void
+callback_head_init(struct callback_head *ch)
+{
+
+ simple_lock_init(&ch->ch_lock);
+ TAILQ_INIT(&ch->ch_q);
+ ch->ch_next = NULL;
+ ch->ch_nentries = 0;
+}
+
+void
+callback_register(struct callback_head *ch, struct callback_entry *ce,
+ void *obj, int (*fn)(struct callback_entry *, void *, void *))
+{
+
+ ce->ce_func = fn;
+ ce->ce_obj = obj;
+ simple_lock(&ch->ch_lock);
+ TAILQ_INSERT_TAIL(&ch->ch_q, ce, ce_q);
+ ch->ch_nentries++;
+ simple_unlock(&ch->ch_lock);
+}
+
+void
+callback_unregister(struct callback_head *ch, struct callback_entry *ce)
+{
+
+ simple_lock(&ch->ch_lock);
+ while (ch->ch_running > 0) {
+ ch->ch_flags |= CH_WANT;
+ ltsleep(&ch->ch_running, PVM, "recunreg", 0, &ch->ch_lock);
+ }
+ if (__predict_false(ch->ch_next == ce)) {
+ ch->ch_next = TAILQ_NEXT(ce, ce_q);
+ }
+ TAILQ_REMOVE(&ch->ch_q, ce, ce_q);
+ ch->ch_nentries--;
+ simple_unlock(&ch->ch_lock);
+}
+
+static int
+callback_runone(struct callback_head *ch, void *arg)
+{
+ struct callback_entry *ce;
+ int result;
+
+ KASSERT(ch->ch_nentries > 0);
+ KASSERT(ch->ch_running > 0);
+
+ ce = ch->ch_next;
+ if (ce == NULL) {
+ ce = TAILQ_FIRST(&ch->ch_q);
+ }
+ KASSERT(ce != NULL);
+ result = (*ce->ce_func)(ce, ce->ce_obj, arg);
+ ch->ch_next = TAILQ_NEXT(ce, ce_q);
+ return result;
+}
+
+static void
+callback_run_enter(struct callback_head *ch)
+{
+
+ simple_lock(&ch->ch_lock);
+ ch->ch_running++;
+ simple_unlock(&ch->ch_lock);
+}
+
+static void
+callback_run_leave(struct callback_head *ch)
+{
+
+ simple_lock(&ch->ch_lock);
+ KASSERT(ch->ch_running > 0);
+ ch->ch_running--;
+ if (ch->ch_running == 0 && (ch->ch_flags & CH_WANT) != 0) {
+ ch->ch_flags &= ~CH_WANT;
+ wakeup(&ch->ch_running);
+ }
+ simple_unlock(&ch->ch_lock);
+}
+
+int
+callback_run_roundrobin(struct callback_head *ch, void *arg)
+{
+ int i;
+ int n;
+ int result = 0;
+
+ callback_run_enter(ch);
+ n = ch->ch_nentries;
+ for (i = 0; i < n; i++) {
+ result = callback_runone(ch, arg);
+ if (result != CALLBACK_CHAIN_CONTINUE) {
+ break;
+ }
+ }
+ callback_run_leave(ch);
+
+ return result;
+}
Index: uvm/uvm_km.c
===================================================================
--- uvm/uvm_km.c (revision 1591)
+++ uvm/uvm_km.c (working copy)
@@ -188,7 +188,8 @@ km_vacache_alloc(struct pool *pp, int fl
if (uvm_map(map, &va, size, NULL, UVM_UNKNOWN_OFFSET, size,
UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
UVM_ADV_RANDOM, UVM_FLAG_QUANTUM |
- ((flags & PR_WAITOK) ? 0 : UVM_FLAG_TRYLOCK | UVM_FLAG_NOWAIT))))
+ ((flags & PR_WAITOK) ? UVM_FLAG_WAITVA :
+ UVM_FLAG_TRYLOCK | UVM_FLAG_NOWAIT))))
return NULL;
return (void *)va;
@@ -226,10 +227,9 @@ km_vacache_init(struct vm_map *map, cons
pa->pa_alloc = km_vacache_alloc;
pa->pa_free = km_vacache_free;
pa->pa_pagesz = (unsigned int)size;
+ pa->pa_backingmap = map;
+ pa->pa_backingmapptr = NULL;
pool_init(pp, PAGE_SIZE, 0, 0, PR_NOTOUCH | PR_RECURSIVE, name, pa);
-
- /* XXX for now.. */
- pool_sethiwat(pp, 0);
}
void
@@ -252,6 +252,30 @@ uvm_km_vacache_init(struct vm_map *map,
}
#endif /* !defined(PMAP_MAP_POOLPAGE) */
+
+void
+uvm_km_va_drain(struct vm_map *map, uvm_flag_t flags)
+{
+ struct vm_map_kernel *vmk = vm_map_to_kernel(map);
+ const boolean_t intrsafe = (map->flags & VM_MAP_INTRSAFE) != 0;
+#if 0
+ const int rflags =
+ (flags & (UVM_FLAG_NOWAIT|UVM_FLAG_WAITVA)) == UVM_FLAG_WAITVA ?
+ 0 : RECLAIM_FLAG_NOWAIT;
+ struct reclaim_args args = {
+ .ra_flags = rflags,
+ };
+#endif
+ int s = 0xdeadbeaf; /* XXX: gcc */
+
+ if (intrsafe) {
+ s = splvm();
+ }
+ callback_run_roundrobin(&vmk->vmk_reclaim_callback, NULL);
+ if (intrsafe) {
+ splx(s);
+ }
+}
/*
* uvm_km_init: init kernel maps and objects to reflect reality (i.e.
Index: uvm/uvm_km.h
===================================================================
--- uvm/uvm_km.h (revision 1464)
+++ uvm/uvm_km.h (working copy)
@@ -55,6 +55,7 @@ void uvm_km_check_empty(vaddr_t, vaddr_t
#else
#define uvm_km_check_empty(a, b, c) /* nothing */
#endif /* defined(DEBUG) */
+void uvm_km_va_drain(struct vm_map *, uvm_flag_t);
#endif /* _KERNEL */
Index: uvm/uvm_map.c
===================================================================
--- uvm/uvm_map.c (revision 1587)
+++ uvm/uvm_map.c (working copy)
@@ -742,7 +742,17 @@ uvm_map_clip_end(struct vm_map *map, str
uvm_tree_sanity(map, "clip_end leave");
}
+static void
+vm_map_drain(struct vm_map *map, uvm_flag_t flags)
+{
+ if (!VM_MAP_IS_KERNEL(map)) {
+ return;
+ }
+
+ uvm_km_va_drain(map, flags);
+}
+
/*
* M A P - m a i n e n t r y p o i n t
*/
@@ -875,16 +885,11 @@ retry:
}
vm_map_lock(map); /* could sleep here */
}
- if ((prev_entry = uvm_map_findspace(map, start, size, &start,
- uobj, uoffset, align, flags)) == NULL) {
+ prev_entry = uvm_map_findspace(map, start, size, &start,
+ uobj, uoffset, align, flags);
+ if (prev_entry == NULL) {
unsigned int timestamp;
- if ((flags & UVM_FLAG_WAITVA) == 0) {
- UVMHIST_LOG(maphist,"<- uvm_map_findspace failed!",
- 0,0,0,0);
- vm_map_unlock(map);
- return ENOMEM;
- }
timestamp = map->timestamp;
UVMHIST_LOG(maphist,"waiting va timestamp=0x%x",
timestamp,0,0,0);
@@ -894,15 +899,24 @@ retry:
vm_map_unlock(map);
/*
- * wait until someone does unmap.
+ * try to reclaim kva and wait until someone does unmap.
* XXX fragile locking
*/
+ vm_map_drain(map, flags);
+
simple_lock(&map->flags_lock);
while ((map->flags & VM_MAP_WANTVA) != 0 &&
map->timestamp == timestamp) {
- ltsleep(&map->header, PVM, "vmmapva", 0,
- &map->flags_lock);
+ if ((flags & UVM_FLAG_WAITVA) == 0) {
+ simple_unlock(&map->flags_lock);
+ UVMHIST_LOG(maphist,
+ "<- uvm_map_findspace failed!", 0,0,0,0);
+ return ENOMEM;
+ } else {
+ ltsleep(&map->header, PVM, "vmmapva", 0,
+ &map->flags_lock);
+ }
}
simple_unlock(&map->flags_lock);
goto retry;
@@ -2655,6 +2669,7 @@ uvm_map_setup_kernel(struct vm_map_kerne
uvm_map_setup(&map->vmk_map, vmin, vmax, flags);
+ callback_head_init(&map->vmk_reclaim_callback);
LIST_INIT(&map->vmk_kentry_free);
map->vmk_merged_entries = NULL;
}
@@ -4789,4 +4804,18 @@ vm_map_to_kernel(struct vm_map *map)
KASSERT(VM_MAP_IS_KERNEL(map));
return (struct vm_map_kernel *)map;
+}
+
+boolean_t
+vm_map_starved_p(struct vm_map *map)
+{
+
+ if ((map->flags & VM_MAP_WANTVA) != 0) {
+ return TRUE;
+ }
+ /* XXX */
+ if ((vm_map_max(map) - vm_map_min(map)) / 16 * 15 < map->size) {
+ return TRUE;
+ }
+ return FALSE;
}
Index: uvm/uvm_map.h
===================================================================
--- uvm/uvm_map.h (revision 1571)
+++ uvm/uvm_map.h (working copy)
@@ -234,6 +234,9 @@ struct vm_map {
};
#if defined(_KERNEL)
+
+#include <sys/callback.h>
+
struct vm_map_kernel {
struct vm_map vmk_map;
LIST_HEAD(, uvm_kmapent_hdr) vmk_kentry_free;
@@ -241,6 +244,7 @@ struct vm_map_kernel {
struct vm_map_entry *vmk_merged_entries;
/* Merged entries, kept for later splitting */
+ struct callback_head vmk_reclaim_callback;
#if !defined(PMAP_MAP_POOLPAGE)
struct pool vmk_vacache; /* kva cache */
struct pool_allocator vmk_vacache_allocator; /* ... and its allocator */
@@ -506,6 +510,8 @@ do { \
if (oflags & VM_MAP_WANTLOCK) \
wakeup(&(map)->flags); \
} while (/*CONSTCOND*/ 0)
+
+boolean_t vm_map_starved_p(struct vm_map *);
#endif /* _KERNEL */
--NextPart-20060406202103-1948600--