Source-Changes-D archive
Re: CVS commit: src/sys/kern
On Sunday, Oct 2, 2011, at 3:00 PM, Juergen Hannken-Illjes wrote:
> Module Name: src
> Committed By: hannken
> Date: Sun Oct 2 13:00:07 UTC 2011
>
> Modified Files:
> src/sys/kern: vfs_vnode.c
>
> Log Message:
> The path getnewvnode()->getcleanvnode()->vclean()->VOP_LOCK() will panic
> if the vnode we want to clean is a layered vnode and the caller already
> locked its lower vnode.
>
> Change getnewvnode() to always allocate a fresh vnode and add a helper
> thread (vdrain) to keep the number of allocated vnodes within desiredvnodes.
>
> Rename getcleanvnode() to cleanvnode() and let it take a vnode from the
> lists, clean and free it.
Thanks for doing this. This should help zfs, too; I saw a couple of
panics with zfs that were caused by this.
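
For anyone who has not hit it, the failure mode looks roughly like this
(a sketch only; the nullfs-over-ffs pairing is my example of a layered
mount, not taken from the PRs):

    /*
     * Caller, e.g. a lookup in the lower file system:
     *
     *   vn_lock(lower_vp);            lower vnode is now locked
     *   getnewvnode(...)
     *     -> getcleanvnode()          the free list hands back a
     *                                 layered vnode stacked over
     *                                 that same lower_vp
     *       -> vclean(vp, DOCLOSE)
     *         -> VOP_LOCK(vp)         the layer forwards the lock
     *                                 to lower_vp, and we panic:
     *                                 "rw_vector_enter: locking
     *                                 against myself"
     */

With getnewvnode() never recycling in the allocation path, that chain
cannot be entered any more.
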
Have you done any benchmarks? When I proposed a solution like this, the
main objection was that, before, vnodes were not allocated from the
storage pool every time (sometimes we reused an already allocated one),
and that this may affect performance.
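
For reference, the new policy boiled down from the diff below (a
condensed sketch with the vnode_free_list_lock handling elided, not the
literal kernel code):

    /* getnewvnode(): always allocate a fresh vnode. */
    numvnodes++;
    if (numvnodes > desiredvnodes + desiredvnodes / 10)
            cv_signal(&vdrain_cv);      /* wake the drainer early */
    vp = vnalloc(NULL);

    /* vdrain thread: push the count back under desiredvnodes. */
    while (numvnodes > desiredvnodes)
            cleanvnode();               /* take a vnode off a freelist,
                                           vclean() it, then free it
                                           via vrele() */

So the allocation fast path becomes a counter bump plus a pool
allocation, and the reclaim work moves out of the allocating caller's
lock context into the helper thread.
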
>
> Reviewed by: David Holland <dholland@netbsd.org>
>
> Should fix:
> PR #19110 (nullfs mounts over NFS cause lock manager problems)
> PR #34102 (ffs panic in NetBSD 3.0_STABLE)
> PR #45115 (lock error panic when build.sh*3 and daily script is running)
> PR #45355 (Reader/writer lock error: rw_vector_enter: locking against myself)
>
>
> To generate a diff of this commit:
> cvs rdiff -u -r1.11 -r1.12 src/sys/kern/vfs_vnode.c
>
> Please note that diffs are not public domain; they are subject to the
> copyright notices on the relevant files.
>
> Modified files:
>
> Index: src/sys/kern/vfs_vnode.c
> diff -u src/sys/kern/vfs_vnode.c:1.11 src/sys/kern/vfs_vnode.c:1.12
> --- src/sys/kern/vfs_vnode.c:1.11 Thu Sep 29 20:51:38 2011
> +++ src/sys/kern/vfs_vnode.c Sun Oct 2 13:00:06 2011
> @@ -1,4 +1,4 @@
> -/* $NetBSD: vfs_vnode.c,v 1.11 2011/09/29 20:51:38 christos Exp $ */
> +/* $NetBSD: vfs_vnode.c,v 1.12 2011/10/02 13:00:06 hannken Exp $ */
>
> /*-
> * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
> @@ -76,7 +76,6 @@
> * starts in one of the following ways:
> *
> * - Allocation, via getnewvnode(9) and/or vnalloc(9).
> - * - Recycle from a free list, via getnewvnode(9) -> getcleanvnode(9).
> * - Reclamation of inactive vnode, via vget(9).
> *
> * The life-cycle ends when the last reference is dropped, usually
> @@ -121,7 +120,7 @@
> */
>
> #include <sys/cdefs.h>
> -__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.11 2011/09/29 20:51:38 christos Exp $");
> +__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.12 2011/10/02 13:00:06 hannken Exp $");
>
> #include <sys/param.h>
> #include <sys/kernel.h>
> @@ -159,8 +158,10 @@ static kcondvar_t vrele_cv __cacheline_
> static lwp_t * vrele_lwp __cacheline_aligned;
> static int vrele_pending __cacheline_aligned;
> static int vrele_gen __cacheline_aligned;
> +static kcondvar_t vdrain_cv __cacheline_aligned;
>
> -static vnode_t * getcleanvnode(void);
> +static int cleanvnode(void);
> +static void vdrain_thread(void *);
> static void vrele_thread(void *);
> static void vnpanic(vnode_t *, const char *, ...)
> __attribute__((__format__(__printf__, 2, 3)));
> @@ -183,7 +184,11 @@ vfs_vnode_sysinit(void)
> TAILQ_INIT(&vrele_list);
>
> mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
> + cv_init(&vdrain_cv, "vdrain");
> cv_init(&vrele_cv, "vrele");
> + error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread,
> + NULL, NULL, "vdrain");
> + KASSERT(error == 0);
> error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
> NULL, &vrele_lwp, "vrele");
> KASSERT(error == 0);
> @@ -249,13 +254,12 @@ vnfree(vnode_t *vp)
> }
>
> /*
> - * getcleanvnode: grab a vnode from freelist and clean it.
> + * cleanvnode: grab a vnode from freelist, clean and free it.
> *
> * => Releases vnode_free_list_lock.
> - * => Returns referenced vnode on success.
> */
> -static vnode_t *
> -getcleanvnode(void)
> +static int
> +cleanvnode(void)
> {
> vnode_t *vp;
> vnodelst_t *listhd;
> @@ -288,7 +292,7 @@ try_nextlist:
> goto try_nextlist;
> }
> mutex_exit(&vnode_free_list_lock);
> - return NULL;
> + return EBUSY;
> }
>
> /* Remove it from the freelist. */
> @@ -300,7 +304,7 @@ try_nextlist:
>
> /*
> * The vnode is still associated with a file system, so we must
> - * clean it out before reusing it. We need to add a reference
> + * clean it out before freeing it. We need to add a reference
> * before doing this. If the vnode gains another reference while
> * being cleaned out then we lose - retry.
> */
> @@ -308,15 +312,7 @@ try_nextlist:
> vclean(vp, DOCLOSE);
> KASSERT(vp->v_usecount >= 1 + VC_XLOCK);
> atomic_add_int(&vp->v_usecount, -VC_XLOCK);
> - if (vp->v_usecount == 1) {
> - /* We're about to dirty it. */
> - vp->v_iflag &= ~VI_CLEAN;
> - mutex_exit(vp->v_interlock);
> - if (vp->v_type == VBLK || vp->v_type == VCHR) {
> - spec_node_destroy(vp);
> - }
> - vp->v_type = VNON;
> - } else {
> + if (vp->v_usecount > 1) {
> /*
> * Don't return to freelist - the holder of the last
> * reference will destroy it.
> @@ -326,17 +322,26 @@ try_nextlist:
> goto retry;
> }
>
> + KASSERT((vp->v_iflag & VI_CLEAN) == VI_CLEAN);
> + mutex_exit(vp->v_interlock);
> + if (vp->v_type == VBLK || vp->v_type == VCHR) {
> + spec_node_destroy(vp);
> + }
> + vp->v_type = VNON;
> +
> KASSERT(vp->v_data == NULL);
> KASSERT(vp->v_uobj.uo_npages == 0);
> KASSERT(TAILQ_EMPTY(&vp->v_uobj.memq));
> KASSERT(vp->v_numoutput == 0);
> KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
>
> - return vp;
> + vrele(vp);
> +
> + return 0;
> }
>
> /*
> - * getnewvnode: return the next vnode from the free list.
> + * getnewvnode: return a fresh vnode.
> *
> * => Returns referenced vnode, moved into the mount queue.
> * => Shares the interlock specified by 'slock', if it is not NULL.
> @@ -346,9 +351,8 @@ getnewvnode(enum vtagtype tag, struct mo
> kmutex_t *slock, vnode_t **vpp)
> {
> struct uvm_object *uobj;
> - static int toggle;
> vnode_t *vp;
> - int error = 0, tryalloc;
> + int error = 0;
>
> try_again:
> if (mp != NULL) {
> @@ -361,80 +365,28 @@ try_again:
> return error;
> }
>
> - /*
> - * We must choose whether to allocate a new vnode or recycle an
> - * existing one. The criterion for allocating a new one is that
> - * the total number of vnodes is less than the number desired or
> - * there are no vnodes on either free list. Generally we only
> - * want to recycle vnodes that have no buffers associated with
> - * them, so we look first on the vnode_free_list. If it is empty,
> - * we next consider vnodes with referencing buffers on the
> - * vnode_hold_list. The toggle ensures that half the time we
> - * will use a buffer from the vnode_hold_list, and half the time
> - * we will allocate a new one unless the list has grown to twice
> - * the desired size. We are reticent to recycle vnodes from the
> - * vnode_hold_list because we will lose the identity of all its
> - * referencing buffers.
> - */
> -
> vp = NULL;
>
> + /* Allocate a new vnode. */
> mutex_enter(&vnode_free_list_lock);
> -
> - toggle ^= 1;
> - if (numvnodes > 2 * desiredvnodes)
> - toggle = 0;
> -
> - tryalloc = numvnodes < desiredvnodes ||
> - (TAILQ_FIRST(&vnode_free_list) == NULL &&
> - (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
> -
> - if (tryalloc) {
> - /* Allocate a new vnode. */
> - numvnodes++;
> + numvnodes++;
> + if (numvnodes > desiredvnodes + desiredvnodes / 10)
> + cv_signal(&vdrain_cv);
> + mutex_exit(&vnode_free_list_lock);
> + if ((vp = vnalloc(NULL)) == NULL) {
> + mutex_enter(&vnode_free_list_lock);
> + numvnodes--;
> mutex_exit(&vnode_free_list_lock);
> - if ((vp = vnalloc(NULL)) == NULL) {
> - mutex_enter(&vnode_free_list_lock);
> - numvnodes--;
> - } else
> - vp->v_usecount = 1;
> - }
> -
> - if (vp == NULL) {
> - /* Recycle and get vnode clean. */
> - vp = getcleanvnode();
> - if (vp == NULL) {
> - if (mp != NULL) {
> - vfs_unbusy(mp, false, NULL);
> - }
> - if (tryalloc) {
> - printf("WARNING: unable to allocate new "
> - "vnode, retrying...\n");
> - kpause("newvn", false, hz, NULL);
> - goto try_again;
> - }
> - tablefull("vnode", "increase kern.maxvnodes or NVNODE");
> - *vpp = 0;
> - return ENFILE;
> - }
> - if ((vp->v_iflag & VI_LOCKSHARE) != 0 || slock) {
> - /* We must remove vnode from the old mount point. */
> - if (vp->v_mount) {
> - vfs_insmntque(vp, NULL);
> - }
> - /* Allocate a new interlock, if it was shared. */
> - if (vp->v_iflag & VI_LOCKSHARE) {
> - uvm_obj_setlock(&vp->v_uobj, NULL);
> - vp->v_iflag &= ~VI_LOCKSHARE;
> - }
> + if (mp != NULL) {
> + vfs_unbusy(mp, false, NULL);
> }
> - vp->v_iflag = 0;
> - vp->v_vflag = 0;
> - vp->v_uflag = 0;
> - vp->v_socket = NULL;
> + printf("WARNING: unable to allocate new vnode, retrying...\n");
> + kpause("newvn", false, hz, NULL);
> + goto try_again;
> }
>
> - KASSERT(vp->v_usecount == 1);
> + vp->v_usecount = 1;
> +
> KASSERT(vp->v_freelisthd == NULL);
> KASSERT(LIST_EMPTY(&vp->v_nclist));
> KASSERT(LIST_EMPTY(&vp->v_dnclist));
> @@ -493,6 +445,29 @@ ungetnewvnode(vnode_t *vp)
> }
>
> /*
> + * Helper thread to keep the number of vnodes below desiredvnodes.
> + */
> +static void
> +vdrain_thread(void *cookie)
> +{
> + int error;
> +
> + mutex_enter(&vnode_free_list_lock);
> +
> + for (;;) {
> + cv_timedwait(&vdrain_cv, &vnode_free_list_lock, hz);
> + while (numvnodes > desiredvnodes) {
> + error = cleanvnode();
> + if (error)
> + kpause("vndsbusy", false, hz, NULL);
> + mutex_enter(&vnode_free_list_lock);
> + if (error)
> + break;
> + }
> + }
> +}
> +
> +/*
> * Remove a vnode from its freelist.
> */
> void
> @@ -1204,17 +1179,19 @@ vwait(vnode_t *vp, int flags)
> int
> vfs_drainvnodes(long target)
> {
> + int error;
>
> - while (numvnodes > target) {
> - vnode_t *vp;
> + mutex_enter(&vnode_free_list_lock);
>
> + while (numvnodes > target) {
> + error = cleanvnode();
> + if (error != 0)
> + return error;
> mutex_enter(&vnode_free_list_lock);
> - vp = getcleanvnode();
> - if (vp == NULL) {
> - return EBUSY;
> - }
> - ungetnewvnode(vp);
> }
> +
> + mutex_exit(&vnode_free_list_lock);
> +
> return 0;
> }
>
>
Regards
Adam.