Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/kern Make vcache_tryvget() lockless. Reviewed by hannken@.
details: https://anonhg.NetBSD.org/src/rev/07771116fafa
branches: trunk
changeset: 1010481:07771116fafa
user: ad <ad%NetBSD.org@localhost>
date: Tue May 26 18:38:37 2020 +0000
description:
Make vcache_tryvget() lockless. Reviewed by hannken@.
diffstat:
sys/kern/vfs_cache.c | 14 +---
sys/kern/vfs_lookup.c | 8 +-
sys/kern/vfs_subr.c | 23 +++----
sys/kern/vfs_vnode.c | 154 ++++++++++++++++++++++++++++---------------------
4 files changed, 105 insertions(+), 94 deletions(-)
diffs (truncated from 441 to 300 lines):
diff -r cb20ae0b0d38 -r 07771116fafa sys/kern/vfs_cache.c
--- a/sys/kern/vfs_cache.c Tue May 26 16:08:55 2020 +0000
+++ b/sys/kern/vfs_cache.c Tue May 26 18:38:37 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: vfs_cache.c,v 1.143 2020/05/16 18:31:50 christos Exp $ */
+/* $NetBSD: vfs_cache.c,v 1.144 2020/05/26 18:38:37 ad Exp $ */
/*-
* Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc.
@@ -172,7 +172,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.143 2020/05/16 18:31:50 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.144 2020/05/26 18:38:37 ad Exp $");
#define __NAMECACHE_PRIVATE
#ifdef _KERNEL_OPT
@@ -582,13 +582,8 @@
return hit;
}
vp = ncp->nc_vp;
- mutex_enter(vp->v_interlock);
+ error = vcache_tryvget(vp);
rw_exit(&dvi->vi_nc_lock);
-
- /*
- * Unlocked except for the vnode interlock. Call vcache_tryvget().
- */
- error = vcache_tryvget(vp);
if (error) {
KASSERT(error == EBUSY);
/*
@@ -821,9 +816,8 @@
}
dvp = ncp->nc_dvp;
- mutex_enter(dvp->v_interlock);
+ error = vcache_tryvget(dvp);
rw_exit(&vi->vi_nc_listlock);
- error = vcache_tryvget(dvp);
if (error) {
KASSERT(error == EBUSY);
if (bufp)
diff -r cb20ae0b0d38 -r 07771116fafa sys/kern/vfs_lookup.c
--- a/sys/kern/vfs_lookup.c Tue May 26 16:08:55 2020 +0000
+++ b/sys/kern/vfs_lookup.c Tue May 26 18:38:37 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: vfs_lookup.c,v 1.219 2020/04/22 21:35:52 ad Exp $ */
+/* $NetBSD: vfs_lookup.c,v 1.220 2020/05/26 18:38:37 ad Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.219 2020/04/22 21:35:52 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.220 2020/05/26 18:38:37 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_magiclinks.h"
@@ -1354,9 +1354,7 @@
foundobj->v_type != VDIR ||
(foundobj->v_type == VDIR &&
foundobj->v_mountedhere != NULL)) {
- mutex_enter(foundobj->v_interlock);
error = vcache_tryvget(foundobj);
- /* v_interlock now unheld */
if (error != 0) {
foundobj = NULL;
error = EOPNOTSUPP;
@@ -1381,9 +1379,7 @@
* let lookup_once() take care of it.
*/
if (searchdir != *searchdir_ret) {
- mutex_enter(searchdir->v_interlock);
error2 = vcache_tryvget(searchdir);
- /* v_interlock now unheld */
KASSERT(plock != NULL);
rw_exit(plock);
if (__predict_true(error2 == 0)) {
diff -r cb20ae0b0d38 -r 07771116fafa sys/kern/vfs_subr.c
--- a/sys/kern/vfs_subr.c Tue May 26 16:08:55 2020 +0000
+++ b/sys/kern/vfs_subr.c Tue May 26 18:38:37 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: vfs_subr.c,v 1.487 2020/05/16 18:31:50 christos Exp $ */
+/* $NetBSD: vfs_subr.c,v 1.488 2020/05/26 18:38:37 ad Exp $ */
/*-
* Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008, 2019, 2020
@@ -69,7 +69,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.487 2020/05/16 18:31:50 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.488 2020/05/26 18:38:37 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_ddb.h"
@@ -730,18 +730,15 @@
KASSERT(mutex_owned(&syncer_data_lock));
synced = false;
- /* We are locking in the wrong direction. */
- if (mutex_tryenter(vp->v_interlock)) {
+ if (vcache_tryvget(vp) == 0) {
mutex_exit(&syncer_data_lock);
- if (vcache_tryvget(vp) == 0) {
- if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
- synced = true;
- (void) VOP_FSYNC(vp, curlwp->l_cred,
- FSYNC_LAZY, 0, 0);
- vput(vp);
- } else
- vrele(vp);
- }
+ if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
+ synced = true;
+ (void) VOP_FSYNC(vp, curlwp->l_cred,
+ FSYNC_LAZY, 0, 0);
+ vput(vp);
+ } else
+ vrele(vp);
mutex_enter(&syncer_data_lock);
}
return synced;
diff -r cb20ae0b0d38 -r 07771116fafa sys/kern/vfs_vnode.c
--- a/sys/kern/vfs_vnode.c Tue May 26 16:08:55 2020 +0000
+++ b/sys/kern/vfs_vnode.c Tue May 26 18:38:37 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: vfs_vnode.c,v 1.122 2020/05/18 08:27:54 hannken Exp $ */
+/* $NetBSD: vfs_vnode.c,v 1.123 2020/05/26 18:38:37 ad Exp $ */
/*-
* Copyright (c) 1997-2011, 2019, 2020 The NetBSD Foundation, Inc.
@@ -112,7 +112,7 @@
* LOADING -> LOADED
* Vnode has been initialised in vcache_get() or
* vcache_new() and is ready to use.
- * LOADED -> RECLAIMING
+ * BLOCKED -> RECLAIMING
* Vnode starts disassociation from underlying file
* system in vcache_reclaim().
* RECLAIMING -> RECLAIMED
@@ -143,19 +143,12 @@
* as vput(9), routines. Common points holding references are e.g.
* file openings, current working directory, mount points, etc.
*
- * Note on v_usecount and its locking
- *
- * At nearly all points it is known that v_usecount could be zero,
- * the vnode_t::v_interlock will be held. To change the count away
- * from zero, the interlock must be held. To change from a non-zero
- * value to zero, again the interlock must be held.
- *
- * Changing the usecount from a non-zero value to a non-zero value can
- * safely be done using atomic operations, without the interlock held.
+ * v_usecount is adjusted with atomic operations, however to change
+ * from a non-zero value to zero the interlock must also be held.
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.122 2020/05/18 08:27:54 hannken Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.123 2020/05/26 18:38:37 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_pax.h"
@@ -237,13 +230,20 @@
extern struct vfsops dead_vfsops;
/*
+ * The high bit of v_usecount is a gate for vcache_tryvget(). It's set
+ * only when the vnode state is LOADED.
+ */
+#define VUSECOUNT_MASK 0x7fffffff
+#define VUSECOUNT_GATE 0x80000000
+
+/*
* Return the current usecount of a vnode.
*/
inline int
vrefcnt(struct vnode *vp)
{
- return atomic_load_relaxed(&vp->v_usecount);
+ return atomic_load_relaxed(&vp->v_usecount) & VUSECOUNT_MASK;
}
/* Vnode state operations and diagnostics. */
@@ -329,8 +329,8 @@
vstate_assert_change(vnode_t *vp, enum vnode_state from, enum vnode_state to,
const char *func, int line)
{
+ bool gated = (atomic_load_relaxed(&vp->v_usecount) & VUSECOUNT_GATE);
vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
- int refcnt = vrefcnt(vp);
KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line);
if (from == VS_LOADING)
@@ -345,10 +345,15 @@
if (vip->vi_state != from)
vnpanic(vp, "from is %s, expected %s at %s:%d\n",
vstate_name(vip->vi_state), vstate_name(from), func, line);
- if ((from == VS_BLOCKED || to == VS_BLOCKED) && refcnt != 1)
- vnpanic(vp, "%s to %s with usecount %d at %s:%d",
- vstate_name(from), vstate_name(to), refcnt,
- func, line);
+ if ((from == VS_LOADED) != gated)
+ vnpanic(vp, "state is %s, gate %d does not match at %s:%d\n",
+ vstate_name(vip->vi_state), gated, func, line);
+
+ /* Open/close the gate for vcache_tryvget(). */
+ if (to == VS_LOADED)
+ atomic_or_uint(&vp->v_usecount, VUSECOUNT_GATE);
+ else
+ atomic_and_uint(&vp->v_usecount, ~VUSECOUNT_GATE);
vip->vi_state = to;
if (from == VS_LOADING)
@@ -386,6 +391,12 @@
{
vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
+ /* Open/close the gate for vcache_tryvget(). */
+ if (to == VS_LOADED)
+ atomic_or_uint(&vp->v_usecount, VUSECOUNT_GATE);
+ else
+ atomic_and_uint(&vp->v_usecount, ~VUSECOUNT_GATE);
+
vip->vi_state = to;
if (from == VS_LOADING)
cv_broadcast(&vcache_cv);
@@ -712,10 +723,10 @@
u_int use, next;
for (use = atomic_load_relaxed(&vp->v_usecount);; use = next) {
- if (__predict_false(use == 1)) {
+ if (__predict_false((use & VUSECOUNT_MASK) == 1)) {
return false;
}
- KASSERT(use > 1);
+ KASSERT((use & VUSECOUNT_MASK) > 1);
next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
if (__predict_true(next == use)) {
return true;
@@ -846,10 +857,8 @@
VOP_UNLOCK(vp);
} else {
/*
- * The vnode must not gain another reference while being
- * deactivated. If VOP_INACTIVE() indicates that
- * the described file has been deleted, then recycle
- * the vnode.
+ * If VOP_INACTIVE() indicates that the file has been
+ * deleted, then recycle the vnode.
*
* Note that VOP_INACTIVE() will not drop the vnode lock.
*/
@@ -858,12 +867,34 @@
VOP_INACTIVE(vp, &recycle);
rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
mutex_enter(vp->v_interlock);
- if (vtryrele(vp)) {
- VOP_UNLOCK(vp);
- mutex_exit(vp->v_interlock);
- rw_exit(vp->v_uobj.vmobjlock);
- return;
- }
+
+ for (;;) {
+ /*
+ * If no longer the last reference, try to shed it.
+ * On success, drop the interlock last thereby
+ * preventing the vnode being freed behind us.
+ */
+ if (vtryrele(vp)) {
+ VOP_UNLOCK(vp);
+ rw_exit(vp->v_uobj.vmobjlock);
+ mutex_exit(vp->v_interlock);
+ return;
+ }
+ /*
+ * Block new references then check again to see if a
+ * new reference was acquired in the meantime. If
+ * it was, restore the vnode state and try again.
+ */
+ if (recycle) {
+ VSTATE_CHANGE(vp, VS_LOADED, VS_BLOCKED);
+ if (vrefcnt(vp) != 1) {
+ VSTATE_CHANGE(vp, VS_BLOCKED,
+ VS_LOADED);
+ continue;
+ }
+ }
+ break;
+ }
/* Take care of space accounting. */
if ((vp->v_iflag & VI_EXECMAP) != 0 &&
Home |
Main Index |
Thread Index |
Old Index