Source-Changes-HG archive
[src/ad-namecache]: src/sys vfs_lookup:
details: https://anonhg.NetBSD.org/src/rev/5639653ebf86
branches: ad-namecache
changeset: 982988:5639653ebf86
user: ad <ad%NetBSD.org@localhost>
date: Fri Jan 17 22:26:25 2020 +0000
description:
vfs_lookup:
- Do the easy component name lookups directly in the namecache without
taking vnode locks or vnode references (between the start and the leaf /
parent), which seems to largely solve the lock contention problem with
namei(). It needs support from the file system, which has to tell the
name cache about directory permissions (only ffs and tmpfs tried so far),
and I'm not sure how or if it can work with layered file systems yet.
Work in progress.
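For illustration only (not part of the commit): a minimal sketch of the kind of lockless permission check vfs_lookup could make against the mode/uid/gid a file system publishes to the name cache. The helper name and the simplified check are assumptions; only the cached fields and the VNOVAL convention appear in the diff below.
#include <sys/param.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
/*
 * Hypothetical sketch, not the committed code: decide whether a credential
 * may search a directory using only the identity info cached by the name
 * cache, without taking the directory vnode's lock.  A cached mode of
 * VNOVAL means the file system has not published anything, so the caller
 * must fall back to the usual locked lookup path.
 */
static bool
cache_can_search(mode_t mode, uid_t uid, gid_t gid, kauth_cred_t cred)
{
	mode_t bits;
	int ismember = 0;

	if (mode == (mode_t)VNOVAL)
		return false;			/* not published: slow path */
	if (kauth_cred_geteuid(cred) == uid)
		bits = (mode >> 6) & 07;	/* owner bits */
	else if (kauth_cred_ismember_gid(cred, gid, &ismember) == 0 && ismember)
		bits = (mode >> 3) & 07;	/* group bits */
	else
		bits = mode & 07;		/* other bits */
	return (bits & 01) != 0;		/* need X (search) permission */
}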
vfs_cache:
- Make the rbtree operations more efficient: inline the lookup, and key on a
64-bit hash value (a 32-bit hash plus a 16-bit length) rather than on names.
- Take the namecache stuff and its rwlocks out of vnode_impl, and put them
all together in an nchnode struct which is mapped 1:1 with vnodes. This saves
memory and gives a nicer cache profile.
- Add a routine to help vfs_lookup do its easy component name lookups.
- Report some more stats.
- Tidy up the file a bit.
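For illustration only: a sketch of how a 64-bit key might be built from a 32-bit name hash plus a 16-bit length, and how a per-directory tree keyed that way is searched. hash32_buf() and rb_tree_find_node() are existing kernel interfaces (<sys/hash.h> is newly pulled in by the diff below); the helper names and the exact bit layout here are assumptions, not necessarily what the committed code does.
#include <sys/hash.h>		/* hash32_buf() */
#include <sys/rbtree.h>		/* rb_tree_find_node() */
/*
 * Sketch: pack a 32-bit hash of the component name and its length (which
 * fits in 16 bits for any legal name) into one 64-bit key.  The exact
 * layout is an assumption made for illustration.
 */
static inline uint64_t
cache_key_sketch(const char *name, size_t nlen)
{
	uint64_t hash;

	hash = hash32_buf(name, nlen, HASH32_STR_INIT);
	return (hash << 32) | (uint64_t)nlen;
}
/*
 * Sketch: with each entry's nc_key as the tree key (compare
 * cache_compare_key() in the diff), a lookup becomes a single find by key;
 * callers may still want to verify the name, since distinct names can in
 * principle hash to the same key.
 */
static inline struct namecache *
cache_find_sketch(rb_tree_t *tree, const char *name, size_t nlen)
{
	uint64_t key = cache_key_sketch(name, nlen);

	return rb_tree_find_node(tree, &key);
}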
diffstat:
sys/fs/tmpfs/tmpfs_subr.c | 8 +-
sys/kern/vfs_cache.c | 1196 +++++++++++++++++++++++++-------------------
sys/kern/vfs_lookup.c | 374 ++++++++++----
sys/sys/namei.src | 29 +-
sys/sys/vnode_impl.h | 12 +-
sys/ufs/ffs/ffs_vfsops.c | 6 +-
sys/ufs/ufs/ufs_vnops.c | 7 +-
7 files changed, 989 insertions(+), 643 deletions(-)
diffs (truncated from 2344 to 300 lines):
diff -r 9551feeaa639 -r 5639653ebf86 sys/fs/tmpfs/tmpfs_subr.c
--- a/sys/fs/tmpfs/tmpfs_subr.c Fri Jan 17 21:55:13 2020 +0000
+++ b/sys/fs/tmpfs/tmpfs_subr.c Fri Jan 17 22:26:25 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: tmpfs_subr.c,v 1.105 2019/09/18 17:59:15 christos Exp $ */
+/* $NetBSD: tmpfs_subr.c,v 1.105.2.1 2020/01/17 22:26:25 ad Exp $ */
/*
* Copyright (c) 2005-2013 The NetBSD Foundation, Inc.
@@ -73,7 +73,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.105 2019/09/18 17:59:15 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.105.2.1 2020/01/17 22:26:25 ad Exp $");
#include <sys/param.h>
#include <sys/cprng.h>
@@ -147,6 +147,8 @@
vp->v_data = node;
node->tn_vnode = vp;
uvm_vnp_setsize(vp, node->tn_size);
+ KASSERT(node->tn_mode != VNOVAL);
+ cache_set_id(vp, node->tn_mode, node->tn_uid, node->tn_gid);
}
/*
@@ -1037,6 +1039,7 @@
node->tn_mode = (mode & ALLPERMS);
tmpfs_update(vp, TMPFS_UPDATE_CTIME);
VN_KNOTE(vp, NOTE_ATTRIB);
+ cache_update_id(vp, node->tn_mode, node->tn_uid, node->tn_gid);
return 0;
}
@@ -1081,6 +1084,7 @@
node->tn_gid = gid;
tmpfs_update(vp, TMPFS_UPDATE_CTIME);
VN_KNOTE(vp, NOTE_ATTRIB);
+ cache_update_id(vp, node->tn_mode, node->tn_uid, node->tn_gid);
return 0;
}
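For illustration only: the tmpfs hunks above publish identity information when the vnode is set up (cache_set_id) and again after chmod/chown (cache_update_id). A hypothetical sketch of what the name-cache side of such a call could look like follows; the field and lock names come from the struct nchnode added in the vfs_cache.c diff below, while nchnode_of() is an invented accessor for the 1:1 vnode-to-nchnode mapping.
#include <sys/rwlock.h>
#include <sys/vnode.h>
/*
 * Hypothetical sketch, not the committed code: record a directory's
 * mode/uid/gid in its nchnode so later lookups can check search
 * permission without locking the vnode.  nchnode_of() is invented.
 */
void
cache_set_id(struct vnode *vp, mode_t mode, uid_t uid, gid_t gid)
{
	struct nchnode *nn = nchnode_of(vp);	/* invented accessor */

	rw_enter(&nn->nn_lock, RW_WRITER);
	nn->nn_mode = mode;
	nn->nn_uid = uid;
	nn->nn_gid = gid;
	rw_exit(&nn->nn_lock);
}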
diff -r 9551feeaa639 -r 5639653ebf86 sys/kern/vfs_cache.c
--- a/sys/kern/vfs_cache.c Fri Jan 17 21:55:13 2020 +0000
+++ b/sys/kern/vfs_cache.c Fri Jan 17 22:26:25 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: vfs_cache.c,v 1.126.2.5 2020/01/14 11:07:40 ad Exp $ */
+/* $NetBSD: vfs_cache.c,v 1.126.2.6 2020/01/17 22:26:25 ad Exp $ */
/*-
* Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc.
@@ -61,7 +61,7 @@
*/
/*
- * Name caching works as follows:
+ * Name caching:
*
* Names found by directory scans are retained in a cache for future
* reference. It is managed pseudo-LRU, so frequently used names will
@@ -78,17 +78,14 @@
* Data structures:
*
* The original BSD implementation used a global hash table, which
- * works very well on a uniprocessor system but causes performance
- * difficulties on a multiprocessor system. The global hash table is
- * also difficult to size dynamically, and can become very large. To
- * try and address these problems, the focus of interest in this
- * implementation is the directory itself. A per-directory structure
- * is used to look up names.
- *
- * XXX Currently this structure is an rbtree, but rbtrees touch many
- * cache lines during a lookup and so perform badly. The intent is to
- * utimately make use of some other data structure, perhaps a Robin
- * Hood hash. Insert blurb here when that happens.
+ * works very well but can cause difficulties for the CPU cache on
+ * modern CPUs, and concurrency headaches for the kernel hacker on
+ * multiprocessor systems. The global hash table is also difficult to
+ * size dynamically. To try and address these concerns, the focus of
+ * interest in this implementation is the directory itself: a
+ * per-directory red-black tree is used to look up names. Other than
+ * the choice of data structure, it works largely the same way as the
+ * BSD implementation.
*
* Replacement:
*
@@ -109,46 +106,49 @@
* Lock order:
*
* 1) nc_dvp->vi_ncdlock
- * 2) cache_list_lock
- * 3) vp->v_interlock
+ * 2) nc_dvp->vi_ncvlock
+ * 3) cache_lru_lock, vp->v_interlock
*
* Ugly ASCII diagram:
*
* XXX replace tabs with spaces, make less ugly
*
* ...
- * |
- * -----o-----
- * | VDIR |
- * | vnode |
- * -----------
+ * ^
+ * |
+ * -----o-----
+ * | VDIR |
+ * | nchnode |
+ * -----------
* ^
* |- nd_tree
- * |
- * +----+----+ ----------- -----------
- * | VDIR | | VCHR | | VREG |
- * | vnode o-----+ | vnode o-----+ | vnode o------+
- * +---------+ | ----------- | ----------- |
- * ^ | ^ | ^ |
- * |- nc_vp |- vi_nclist |- nc_vp |- vi_nclist |- nc_vp |
- * | | | | | |
- * -----o----- | -----o----- | -----o----- |
- * +---onamecache|<----+ +---onamecache|<----+ +---onamecache|<-----+
- * | ----------- | ----------- | -----------
- * | ^ | ^ | ^
+ * |
+ * -----o----- ----------- -----------
+ * | VDIR | | VCHR | | VREG |
+ * | nchnode o-----+ | nchnode o-----+ | nchnode o------+
+ * ----------- | ----------- | ----------- |
+ * ^ | ^ | ^ |
+ * |- nc_nn |- nn_list |- nc_nn |- nn_list |- nc_nn |
+ * | | | | | |
+ * -----o----- | -----o----- | -----o----- |
+ * +---onamecache|<----+ +---onamecache|<----+ +---onamecache|<-----+
+ * | ----------- | ----------- | -----------
+ * | ^ | ^ | ^
* | | | | | |
- * | | +----------------------+ | |
- * |-nc_dvp | +-------------------------------------------------+
- * | |/- nd_tree | |
- * | | |- nc_dvp |
- * | -----o----- | |
- * +-->| VDIR |<----------+ |
- * | vnode |<------------------------------------+
- * -----------
+ * | | +----------------------+ | |
+ * |-nc_dnn | +-------------------------------------------------+
+ * | |/- nd_tree | |
+ * | | |- nc_dnn |- nc_dnn
+ * | -----o----- | |
+ * +-->| VDIR |<----------+ |
+ * | nchnode |<------------------------------------+
+ * -----------
+ *
+ * START HERE
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.126.2.5 2020/01/14 11:07:40 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.126.2.6 2020/01/17 22:26:25 ad Exp $");
#define __NAMECACHE_PRIVATE
#ifdef _KERNEL_OPT
@@ -156,14 +156,15 @@
#include "opt_dtrace.h"
#endif
-#include <sys/param.h>
#include <sys/cpu.h>
#include <sys/errno.h>
#include <sys/evcnt.h>
+#include <sys/hash.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
+#include <sys/param.h>
#include <sys/pool.h>
#include <sys/sdt.h>
#include <sys/sysctl.h>
@@ -171,13 +172,48 @@
#include <sys/time.h>
#include <sys/vnode_impl.h>
+#include <miscfs/genfs/genfs.h>
+
+/*
+ * Per-vnode state for the namecache. This is allocated apart from struct
+ * vnode to make the best use of memory, and best use of CPU cache. Field
+ * markings and corresponding locks:
+ *
+ * - stable throughout the lifetime of the vnode
+ * n protected by nn_lock
+ * l protected by nn_listlock
+ */
+struct nchnode {
+ /* First cache line: frequently used stuff. */
+ rb_tree_t nn_tree; /* n namecache tree */
+ TAILQ_HEAD(,namecache) nn_list; /* l namecaches (parent) */
+ mode_t nn_mode; /* n cached mode or VNOVAL */
+ uid_t nn_uid; /* n cached UID or VNOVAL */
+ gid_t nn_gid; /* n cached GID or VNOVAL */
+ uint32_t nn_spare; /* - spare (padding) */
+
+ /* Second cache line: locks and infrequently used stuff. */
+ krwlock_t nn_lock /* - lock on node */
+ __aligned(COHERENCY_UNIT);
+ krwlock_t nn_listlock; /* - lock on nn_list */
+ struct vnode *nn_vp; /* - backpointer to vnode */
+};
+
+static void cache_activate(struct namecache *);
+static int cache_compare_key(void *, const void *, const void *);
+static int cache_compare_nodes(void *, const void *, const void *);
+static void cache_deactivate(void);
+static void cache_reclaim(void);
+static int cache_stat_sysctl(SYSCTLFN_ARGS);
+
/* Per-CPU counters. */
struct nchstats_percpu _NAMEI_CACHE_STATS(uintptr_t);
/* Global pool cache. */
static pool_cache_t cache_pool __read_mostly;
+static pool_cache_t cache_node_pool __read_mostly;
-/* LRU replacement state. */
+/* LRU replacement. */
enum cache_lru_id {
LRU_ACTIVE,
LRU_INACTIVE,
@@ -194,7 +230,6 @@
/* Cache effectiveness statistics. This holds total from per-cpu stats */
struct nchstats nchstats __cacheline_aligned;
-/* Macro to count an event. */
#define COUNT(f) do { \
kpreempt_disable(); \
((struct nchstats_percpu *)curcpu()->ci_data.cpu_nch)->f++; \
@@ -206,12 +241,16 @@
static const int cache_lru_maxscan = 128; /* max # to scan/reclaim */
static int doingcache = 1; /* 1 => enable the cache */
-static void cache_reclaim(void);
-static void cache_remove(struct namecache *, const bool);
+/* sysctl */
+static struct sysctllog *cache_sysctllog;
-/* sysctl */
-static struct sysctllog *sysctllog;
-static void sysctl_cache_stat_setup(void);
+/* Red-black tree */
+static rb_tree_ops_t cache_rbtree_ops __read_mostly = {
+ .rbto_compare_nodes = cache_compare_nodes,
+ .rbto_compare_key = cache_compare_key,
+ .rbto_node_offset = offsetof(struct namecache, nc_tree),
+ .rbto_context = NULL
+};
/* dtrace hooks */
SDT_PROVIDER_DEFINE(vfs);
@@ -243,13 +282,16 @@
static int
cache_compare_nodes(void *context, const void *n1, const void *n2)
{
- const struct namecache *ncp1 = n1;
- const struct namecache *ncp2 = n2;
+ const struct namecache *nc1 = n1;
+ const struct namecache *nc2 = n2;
- if (ncp1->nc_nlen != ncp2->nc_nlen) {
- return (int)ncp1->nc_nlen - (int)ncp2->nc_nlen;
+ if (nc1->nc_key < nc2->nc_key) {
+ return -1;
}
- return memcmp(ncp1->nc_name, ncp2->nc_name, ncp1->nc_nlen);
+ if (nc1->nc_key > nc2->nc_key) {
+ return 1;
+ }
+ return 0;
}
/*
@@ -258,156 +300,135 @@
static int
cache_compare_key(void *context, const void *n, const void *k)
{
- const struct namecache *ncp = n;
- const struct iovec *iov = k;
+ const struct namecache *nc = n;
+ const int64_t key = *(const int64_t *)k;
- if (ncp->nc_nlen != iov->iov_len) {
- return (int)ncp->nc_nlen - (int)iov->iov_len;
+ if (nc->nc_key < key) {
+ return -1;
}
- return memcmp(ncp->nc_name, iov->iov_base, ncp->nc_nlen);
+ if (nc->nc_key > key) {