[src/trunk]: src/sys namecache changes:
details: https://anonhg.NetBSD.org/src/rev/9ee075e8188e
branches: trunk
changeset: 465644:9ee075e8188e
user: ad <ad%NetBSD.org@localhost>
date: Sun Dec 01 13:39:53 2019 +0000
description:
namecache changes:
- Delete the per-entry lock, and borrow the associated vnode's v_interlock
instead (a sketch of the idea follows this list). We need to acquire it
during lookup anyway. We can revisit this in the future but for now it's a
stepping stone, and works within the quite limited context of what we have
(BSD namecache/lookup design).
- Implement an idea that Mateusz Guzik (mjg%FreeBSD.org@localhost) gave me
(see the second sketch below). In cache_reclaim(), we don't need to lock
out all of the CPUs to garbage collect entries. All we need to do is
observe their locks unheld at least once: then we know they are not in the
critical section, and no longer have visibility of the entries about to be
garbage collected.
- The above makes it safe for sysctl to take only namecache_lock to get
stats (third sketch below), and we can remove all the crap dealing with
per-CPU locks.
- For lockstat, make namecache_lock a static now that we have
__cacheline_aligned.
- Avoid false sharing - don't write back to nc_hittime unless it has changed.
Put a comment in place explaining this. Pretty sure this was there in
2008/2009 but someone removed it (understandably, the code looks weird).
- Use a mutex to protect the garbage collection queue instead of atomics, and
adjust the low water mark up so that cache_reclaim() isn't doing so much
work at once.
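
A minimal sketch of the first item, assuming (consistently with the diff
below) that nc_lock becomes a pointer that aliases the vnode's v_interlock
for positive entries; the helper name cache_set_lock() is hypothetical:

    /*
     * Sketch only: a namecache entry borrows its vnode's v_interlock
     * as the per-entry lock, so cache_lookup() takes one mutex where
     * it previously took two (nc_lock, then v_interlock).
     */
    static void
    cache_set_lock(struct namecache *ncp, struct vnode *vp)
    {

            ncp->nc_vp = vp;
            if (vp != NULL) {
                    /* Positive entry: share the vnode's interlock. */
                    ncp->nc_lock = vp->v_interlock;
            }
            /* Negative entries (vp == NULL) keep a private mutex. */
            KASSERT(vp == NULL || ncp->nc_lock == vp->v_interlock);
    }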
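The second item replaces the old lock-all-CPUs dance in cache_reclaim() with
a much cheaper pass. A hypothetical sketch of that pass (the committed
cache_reclaim() lies past the truncation point of the diff below):

    /*
     * Sketch: after dead entries have been unlinked from the hash
     * chains under namecache_lock, take and release each CPU's lock
     * once.  Any lookup that could still see the dead entries must
     * hold its per-CPU lock, so once every lock has been observed
     * unheld, no CPU retains a reference and the entries may be freed.
     */
    static void
    cache_observe_cpus(void)
    {
            CPU_INFO_ITERATOR cii;
            struct cpu_info *ci;
            struct nchcpu *cpup;

            for (CPU_INFO_FOREACH(cii, ci)) {
                    cpup = ci->ci_data.cpu_nch;
                    mutex_enter(&cpup->cpu_lock);
                    mutex_exit(&cpup->cpu_lock);
            }
    }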
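For the stats item, a sketch of why the sysctl path gets simpler: with
deletion gated on the observation pass above, holding namecache_lock alone
yields a stable snapshot. The handler below is an assumed simplification,
not the committed code (the real handler also folds in the per-CPU
increments before copying out):

    static int
    cache_stat_sysctl(SYSCTLFN_ARGS)
    {
            struct sysctlnode node;
            struct nchstats stats;

            /* namecache_lock alone is enough; no per-CPU locks needed. */
            mutex_enter(&namecache_lock);
            stats = nchstats;
            mutex_exit(&namecache_lock);

            node = *rnode;
            node.sysctl_data = &stats;
            return sysctl_lookup(SYSCTLFN_CALL(&node));
    }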
diffstat:
sys/kern/vfs_cache.c | 317 +++++++++++++++++++++---------------
sys/rump/include/rump/rump_namei.h | 2 +-
sys/sys/namei.h | 7 +-
sys/sys/namei.src | 7 +-
4 files changed, 189 insertions(+), 144 deletions(-)
diffs (truncated from 824 to 300 lines):
diff -r 069ccbae050c -r 9ee075e8188e sys/kern/vfs_cache.c
--- a/sys/kern/vfs_cache.c Sun Dec 01 13:20:42 2019 +0000
+++ b/sys/kern/vfs_cache.c Sun Dec 01 13:39:53 2019 +0000
@@ -1,7 +1,7 @@
-/* $NetBSD: vfs_cache.c,v 1.123 2019/09/15 17:37:25 maya Exp $ */
+/* $NetBSD: vfs_cache.c,v 1.124 2019/12/01 13:39:53 ad Exp $ */
/*-
- * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -58,7 +58,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.123 2019/09/15 17:37:25 maya Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.124 2019/12/01 13:39:53 ad Exp $");
#define __NAMECACHE_PRIVATE
#ifdef _KERNEL_OPT
@@ -130,7 +130,7 @@
* - Invalidate: active--->queued
*
* Done by cache_invalidate. If not already invalidated, nullify
- * ncp->nc_dvp and ncp->nc_vp, and add to cache_gcqueue. Called,
+ * ncp->nc_dvp and ncp->nc_vp, and add to namecache_gc_queue. Called,
* among various other places, in cache_lookup(dvp, name, namelen,
* nameiop, cnflags, &iswht, &vp) when MAKEENTRY is missing from
* cnflags.
@@ -145,16 +145,17 @@
* Locking.
*
* L namecache_lock Global lock for namecache table and queues.
+ * G namecache_gc_lock Global lock for garbage collection.
* C struct nchcpu::cpu_lock Per-CPU lock to reduce read contention.
- * N struct namecache::nc_lock Per-entry lock.
- * V struct vnode::v_interlock Vnode interlock.
+ * N struct namecache::nc_lock Per-entry lock, matching nc_vp->v_interlock.
+ * If nc_vp==NULL, lock is private / not shared.
*
- * Lock order: L -> C -> N -> V
+ * Lock order: L -> C -> N
*
* Examples:
* . L->C: cache_reclaim
- * . C->N->V: cache_lookup
- * . L->N->V: cache_purge1, cache_revlookup
+ * . C->N: cache_lookup
+ * . L->N: cache_purge1, cache_revlookup
*
* All use serialized by namecache_lock:
*
@@ -167,8 +168,9 @@
* - Insertion serialized by namecache_lock,
* - read protected by per-CPU lock,
* - insert/read ordering guaranteed by memory barriers, and
- * - deletion allowed only under namecache_lock and *all* per-CPU locks
- * in CPU_INFO_FOREACH order:
+ * - deletion allowed only under namecache_lock, with namecache_gc_lock
+ * taken to chop out the garbage collection list, and *all* per-CPU locks
+ * observed as "unowned" at least once:
*
* nchashtbl / struct namecache::nc_hash
*
@@ -180,11 +182,13 @@
*
* struct namecache::nc_dvp
* struct namecache::nc_vp
- * struct namecache::nc_gcqueue (*)
- * struct namecache::nc_hittime (**)
+ * struct namecache::nc_hittime (*)
+ *
+ * All use serialized by struct namecache_gc_lock:
*
- * (*) Once on the queue, only cache_thread uses this nc_gcqueue, unlocked.
- * (**) cache_prune reads nc_hittime unlocked, since approximate is OK.
+ * struct namecache::nc_gclist
+ *
+ * (*) cache_prune reads nc_hittime unlocked, since approximate is OK.
*
* Unlocked because stable after initialization:
*
@@ -257,7 +261,7 @@
* Structures associated with name cacheing.
*/
-static kmutex_t *namecache_lock __read_mostly;
+static kmutex_t namecache_lock __cacheline_aligned;
static pool_cache_t namecache_cache __read_mostly;
static TAILQ_HEAD(, namecache) nclruhead __cacheline_aligned;
@@ -276,8 +280,9 @@
static long numcache __cacheline_aligned;
/* Garbage collection queue and number of entries pending in it. */
-static void *cache_gcqueue;
-static u_int cache_gcpend;
+static kmutex_t namecache_gc_lock __cacheline_aligned;
+static SLIST_HEAD(namecache_gc_queue, namecache) namecache_gc_queue;
+static u_int namecache_gc_pend;
/* Cache effectiveness statistics. This holds total from per-cpu stats */
struct nchstats nchstats __cacheline_aligned;
@@ -287,8 +292,6 @@
* values and add current per-cpu increments to the subsystem total
* last collected by cache_reclaim().
*/
-#define CACHE_STATS_CURRENT /* nothing */
-
#define COUNT(cpup, f) ((cpup)->cpu_stats.f++)
#define UPDATE(cpup, f) do { \
@@ -298,15 +301,10 @@
Xcpup->cpu_stats_last.f = Xcnt; \
} while (/* CONSTCOND */ 0)
-#define ADD(stats, cpup, f) do { \
- struct nchcpu *Xcpup = (cpup); \
- stats.f += Xcpup->cpu_stats.f - Xcpup->cpu_stats_last.f; \
-} while (/* CONSTCOND */ 0)
-
/* Do unlocked stats the same way. Use a different name to allow mind changes */
#define COUNT_UNL(cpup, f) COUNT((cpup), f)
-static const int cache_lowat = 95;
+static const int cache_lowat = 97;
static const int cache_hiwat = 98;
static const int cache_hottime = 5; /* number of seconds */
static int doingcache = 1; /* 1 => enable the cache */
@@ -369,15 +367,12 @@
/*
* Invalidate a cache entry and enqueue it for garbage collection.
- * The caller needs to hold namecache_lock or a per-cpu lock to hold
- * off cache_reclaim().
*/
static void
cache_invalidate(struct namecache *ncp)
{
- void *head;
- KASSERT(mutex_owned(&ncp->nc_lock));
+ KASSERT(mutex_owned(ncp->nc_lock));
if (ncp->nc_dvp != NULL) {
SDT_PROBE(vfs, namecache, invalidate, done, ncp->nc_dvp,
@@ -385,11 +380,10 @@
ncp->nc_vp = NULL;
ncp->nc_dvp = NULL;
- do {
- head = cache_gcqueue;
- ncp->nc_gcqueue = head;
- } while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head);
- atomic_inc_uint(&cache_gcpend);
+ mutex_enter(&namecache_gc_lock);
+ SLIST_INSERT_HEAD(&namecache_gc_queue, ncp, nc_gclist);
+ namecache_gc_pend++;
+ mutex_exit(&namecache_gc_lock);
}
}
@@ -401,7 +395,7 @@
cache_disassociate(struct namecache *ncp)
{
- KASSERT(mutex_owned(namecache_lock));
+ KASSERT(mutex_owned(&namecache_lock));
KASSERT(ncp->nc_dvp == NULL);
if (ncp->nc_lru.tqe_prev != NULL) {
@@ -424,7 +418,8 @@
/*
* Lock all CPUs to prevent any cache lookup activity. Conceptually,
- * this locks out all "readers".
+ * this locks out all "readers". This is a very heavyweight operation
+ * that we only use for nchreinit().
*/
static void
cache_lock_cpus(void)
@@ -433,6 +428,9 @@
struct cpu_info *ci;
struct nchcpu *cpup;
+ /* Not necessary but don't want more than one LWP trying this. */
+ KASSERT(mutex_owned(&namecache_lock));
+
/*
* Lock out all CPUs first, then harvest per-cpu stats. This
* is probably not quite as cache-efficient as doing the lock
@@ -485,6 +483,7 @@
struct nchashhead *ncpp;
struct namecache *ncp;
nchash_t hash;
+ int ticks;
KASSERT(dvp != NULL);
hash = cache_hash(name, namelen);
@@ -496,15 +495,22 @@
ncp->nc_nlen != namelen ||
memcmp(ncp->nc_name, name, (u_int)ncp->nc_nlen))
continue;
- mutex_enter(&ncp->nc_lock);
+ mutex_enter(ncp->nc_lock);
if (__predict_true(ncp->nc_dvp == dvp)) {
- ncp->nc_hittime = hardclock_ticks;
+ ticks = hardclock_ticks;
+ if (ncp->nc_hittime != ticks) {
+ /*
+ * Avoid false sharing on MP: do not store
+ * to *ncp unless the value changed.
+ */
+ ncp->nc_hittime = ticks;
+ }
SDT_PROBE(vfs, namecache, lookup, hit, dvp,
name, namelen, 0, 0);
return ncp;
}
/* Raced: entry has been nullified. */
- mutex_exit(&ncp->nc_lock);
+ mutex_exit(ncp->nc_lock);
}
SDT_PROBE(vfs, namecache, lookup, miss, dvp,
@@ -573,7 +579,6 @@
int error;
bool hit;
-
/* Establish default result values */
if (iswht_ret != NULL) {
*iswht_ret = 0;
@@ -610,12 +615,13 @@
* want cache entry to exist.
*/
cache_invalidate(ncp);
- mutex_exit(&ncp->nc_lock);
+ mutex_exit(ncp->nc_lock);
mutex_exit(&cpup->cpu_lock);
/* found nothing */
return false;
}
- if (ncp->nc_vp == NULL) {
+ vp = ncp->nc_vp;
+ if (__predict_false(vp == NULL)) {
if (iswht_ret != NULL) {
/*
* Restore the ISWHITEOUT flag saved earlier.
@@ -642,14 +648,11 @@
/* found nothing */
hit = false;
}
- mutex_exit(&ncp->nc_lock);
+ mutex_exit(ncp->nc_lock);
mutex_exit(&cpup->cpu_lock);
return hit;
}
-
- vp = ncp->nc_vp;
- mutex_enter(vp->v_interlock);
- mutex_exit(&ncp->nc_lock);
+ KASSERT(vp->v_interlock == ncp->nc_lock);
mutex_exit(&cpup->cpu_lock);
/*
@@ -724,13 +727,12 @@
*iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
}
COUNT(cpup, ncs_neghits);
- mutex_exit(&ncp->nc_lock);
+ mutex_exit(ncp->nc_lock);
mutex_exit(&cpup->cpu_lock);
/* found negative entry; vn is already null from above */
return true;
}
- mutex_enter(vp->v_interlock);
- mutex_exit(&ncp->nc_lock);
+ KASSERT(vp->v_interlock == ncp->nc_lock);
mutex_exit(&cpup->cpu_lock);
/*
@@ -775,6 +777,7 @@
struct nchcpu *cpup;
char *bp;
int error, nlen;
+ bool locked, again;
if (!doingcache)
goto out;
@@ -787,10 +790,12 @@
* is the only place these counters are incremented so no one
* will be racing with us to increment them.
*/
+ again = false;
+ retry:
cpup = curcpu()->ci_data.cpu_nch;
- mutex_enter(namecache_lock);