Source-Changes-HG archive
[src/trunk]: src Update stats-keeping in sys/kern/vfs_cache.c to remove (most)
details: https://anonhg.NetBSD.org/src/rev/378a6bcbc47d
branches: trunk
changeset: 335084:378a6bcbc47d
user: dennis <dennis%NetBSD.org@localhost>
date: Wed Dec 24 20:01:21 2014 +0000
description:
Update stats-keeping in sys/kern/vfs_cache.c to remove (most)
races while allowing consistent lockless sampling of the per-cpu
statistics without atomic operations. Update comment describing
the locking protocol to include this.
These files were fumble-fingered out of the last commit.
diffstat:
sys/kern/vfs_cache.c | 260 ++++++++++++++++++++++++++++++++---------------
usr.bin/systat/vmstat.c | 6 +-
usr.bin/vmstat/vmstat.c | 21 +---
3 files changed, 182 insertions(+), 105 deletions(-)
diffs (truncated from 569 to 300 lines):
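For orientation before the diff: the scheme the description refers to keeps 32 bit per-cpu counters that are bumped without atomics and folds them into 64 bit system-wide totals by remembering the value seen at the last harvest and adding only the delta. The stand-alone C sketch below models just that arithmetic in user space; the names (hits, hits_last, harvest(), NCPU) are illustrative and are not the kernel's nchstats_percpu/COUNT()/UPDATE() code, which additionally involves per-cpu locks and runs from cache_reclaim().

/*
 * Illustrative user-space model of the delta-sampling scheme; not the
 * kernel code.  Each CPU owns a 32-bit counter that only that CPU
 * writes; a periodic harvester folds changes into a 64-bit total.
 */
#include <stdint.h>
#include <stdio.h>

#define NCPU 4	/* illustrative; the kernel iterates with CPU_INFO_FOREACH */

static uint32_t hits[NCPU];	  /* live per-cpu counters, non-atomic */
static uint32_t hits_last[NCPU];  /* value recorded at the last harvest */
static uint64_t hits_total;	  /* 64-bit system-wide total */

/* Model of COUNT(): each CPU bumps only its own slot. */
static void
count_hit(int cpu)
{
	hits[cpu]++;
}

/*
 * Model of the UPDATE() pass: add the delta since the previous harvest
 * and remember the current value.  The live counter is never reset, so
 * nothing but its owning CPU ever writes it; unsigned subtraction also
 * absorbs 32-bit wrap-around, provided harvesting runs often enough
 * that a counter cannot wrap past its last recorded sample.
 */
static void
harvest(void)
{
	for (int cpu = 0; cpu < NCPU; cpu++) {
		uint32_t cur = hits[cpu];

		hits_total += cur - hits_last[cpu];
		hits_last[cpu] = cur;
	}
}

int
main(void)
{
	count_hit(0);
	count_hit(0);
	count_hit(3);
	harvest();
	printf("total hits: %llu\n", (unsigned long long)hits_total);
	return 0;
}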
diff -r 6bc5f33bc593 -r 378a6bcbc47d sys/kern/vfs_cache.c
--- a/sys/kern/vfs_cache.c Wed Dec 24 19:56:49 2014 +0000
+++ b/sys/kern/vfs_cache.c Wed Dec 24 20:01:21 2014 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: vfs_cache.c,v 1.102 2014/12/07 22:23:38 dennis Exp $ */
+/* $NetBSD: vfs_cache.c,v 1.103 2014/12/24 20:01:21 dennis Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -58,7 +58,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.102 2014/12/07 22:23:38 dennis Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.103 2014/12/24 20:01:21 dennis Exp $");
#include "opt_ddb.h"
#include "opt_revcache.h"
@@ -126,19 +126,52 @@
* locks essentially idles the subsystem, ensuring there are no
* concurrent references to the cache entries being freed.
*
- * As a side effect while running cache_reclaim(), once the per-cpu
- * locks are held the per-cpu stats are sampled, added to the
- * subsystem total and zeroed. Only some of the per-cpu stats are
- * incremented with the per-cpu lock held, however, and attempting
- * to add locking around the remaining counters currently
- * incremented without a lock can cause deadlock, so we don't
- * do that. XXX Fix this up in a later revision.
+ * 32 bit per-cpu statistic counters (struct nchstats_percpu) are
+ * incremented when the operations they count are performed while
+ * running on the corresponding CPU. Frequently individual counters
+ * are incremented while holding a lock (either a per-cpu lock or
+ * namecache_lock) sufficient to preclude concurrent increments
+ * being done to the same counter, so non-atomic increments are
+ * done using the COUNT() macro. Counters which are incremented
+ * when one of these locks is not held use the COUNT_UNL() macro
+ * instead. COUNT_UNL() could be defined to do atomic increments
+ * but currently just does what COUNT() does, on the theory that
+ * it is unlikely the non-atomic increment will be interrupted
+ * by something on the same CPU that increments the same counter,
+ * but even if it does happen the consequences aren't serious.
+ *
+ * N.B.: Attempting to protect COUNT_UNL() increments by taking
+ * a per-cpu lock in the namecache_count_*() functions causes
+ * a deadlock. Don't do that, use atomic increments instead if
+ * the imperfections here bug you.
*
- * Per-cpu namecache data is defined next.
+ * The 64 bit system-wide statistic counts (struct nchstats) are
+ * maintained by sampling the per-cpu counters periodically, adding
+ * in the deltas since the last samples and recording the current
+ * samples to use to compute the next delta. The sampling is done
+ * as a side effect of cache_reclaim() which is run periodically,
+ * for its own purposes, often enough to avoid overflow of the 32
+ * bit counters. While sampling in this fashion requires no locking,
+ * it is nevertheless done only after all locks have been taken by
+ * cache_reclaim() to allow cache_stat_sysctl() to hold off
+ * cache_reclaim() with minimal locking.
+ *
+ * cache_stat_sysctl() takes its CPU's per-cpu lock to hold off
+ * cache_reclaim() so that it can copy the subsystem total stats
+ * without them being concurrently modified. If CACHE_STATS_CURRENT
+ * is defined it also harvests the per-cpu increments into the total,
+ * which again requires cache_reclaim() to be held off.
+ *
+ * The per-cpu data (a lock and the per-cpu stats structures)
+ * are defined next.
*/
+struct nchstats_percpu _NAMEI_CACHE_STATS(uint32_t);
+
struct nchcpu {
- kmutex_t cpu_lock;
- struct nchstats cpu_stats;
+ kmutex_t cpu_lock;
+ struct nchstats_percpu cpu_stats;
+ /* XXX maybe __cacheline_aligned would improve this? */
+ struct nchstats_percpu cpu_stats_last; /* from last sample */
};
/*
@@ -177,9 +210,32 @@
static void *cache_gcqueue;
static u_int cache_gcpend;
-/* Cache effectiveness statistics. */
+/* Cache effectiveness statistics. This holds total from per-cpu stats */
struct nchstats nchstats __cacheline_aligned;
-#define COUNT(c,x) (c.x++)
+
+/*
+ * Macros to count an event, update the central stats with per-cpu
+ * values and add current per-cpu increments to the subsystem total
+ * last collected by cache_reclaim().
+ */
+#define CACHE_STATS_CURRENT /* nothing */
+
+#define COUNT(cpup, f) ((cpup)->cpu_stats.f++)
+
+#define UPDATE(cpup, f) do { \
+ struct nchcpu *Xcpup = (cpup); \
+ uint32_t Xcnt = (volatile uint32_t) Xcpup->cpu_stats.f; \
+ nchstats.f += Xcnt - Xcpup->cpu_stats_last.f; \
+ Xcpup->cpu_stats_last.f = Xcnt; \
+} while (/* CONSTCOND */ 0)
+
+#define ADD(stats, cpup, f) do { \
+ struct nchcpu *Xcpup = (cpup); \
+ stats.f += Xcpup->cpu_stats.f - Xcpup->cpu_stats_last.f; \
+} while (/* CONSTCOND */ 0)
+
+/* Do unlocked stats the same way. Use a different name to allow mind changes */
+#define COUNT_UNL(cpup, f) COUNT((cpup), f)
static const int cache_lowat = 95;
static const int cache_hiwat = 98;
@@ -219,6 +275,8 @@
/*
* Invalidate a cache entry and enqueue it for garbage collection.
+ * The caller needs to hold namecache_lock or a per-cpu lock to hold
+ * off cache_reclaim().
*/
static void
cache_invalidate(struct namecache *ncp)
@@ -271,11 +329,6 @@
* Lock all CPUs to prevent any cache lookup activity. Conceptually,
* this locks out all "readers".
*/
-#define UPDATE(f) do { \
- nchstats.f += cpup->cpu_stats.f; \
- cpup->cpu_stats.f = 0; \
-} while (/* CONSTCOND */ 0)
-
static void
cache_lock_cpus(void)
{
@@ -283,24 +336,31 @@
struct cpu_info *ci;
struct nchcpu *cpup;
+ /*
+ * Lock out all CPUs first, then harvest per-cpu stats. This
+ * is probably not quite as cache-efficient as doing the lock
+ * and harvest at the same time, but allows cache_stat_sysctl()
+ * to make do with a per-cpu lock.
+ */
for (CPU_INFO_FOREACH(cii, ci)) {
cpup = ci->ci_data.cpu_nch;
mutex_enter(&cpup->cpu_lock);
- UPDATE(ncs_goodhits);
- UPDATE(ncs_neghits);
- UPDATE(ncs_badhits);
- UPDATE(ncs_falsehits);
- UPDATE(ncs_miss);
- UPDATE(ncs_long);
- UPDATE(ncs_pass2);
- UPDATE(ncs_2passes);
- UPDATE(ncs_revhits);
- UPDATE(ncs_revmiss);
+ }
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ cpup = ci->ci_data.cpu_nch;
+ UPDATE(cpup, ncs_goodhits);
+ UPDATE(cpup, ncs_neghits);
+ UPDATE(cpup, ncs_badhits);
+ UPDATE(cpup, ncs_falsehits);
+ UPDATE(cpup, ncs_miss);
+ UPDATE(cpup, ncs_long);
+ UPDATE(cpup, ncs_pass2);
+ UPDATE(cpup, ncs_2passes);
+ UPDATE(cpup, ncs_revhits);
+ UPDATE(cpup, ncs_revmiss);
}
}
-#undef UPDATE
-
/*
* Release all CPU locks.
*/
@@ -318,8 +378,9 @@
}
/*
- * Find a single cache entry and return it locked. 'namecache_lock' or
- * at least one of the per-CPU locks must be held.
+ * Find a single cache entry and return it locked.
+ * The caller needs to hold namecache_lock or a per-cpu lock to hold
+ * off cache_reclaim().
*/
static struct namecache *
cache_lookup_entry(const struct vnode *dvp, const char *name, size_t namelen)
@@ -333,6 +394,7 @@
ncpp = &nchashtbl[NCHASH2(hash, dvp)];
LIST_FOREACH(ncp, ncpp, nc_hash) {
+ /* XXX Needs barrier for Alpha here */
if (ncp->nc_dvp != dvp ||
ncp->nc_nlen != namelen ||
memcmp(ncp->nc_name, name, (u_int)ncp->nc_nlen))
@@ -407,7 +469,8 @@
struct namecache *ncp;
struct vnode *vp;
struct nchcpu *cpup;
- int error;
+ int error, ret_value;
+
/* Establish default result values */
if (iswht_ret != NULL) {
@@ -422,20 +485,21 @@
cpup = curcpu()->ci_data.cpu_nch;
mutex_enter(&cpup->cpu_lock);
if (__predict_false(namelen > NCHNAMLEN)) {
- COUNT(cpup->cpu_stats, ncs_long);
+ COUNT(cpup, ncs_long);
mutex_exit(&cpup->cpu_lock);
/* found nothing */
return 0;
}
+
ncp = cache_lookup_entry(dvp, name, namelen);
if (__predict_false(ncp == NULL)) {
- COUNT(cpup->cpu_stats, ncs_miss);
+ COUNT(cpup, ncs_miss);
mutex_exit(&cpup->cpu_lock);
/* found nothing */
return 0;
}
if ((cnflags & MAKEENTRY) == 0) {
- COUNT(cpup->cpu_stats, ncs_badhits);
+ COUNT(cpup, ncs_badhits);
/*
* Last component and we are renaming or deleting,
* the cache entry is invalid, or otherwise don't
@@ -460,13 +524,11 @@
if (__predict_true(nameiop != CREATE ||
(cnflags & ISLASTCN) == 0)) {
- COUNT(cpup->cpu_stats, ncs_neghits);
- mutex_exit(&ncp->nc_lock);
- mutex_exit(&cpup->cpu_lock);
+ COUNT(cpup, ncs_neghits);
/* found neg entry; vn is already null from above */
- return 1;
+ ret_value = 1;
} else {
- COUNT(cpup->cpu_stats, ncs_badhits);
+ COUNT(cpup, ncs_badhits);
/*
* Last component and we are renaming or
* deleting, the cache entry is invalid,
@@ -474,17 +536,22 @@
* exist.
*/
cache_invalidate(ncp);
- mutex_exit(&ncp->nc_lock);
- mutex_exit(&cpup->cpu_lock);
/* found nothing */
- return 0;
+ ret_value = 0;
}
+ mutex_exit(&ncp->nc_lock);
+ mutex_exit(&cpup->cpu_lock);
+ return ret_value;
}
vp = ncp->nc_vp;
mutex_enter(vp->v_interlock);
mutex_exit(&ncp->nc_lock);
mutex_exit(&cpup->cpu_lock);
+
+ /*
+ * Unlocked except for the vnode interlock. Call vget().
+ */
error = vget(vp, LK_NOWAIT);
if (error) {
KASSERT(error == EBUSY);
@@ -492,27 +559,21 @@
* This vnode is being cleaned out.
* XXX badhits?
*/
- COUNT(cpup->cpu_stats, ncs_falsehits);
+ COUNT_UNL(cpup, ncs_falsehits);
/* found nothing */
return 0;
}
-#ifdef DEBUG
- /*
- * since we released nb->nb_lock,
- * we can't use this pointer any more.
- */
- ncp = NULL;
-#endif /* DEBUG */
-
- /* We don't have the right lock, but this is only for stats. */
- COUNT(cpup->cpu_stats, ncs_goodhits);
-
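The truncation cuts the diff off inside cache_lookup(), before the cache_stat_sysctl() changes; the full change is in the repository at the URL above. As a rough model of the reader/harvester interplay the new comment describes (cache_reclaim() takes every per-cpu lock before folding deltas into the 64 bit totals, so a reader holding any one per-cpu lock sees a stable total), here is a small user-space sketch. pthread mutexes stand in for kmutex_t, and all names are illustrative rather than the kernel's.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define NCPU 4

struct percpu {
	pthread_mutex_t lock;	/* stands in for nchcpu::cpu_lock */
	uint32_t hits;		/* live 32-bit counter */
	uint32_t hits_last;	/* snapshot from the previous harvest */
};

static struct percpu pcpu[NCPU];
static uint64_t total_hits;	/* stands in for a 64-bit nchstats field */

/*
 * Model of cache_lock_cpus(): take every per-cpu lock first, then fold
 * the per-cpu deltas into the system-wide total.  The locks are
 * released here for brevity; the kernel keeps them held while it
 * reclaims entries.
 */
static void
harvest(void)
{
	int i;

	for (i = 0; i < NCPU; i++)
		pthread_mutex_lock(&pcpu[i].lock);
	for (i = 0; i < NCPU; i++) {
		uint32_t cur = pcpu[i].hits;

		total_hits += cur - pcpu[i].hits_last;
		pcpu[i].hits_last = cur;
	}
	for (i = 0; i < NCPU; i++)
		pthread_mutex_unlock(&pcpu[i].lock);
}

/*
 * Model of cache_stat_sysctl(): holding any single per-cpu lock is
 * enough to hold off the harvester, because the harvester must own all
 * of them before it touches the total, so the copy below is stable.
 */
static uint64_t
read_total(int mycpu)
{
	uint64_t snap;

	pthread_mutex_lock(&pcpu[mycpu].lock);
	snap = total_hits;
	pthread_mutex_unlock(&pcpu[mycpu].lock);
	return snap;
}

int
main(void)
{
	int i;

	for (i = 0; i < NCPU; i++)
		pthread_mutex_init(&pcpu[i].lock, NULL);

	pcpu[0].hits = 3;
	pcpu[1].hits = 5;
	harvest();
	printf("sampled total: %llu\n", (unsigned long long)read_total(0));
	return 0;
}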