Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src Update stats-keeping in sys/kern/vfs_cache.c to remove (most)



details:   https://anonhg.NetBSD.org/src/rev/378a6bcbc47d
branches:  trunk
changeset: 335084:378a6bcbc47d
user:      dennis <dennis%NetBSD.org@localhost>
date:      Wed Dec 24 20:01:21 2014 +0000

description:
Update stats-keeping in sys/kern/vfs_cache.c to remove (most)
races while allowing consistent lockless sampling of the per-cpu
statistics without atomic operations.  Update comment describing
the locking protocol to include this.

These files were fumble-fingered out of the last commit.

diffstat:

 sys/kern/vfs_cache.c    |  260 ++++++++++++++++++++++++++++++++---------------
 usr.bin/systat/vmstat.c |    6 +-
 usr.bin/vmstat/vmstat.c |   21 +---
 3 files changed, 182 insertions(+), 105 deletions(-)

diffs (truncated from 569 to 300 lines):

diff -r 6bc5f33bc593 -r 378a6bcbc47d sys/kern/vfs_cache.c
--- a/sys/kern/vfs_cache.c      Wed Dec 24 19:56:49 2014 +0000
+++ b/sys/kern/vfs_cache.c      Wed Dec 24 20:01:21 2014 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: vfs_cache.c,v 1.102 2014/12/07 22:23:38 dennis Exp $   */
+/*     $NetBSD: vfs_cache.c,v 1.103 2014/12/24 20:01:21 dennis Exp $   */
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -58,7 +58,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.102 2014/12/07 22:23:38 dennis Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.103 2014/12/24 20:01:21 dennis Exp $");
 
 #include "opt_ddb.h"
 #include "opt_revcache.h"
@@ -126,19 +126,52 @@
  * locks essentially idles the subsystem, ensuring there are no
  * concurrent references to the cache entries being freed.
  *
- * As a side effect while running cache_reclaim(), once the per-cpu
- * locks are held the per-cpu stats are sampled, added to the
- * subsystem total and zeroed.  Only some of the per-cpu stats are
- * incremented with the per-cpu lock held, however, and attempting
- * to add locking around the remaining counters currently
- * incremented without a lock can cause deadlock, so we don't
- * do that.  XXX Fix this up in a later revision.
+ * 32 bit per-cpu statistic counters (struct nchstats_percpu) are
+ * incremented when the operations they count are performed while
+ * running on the corresponding CPU.  Frequently individual counters
+ * are incremented while holding a lock (either a per-cpu lock or
+ * namecache_lock) sufficient to preclude concurrent increments
+ * being done to the same counter, so non-atomic increments are
+ * done using the COUNT() macro.  Counters which are incremented
+ * when one of these locks is not held use the COUNT_UNL() macro
+ * instead.  COUNT_UNL() could be defined to do atomic increments
+ * but currently just does what COUNT() does, on the theory that
+ * it is unlikely the non-atomic increment will be interrupted
+ * by something on the same CPU that increments the same counter,
+ * but even if it does happen the consequences aren't serious.
+ *
+ * N.B.: Attempting to protect COUNT_UNL() increments by taking
+ * a per-cpu lock in the namecache_count_*() functions causes
+ * a deadlock.  Don't do that, use atomic increments instead if
+ * the imperfections here bug you.
  *
- * Per-cpu namecache data is defined next.
+ * The 64 bit system-wide statistic counts (struct nchstats) are
+ * maintained by sampling the per-cpu counters periodically, adding
+ * in the deltas since the last samples and recording the current
+ * samples to use to compute the next delta.  The sampling is done
+ * as a side effect of cache_reclaim() which is run periodically,
+ * for its own purposes, often enough to avoid overflow of the 32
+ * bit counters.  While sampling in this fashion requires no locking
+ * it is never-the-less done only after all locks have been taken by
+ * cache_reclaim() to allow cache_stat_sysctl() to hold off
+ * cache_reclaim() with minimal locking.
+ *
+ * cache_stat_sysctl() takes its CPU's per-cpu lock to hold off
+ * cache_reclaim() so that it can copy the subsystem total stats
+ * without them being concurrently modified.  If CACHE_STATS_CURRENT
+ * is defined it also harvests the per-cpu increments into the total,
+ * which again requires cache_reclaim() to be held off.
+ *
+ * The per-cpu data (a lock and the per-cpu stats structures)
+ * are defined next.
  */
+struct nchstats_percpu _NAMEI_CACHE_STATS(uint32_t);
+
 struct nchcpu {
-       kmutex_t        cpu_lock;
-       struct nchstats cpu_stats;
+       kmutex_t                cpu_lock;
+       struct nchstats_percpu  cpu_stats;
+       /* XXX maybe __cacheline_aligned would improve this? */
+       struct nchstats_percpu  cpu_stats_last; /* from last sample */
 };
 
 /*
@@ -177,9 +210,32 @@
 static void    *cache_gcqueue;
 static u_int   cache_gcpend;
 
-/* Cache effectiveness statistics. */
+/* Cache effectiveness statistics.  This holds total from per-cpu stats */
 struct nchstats        nchstats __cacheline_aligned;
-#define        COUNT(c,x)      (c.x++)
+
+/*
+ * Macros to count an event, update the central stats with per-cpu
+ * values and add current per-cpu increments to the subsystem total
+ * last collected by cache_reclaim().
+ */
+#define        CACHE_STATS_CURRENT     /* nothing */
+
+#define        COUNT(cpup, f)  ((cpup)->cpu_stats.f++)
+
+#define        UPDATE(cpup, f) do { \
+       struct nchcpu *Xcpup = (cpup); \
+       uint32_t Xcnt = (volatile uint32_t) Xcpup->cpu_stats.f; \
+       nchstats.f += Xcnt - Xcpup->cpu_stats_last.f; \
+       Xcpup->cpu_stats_last.f = Xcnt; \
+} while (/* CONSTCOND */ 0)
+
+#define        ADD(stats, cpup, f) do { \
+       struct nchcpu *Xcpup = (cpup); \
+       stats.f += Xcpup->cpu_stats.f - Xcpup->cpu_stats_last.f; \
+} while (/* CONSTCOND */ 0)
+
+/* Do unlocked stats the same way. Use a different name to allow mind changes */
+#define        COUNT_UNL(cpup, f)      COUNT((cpup), f)
 
 static const int cache_lowat = 95;
 static const int cache_hiwat = 98;
@@ -219,6 +275,8 @@
 
 /*
  * Invalidate a cache entry and enqueue it for garbage collection.
+ * The caller needs to hold namecache_lock or a per-cpu lock to hold
+ * off cache_reclaim().
  */
 static void
 cache_invalidate(struct namecache *ncp)
@@ -271,11 +329,6 @@
  * Lock all CPUs to prevent any cache lookup activity.  Conceptually,
  * this locks out all "readers".
  */
-#define        UPDATE(f) do { \
-       nchstats.f += cpup->cpu_stats.f; \
-       cpup->cpu_stats.f = 0; \
-} while (/* CONSTCOND */ 0)
-
 static void
 cache_lock_cpus(void)
 {
@@ -283,24 +336,31 @@
        struct cpu_info *ci;
        struct nchcpu *cpup;
 
+       /*
+        * Lock out all CPUs first, then harvest per-cpu stats.  This
+        * is probably not quite as cache-efficient as doing the lock
+        * and harvest at the same time, but allows cache_stat_sysctl()
+        * to make do with a per-cpu lock.
+        */
        for (CPU_INFO_FOREACH(cii, ci)) {
                cpup = ci->ci_data.cpu_nch;
                mutex_enter(&cpup->cpu_lock);
-               UPDATE(ncs_goodhits);
-               UPDATE(ncs_neghits);
-               UPDATE(ncs_badhits);
-               UPDATE(ncs_falsehits);
-               UPDATE(ncs_miss);
-               UPDATE(ncs_long);
-               UPDATE(ncs_pass2);
-               UPDATE(ncs_2passes);
-               UPDATE(ncs_revhits);
-               UPDATE(ncs_revmiss);
+       }
+       for (CPU_INFO_FOREACH(cii, ci)) {
+               cpup = ci->ci_data.cpu_nch;
+               UPDATE(cpup, ncs_goodhits);
+               UPDATE(cpup, ncs_neghits);
+               UPDATE(cpup, ncs_badhits);
+               UPDATE(cpup, ncs_falsehits);
+               UPDATE(cpup, ncs_miss);
+               UPDATE(cpup, ncs_long);
+               UPDATE(cpup, ncs_pass2);
+               UPDATE(cpup, ncs_2passes);
+               UPDATE(cpup, ncs_revhits);
+               UPDATE(cpup, ncs_revmiss);
        }
 }
 
-#undef UPDATE
-
 /*
  * Release all CPU locks.
  */
@@ -318,8 +378,9 @@
 }
 
 /*
- * Find a single cache entry and return it locked.  'namecache_lock' or
- * at least one of the per-CPU locks must be held.
+ * Find a single cache entry and return it locked.
+ * The caller needs to hold namecache_lock or a per-cpu lock to hold
+ * off cache_reclaim().
  */
 static struct namecache *
 cache_lookup_entry(const struct vnode *dvp, const char *name, size_t namelen)
@@ -333,6 +394,7 @@
        ncpp = &nchashtbl[NCHASH2(hash, dvp)];
 
        LIST_FOREACH(ncp, ncpp, nc_hash) {
+               /* XXX Needs barrier for Alpha here */
                if (ncp->nc_dvp != dvp ||
                    ncp->nc_nlen != namelen ||
                    memcmp(ncp->nc_name, name, (u_int)ncp->nc_nlen))
@@ -407,7 +469,8 @@
        struct namecache *ncp;
        struct vnode *vp;
        struct nchcpu *cpup;
-       int error;
+       int error, ret_value;
+
 
        /* Establish default result values */
        if (iswht_ret != NULL) {
@@ -422,20 +485,21 @@
        cpup = curcpu()->ci_data.cpu_nch;
        mutex_enter(&cpup->cpu_lock);
        if (__predict_false(namelen > NCHNAMLEN)) {
-               COUNT(cpup->cpu_stats, ncs_long);
+               COUNT(cpup, ncs_long);
                mutex_exit(&cpup->cpu_lock);
                /* found nothing */
                return 0;
        }
+
        ncp = cache_lookup_entry(dvp, name, namelen);
        if (__predict_false(ncp == NULL)) {
-               COUNT(cpup->cpu_stats, ncs_miss);
+               COUNT(cpup, ncs_miss);
                mutex_exit(&cpup->cpu_lock);
                /* found nothing */
                return 0;
        }
        if ((cnflags & MAKEENTRY) == 0) {
-               COUNT(cpup->cpu_stats, ncs_badhits);
+               COUNT(cpup, ncs_badhits);
                /*
                 * Last component and we are renaming or deleting,
                 * the cache entry is invalid, or otherwise don't
@@ -460,13 +524,11 @@
 
                if (__predict_true(nameiop != CREATE ||
                    (cnflags & ISLASTCN) == 0)) {
-                       COUNT(cpup->cpu_stats, ncs_neghits);
-                       mutex_exit(&ncp->nc_lock);
-                       mutex_exit(&cpup->cpu_lock);
+                       COUNT(cpup, ncs_neghits);
                        /* found neg entry; vn is already null from above */
-                       return 1;
+                       ret_value = 1;
                } else {
-                       COUNT(cpup->cpu_stats, ncs_badhits);
+                       COUNT(cpup, ncs_badhits);
                        /*
                         * Last component and we are renaming or
                         * deleting, the cache entry is invalid,
@@ -474,17 +536,22 @@
                         * exist.
                         */
                        cache_invalidate(ncp);
-                       mutex_exit(&ncp->nc_lock);
-                       mutex_exit(&cpup->cpu_lock);
                        /* found nothing */
-                       return 0;
+                       ret_value = 0;
                }
+               mutex_exit(&ncp->nc_lock);
+               mutex_exit(&cpup->cpu_lock);
+               return ret_value;
        }
 
        vp = ncp->nc_vp;
        mutex_enter(vp->v_interlock);
        mutex_exit(&ncp->nc_lock);
        mutex_exit(&cpup->cpu_lock);
+
+       /*
+        * Unlocked except for the vnode interlock.  Call vget().
+        */
        error = vget(vp, LK_NOWAIT);
        if (error) {
                KASSERT(error == EBUSY);
@@ -492,27 +559,21 @@
                 * This vnode is being cleaned out.
                 * XXX badhits?
                 */
-               COUNT(cpup->cpu_stats, ncs_falsehits);
+               COUNT_UNL(cpup, ncs_falsehits);
                /* found nothing */
                return 0;
        }
 
-#ifdef DEBUG
-       /*
-        * since we released nb->nb_lock,
-        * we can't use this pointer any more.
-        */
-       ncp = NULL;
-#endif /* DEBUG */
-
-       /* We don't have the right lock, but this is only for stats. */
-       COUNT(cpup->cpu_stats, ncs_goodhits);
-



Home | Main Index | Thread Index | Old Index