Source-Changes-HG archive
[src/trunk]: src/sys Rework namecache locking commentary.
details: https://anonhg.NetBSD.org/src/rev/b4de5730aef8
branches: trunk
changeset: 822407:b4de5730aef8
user: riastradh <riastradh@NetBSD.org>
date: Sat Mar 18 21:03:28 2017 +0000
description:
Rework namecache locking commentary.
- Annotate struct namecache declaration in namei.src/namei.h.
- Bulleted lists, not walls of text.
- Identify lock order too.
- Note subtle exceptions.
diffstat:
sys/kern/vfs_cache.c | 130 ++++++++++++++++++++++++++++++--------------------
sys/sys/namei.src | 37 ++++++++-----
2 files changed, 99 insertions(+), 68 deletions(-)
diffs (217 lines):
diff -r 99d4cb841cf2 -r b4de5730aef8 sys/kern/vfs_cache.c
--- a/sys/kern/vfs_cache.c Sat Mar 18 20:01:44 2017 +0000
+++ b/sys/kern/vfs_cache.c Sat Mar 18 21:03:28 2017 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: vfs_cache.c,v 1.116 2017/03/18 20:01:44 riastradh Exp $ */
+/* $NetBSD: vfs_cache.c,v 1.117 2017/03/18 21:03:28 riastradh Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -58,7 +58,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.116 2017/03/18 20:01:44 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.117 2017/03/18 21:03:28 riastradh Exp $");
#ifdef _KERNEL_OPT
#include "opt_ddb.h"
@@ -103,71 +103,95 @@
*/
/*
- * The locking in this subsystem works as follows:
+ * Locking.
*
- * When an entry is added to the cache, via cache_enter(),
- * namecache_lock is taken to exclude other writers. The new
- * entry is added to the hash list in a way which permits
- * concurrent lookups and invalidations in the cache done on
- * other CPUs to continue in parallel.
+ * L namecache_lock Global lock for namecache table and queues.
+ * C struct nchcpu::cpu_lock Per-CPU lock to reduce read contention.
+ * N struct namecache::nc_lock Per-entry lock.
+ * V struct vnode::v_interlock Vnode interlock.
+ *
+ * Lock order: C -> N -> L -> V
+ *
+ * All use serialized by namecache_lock:
*
- * When a lookup is done in the cache, via cache_lookup() or
- * cache_lookup_raw(), the per-cpu lock below is taken. This
- * protects calls to cache_lookup_entry() and cache_invalidate()
- * against cache_reclaim() but allows lookups to continue in
- * parallel with cache_enter().
+ * nclruhead / struct namecache::nc_lru
+ * ncvhashtbl / struct namecache::nc_vhash
+ * struct vnode_impl::vi_dnclist / struct namecache::nc_dvlist
+ * struct vnode_impl::vi_nclist / struct namecache::nc_vlist
+ * nchstats
*
- * cache_revlookup() takes namecache_lock to exclude cache_enter()
- * and cache_reclaim() since the list it operates on is not
- * maintained to allow concurrent reads.
+ * - Insertion serialized by namecache_lock,
+ * - read protected by per-CPU lock,
+ * - insert/read ordering guaranteed by memory barriers, and
+ * - deletion allowed only under namecache_lock and *all* per-CPU locks
+ * in CPU_INFO_FOREACH order:
+ *
+ * nchashtbl / struct namecache::nc_hash
+ *
+ * The per-CPU locks exist only to reduce the probability of
+ * contention between readers. We do not bind to a CPU, so
+ * contention is still possible.
+ *
+ * All use serialized by struct namecache::nc_lock:
*
- * When cache_reclaim() is called namecache_lock is held to hold
- * off calls to cache_enter()/cache_revlookup() and each of the
- * per-cpu locks is taken to hold off lookups. Holding all these
- * locks essentially idles the subsystem, ensuring there are no
- * concurrent references to the cache entries being freed.
+ * struct namecache::nc_dvp
+ * struct namecache::nc_vp
+ * struct namecache::nc_gcqueue (*)
+ * struct namecache::nc_hittime (**)
+ *
+ * (*) Once on the queue, only cache_thread uses nc_gcqueue, unlocked.
+ * (**) cache_prune reads nc_hittime unlocked, since approximate is OK.
+ *
+ * Unlocked because stable after initialization:
+ *
+ * struct namecache::nc_dvp
+ * struct namecache::nc_vp
+ * struct namecache::nc_flags
+ * struct namecache::nc_nlen
+ * struct namecache::nc_name
+ *
+ * Unlocked because approximation is OK:
*
- * 32 bit per-cpu statistic counters (struct nchstats_percpu) are
- * incremented when the operations they count are performed while
- * running on the corresponding CPU. Frequently individual counters
- * are incremented while holding a lock (either a per-cpu lock or
- * namecache_lock) sufficient to preclude concurrent increments
- * being done to the same counter, so non-atomic increments are
- * done using the COUNT() macro. Counters which are incremented
- * when one of these locks is not held use the COUNT_UNL() macro
- * instead. COUNT_UNL() could be defined to do atomic increments
- * but currently just does what COUNT() does, on the theory that
- * it is unlikely the non-atomic increment will be interrupted
- * by something on the same CPU that increments the same counter,
- * but even if it does happen the consequences aren't serious.
+ * struct nchcpu::cpu_stats
+ * struct nchcpu::cpu_stats_last
+ *
+ * Updates under namecache_lock or any per-CPU lock are marked with
+ * COUNT, while updates outside those locks are marked with COUNT_UNL.
+ *
+ * - The theory seems to have been that you could replace COUNT_UNL by
+ * atomic operations -- except that doesn't help unless you also
+ * replace COUNT by atomic operations, because mixing atomics and
+ * nonatomics is a recipe for failure.
+ * - We use 32-bit per-CPU counters and 64-bit global counters under
+ * the theory that 32-bit counters are less likely to be hosed by
+ * nonatomic increment.
+ */
+
+/*
+ * The comment below is preserved for posterity in case it is
+ * important, but it is clear that everywhere the namecache_count_*()
+ * functions are called, other cache_*() functions that take the same
+ * locks are also called, so I can't imagine how this could be a
+ * problem:
*
* N.B.: Attempting to protect COUNT_UNL() increments by taking
* a per-cpu lock in the namecache_count_*() functions causes
* a deadlock. Don't do that, use atomic increments instead if
* the imperfections here bug you.
+ */
+
+/*
+ * struct nchstats_percpu:
*
- * The 64 bit system-wide statistic counts (struct nchstats) are
- * maintained by sampling the per-cpu counters periodically, adding
- * in the deltas since the last samples and recording the current
- * samples to use to compute the next delta. The sampling is done
- * as a side effect of cache_reclaim() which is run periodically,
- * for its own purposes, often enough to avoid overflow of the 32
- * bit counters. While sampling in this fashion requires no locking
- * it is never-the-less done only after all locks have been taken by
- * cache_reclaim() to allow cache_stat_sysctl() to hold off
- * cache_reclaim() with minimal locking.
- *
- * cache_stat_sysctl() takes its CPU's per-cpu lock to hold off
- * cache_reclaim() so that it can copy the subsystem total stats
- * without them being concurrently modified. If CACHE_STATS_CURRENT
- * is defined it also harvests the per-cpu increments into the total,
- * which again requires cache_reclaim() to be held off.
- *
- * The per-cpu data (a lock and the per-cpu stats structures)
- * are defined next.
+ * Per-CPU counters.
*/
struct nchstats_percpu _NAMEI_CACHE_STATS(uint32_t);
+/*
+ * struct nchcpu:
+ *
+ * Per-CPU namecache state: lock and per-CPU counters.
+ */
struct nchcpu {
kmutex_t cpu_lock;
struct nchstats_percpu cpu_stats;
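
The lock order called out in the new comment (C -> N -> L -> V) is easiest to see as code. Below is a minimal illustrative sketch, not part of the commit: the four kmutex_t pointers are hypothetical stand-ins for the current CPU's struct nchcpu::cpu_lock, an entry's nc_lock, the global namecache_lock, and a vnode's v_interlock, and the helper name is made up.

#include <sys/mutex.h>

/*
 * Sketch only: acquire the namecache locks in the documented order
 * C -> N -> L -> V, then release them in the reverse order.
 */
static void
example_lock_order(kmutex_t *cpu_lock, kmutex_t *nc_lock,
    kmutex_t *namecache_lock, kmutex_t *v_interlock)
{
        mutex_enter(cpu_lock);          /* C: struct nchcpu::cpu_lock */
        mutex_enter(nc_lock);           /* N: struct namecache::nc_lock */
        mutex_enter(namecache_lock);    /* L: global namecache_lock */
        mutex_enter(v_interlock);       /* V: struct vnode::v_interlock */

        /* ... inspect or update the entry and the vnode here ... */

        mutex_exit(v_interlock);
        mutex_exit(namecache_lock);
        mutex_exit(nc_lock);
        mutex_exit(cpu_lock);
}
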
diff -r 99d4cb841cf2 -r b4de5730aef8 sys/sys/namei.src
--- a/sys/sys/namei.src Sat Mar 18 20:01:44 2017 +0000
+++ b/sys/sys/namei.src Sat Mar 18 21:03:28 2017 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: namei.src,v 1.38 2017/03/18 19:43:31 riastradh Exp $ */
+/* $NetBSD: namei.src,v 1.39 2017/03/18 21:03:28 riastradh Exp $ */
/*
* Copyright (c) 1985, 1989, 1991, 1993
@@ -201,21 +201,28 @@
* accessed and mostly read-only data is toward the front, with
* infrequently accessed data and the lock towards the rear. The
* lock is then more likely to be in a separate cache line.
+ *
+ * Locking rules:
+ *
+ * - stable after initialization
+ * L namecache_lock
+ * C struct nchcpu::cpu_lock
+ * N struct namecache::nc_lock
*/
-struct namecache {
- LIST_ENTRY(namecache) nc_hash; /* hash chain */
- LIST_ENTRY(namecache) nc_vhash; /* directory hash chain */
- struct vnode *nc_dvp; /* vnode of parent of name */
- struct vnode *nc_vp; /* vnode the name refers to */
- int nc_flags; /* copy of componentname's ISWHITEOUT */
- char nc_nlen; /* length of name */
- char nc_name[NCHNAMLEN]; /* segment name */
- void *nc_gcqueue; /* queue for garbage collection */
- TAILQ_ENTRY(namecache) nc_lru; /* psuedo-lru chain */
- LIST_ENTRY(namecache) nc_dvlist;
- LIST_ENTRY(namecache) nc_vlist;
- kmutex_t nc_lock; /* lock on this entry */
- int nc_hittime; /* last time scored a hit */
+struct namecache {
+ LIST_ENTRY(namecache) nc_hash; /* L hash chain */
+ LIST_ENTRY(namecache) nc_vhash; /* L directory hash chain */
+ struct vnode *nc_dvp; /* - vnode of parent of name */
+ struct vnode *nc_vp; /* - vnode the name refers to */
+ int nc_flags; /* - copy of componentname ISWHITEOUT */
+ char nc_nlen; /* - length of name */
+ char nc_name[NCHNAMLEN]; /* - segment name */
+ void *nc_gcqueue; /* N queue for garbage collection */
+ TAILQ_ENTRY(namecache) nc_lru; /* L pseudo-lru chain */
+ LIST_ENTRY(namecache) nc_dvlist;/* L dvp's list of cache entries */
+ LIST_ENTRY(namecache) nc_vlist; /* L vp's list of cache entries */
+ kmutex_t nc_lock; /* lock on this entry */
+ int nc_hittime; /* N last time scored a hit */
};
#ifdef _KERNEL
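
To make the per-field annotation letters concrete, here is an illustrative sketch, not part of the commit, of a consumer honouring them: '-' fields such as nc_nlen and nc_name are stable after initialization and can be read with no lock once the entry has been found safely, while an 'N' field such as nc_hittime is updated under the entry's nc_lock. The helper names are made up, and locating ncp (under the per-CPU lock that protects the hash chain) is assumed to have happened already.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <sys/namei.h>

/* Sketch: '-' fields (nc_nlen, nc_name) are stable, so no lock is taken. */
static bool
example_name_matches(const struct namecache *ncp, const char *name, int namelen)
{
        return ncp->nc_nlen == namelen &&
            memcmp(ncp->nc_name, name, (size_t)namelen) == 0;
}

/* Sketch: nc_hittime is 'N', so take the per-entry lock to update it. */
static void
example_score_hit(struct namecache *ncp)
{
        mutex_enter(&ncp->nc_lock);
        ncp->nc_hittime = hardclock_ticks;      /* hardclock_ticks: sys/kernel.h */
        mutex_exit(&ncp->nc_lock);
}
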