Source-Changes-HG archive
[src/trunk]: src pool_cache:
details: https://anonhg.NetBSD.org/src/rev/e23576313628
branches: trunk
changeset: 934621:e23576313628
user: ad <ad%NetBSD.org@localhost>
date: Sun Jun 14 21:34:25 2020 +0000
description:
pool_cache:
- make all counters per-CPU and make cache layer do its work with atomic ops.
- conserve memory by caching empty groups globally.
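[Editor's note, not part of the commit message.] The second bullet replaces the per-cache, mutex-protected lists of empty groups with a global lock-free list per group size (pcg_normal_cache / pcg_large_cache), managed by the new pool_pcg_get()/pool_pcg_put() helpers that appear, in part, in the truncated diff below. The following is a minimal user-space sketch of that technique, written with C11 atomics rather than the kernel's atomic_cas_ptr(), membar and SPINLOCK_BACKOFF() primitives; the sketch_* names and the simplified struct pcg are illustrative only, not the actual kernel code.

/*
 * Illustrative sketch: a lock-free global list of cache groups.
 * A sentinel (pcg_dummy) is parked at the list head while a pop is
 * in progress, so the popper can safely read pcg_next.
 */
#include <stdatomic.h>
#include <stddef.h>

struct pcg {
	struct pcg *pcg_next;
	/* ... cached objects would follow here ... */
};

static struct pcg pcg_dummy;	/* sentinel: "a pop is in progress" */

/* Push an empty group onto the global cache. */
static void
sketch_pcg_put(_Atomic(struct pcg *) *head, struct pcg *pcg)
{
	struct pcg *o;

	for (;;) {
		o = atomic_load_explicit(head, memory_order_relaxed);
		if (o == &pcg_dummy)
			continue;	/* concurrent pop: retry */
		pcg->pcg_next = o;	/* link before publishing */
		if (atomic_compare_exchange_weak_explicit(head, &o, pcg,
		    memory_order_release, memory_order_relaxed))
			return;
	}
}

/* Pop a group, or return NULL if the cache is empty. */
static struct pcg *
sketch_pcg_get(_Atomic(struct pcg *) *head)
{
	struct pcg *o;

	for (;;) {
		o = atomic_load_explicit(head, memory_order_relaxed);
		if (o == &pcg_dummy)
			continue;	/* concurrent pop: retry */
		if (o == NULL)
			return NULL;	/* nothing cached */
		/* Park the sentinel to claim exclusive access to 'o'. */
		if (atomic_compare_exchange_weak_explicit(head, &o,
		    &pcg_dummy, memory_order_acquire, memory_order_relaxed))
			break;
	}
	/* Sole owner of 'o': unhook it, then unpark the sentinel. */
	atomic_store_explicit(head, o->pcg_next, memory_order_release);
	o->pcg_next = NULL;
	return o;
}

The sentinel is what avoids the classic ABA hazard of a naive CAS stack: pcg_next is only dereferenced after &pcg_dummy has been installed at the head, and while it is installed every other get or put simply retries.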
diffstat:
sys/kern/subr_pool.c | 398 ++++++++++++++++++++++++-----------------------
sys/sys/pool.h | 43 ++--
usr.bin/vmstat/vmstat.c | 31 ++-
3 files changed, 245 insertions(+), 227 deletions(-)
diffs (truncated from 877 to 300 lines):
diff -r bd2d4a22f5f8 -r e23576313628 sys/kern/subr_pool.c
--- a/sys/kern/subr_pool.c Sun Jun 14 21:33:28 2020 +0000
+++ b/sys/kern/subr_pool.c Sun Jun 14 21:34:25 2020 +0000
@@ -1,8 +1,8 @@
-/* $NetBSD: subr_pool.c,v 1.270 2020/06/07 09:45:19 maxv Exp $ */
+/* $NetBSD: subr_pool.c,v 1.271 2020/06/14 21:34:25 ad Exp $ */
/*
- * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010, 2014, 2015, 2018
- * The NetBSD Foundation, Inc.
+ * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010, 2014, 2015, 2018,
+ * 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.270 2020/06/07 09:45:19 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.271 2020/06/14 21:34:25 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_ddb.h"
@@ -52,6 +52,7 @@
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/debug.h>
+#include <sys/lock.h>
#include <sys/lockdebug.h>
#include <sys/xcall.h>
#include <sys/cpu.h>
@@ -337,6 +338,9 @@
static struct pool cache_pool;
static struct pool cache_cpu_pool;
+static pcg_t *volatile pcg_large_cache __cacheline_aligned;
+static pcg_t *volatile pcg_normal_cache __cacheline_aligned;
+
/* List of all caches. */
TAILQ_HEAD(,pool_cache) pool_cache_head =
TAILQ_HEAD_INITIALIZER(pool_cache_head);
@@ -344,14 +348,17 @@
int pool_cache_disable; /* global disable for caching */
static const pcg_t pcg_dummy; /* zero sized: always empty, yet always full */
-static bool pool_cache_put_slow(pool_cache_cpu_t *, int,
+static bool pool_cache_put_slow(pool_cache_t, pool_cache_cpu_t *, int,
void *);
-static bool pool_cache_get_slow(pool_cache_cpu_t *, int,
+static bool pool_cache_get_slow(pool_cache_t, pool_cache_cpu_t *, int,
void **, paddr_t *, int);
static void pool_cache_cpu_init1(struct cpu_info *, pool_cache_t);
-static void pool_cache_invalidate_groups(pool_cache_t, pcg_t *);
+static int pool_cache_invalidate_groups(pool_cache_t, pcg_t *);
static void pool_cache_invalidate_cpu(pool_cache_t, u_int);
static void pool_cache_transfer(pool_cache_t);
+static int pool_pcg_get(pcg_t *volatile *, pcg_t **);
+static int pool_pcg_put(pcg_t *volatile *, pcg_t *);
+static pcg_t * pool_pcg_trunc(pcg_t *volatile *);
static int pool_catchup(struct pool *);
static void pool_prime_page(struct pool *, void *,
@@ -1804,7 +1811,8 @@
pool_cache_t pc;
pcg_t *pcg;
pool_cache_cpu_t *cc;
- uint64_t cpuhit, cpumiss;
+ uint64_t cpuhit, cpumiss, pchit, pcmiss;
+ uint32_t nfull;
int i, print_log = 0, print_pagelist = 0, print_cache = 0;
char c;
@@ -1881,31 +1889,26 @@
if (pc != NULL) {
cpuhit = 0;
cpumiss = 0;
+ pcmiss = 0;
+ nfull = 0;
for (i = 0; i < __arraycount(pc->pc_cpus); i++) {
if ((cc = pc->pc_cpus[i]) == NULL)
continue;
cpuhit += cc->cc_hits;
cpumiss += cc->cc_misses;
+ pcmiss += cc->cc_pcmisses;
+ nfull += cc->cc_nfull;
}
+ pchit = cpumiss - pcmiss;
(*pr)("\tcpu layer hits %llu misses %llu\n", cpuhit, cpumiss);
- (*pr)("\tcache layer hits %llu misses %llu\n",
- pc->pc_hits, pc->pc_misses);
- (*pr)("\tcache layer entry uncontended %llu contended %llu\n",
- pc->pc_hits + pc->pc_misses - pc->pc_contended,
- pc->pc_contended);
- (*pr)("\tcache layer empty groups %u full groups %u\n",
- pc->pc_nempty, pc->pc_nfull);
+ (*pr)("\tcache layer hits %llu misses %llu\n", pchit, pcmiss);
+ (*pr)("\tcache layer full groups %u\n", nfull);
if (print_cache) {
(*pr)("\tfull cache groups:\n");
for (pcg = pc->pc_fullgroups; pcg != NULL;
pcg = pcg->pcg_next) {
PR_GROUPLIST(pcg);
}
- (*pr)("\tempty cache groups:\n");
- for (pcg = pc->pc_emptygroups; pcg != NULL;
- pcg = pcg->pcg_next) {
- PR_GROUPLIST(pcg);
- }
}
}
#undef PR_GROUPLIST
@@ -2051,7 +2054,6 @@
palloc = &pool_allocator_nointr;
}
pool_init(pp, size, align, align_offset, flags, wchan, palloc, ipl);
- mutex_init(&pc->pc_lock, MUTEX_DEFAULT, ipl);
if (ctor == NULL) {
ctor = NO_CTOR;
@@ -2060,27 +2062,22 @@
dtor = NO_DTOR;
}
- pc->pc_emptygroups = NULL;
pc->pc_fullgroups = NULL;
pc->pc_partgroups = NULL;
pc->pc_ctor = ctor;
pc->pc_dtor = dtor;
pc->pc_arg = arg;
- pc->pc_hits = 0;
- pc->pc_misses = 0;
- pc->pc_nempty = 0;
- pc->pc_npart = 0;
- pc->pc_nfull = 0;
- pc->pc_contended = 0;
pc->pc_refcnt = 0;
pc->pc_freecheck = NULL;
if ((flags & PR_LARGECACHE) != 0) {
pc->pc_pcgsize = PCG_NOBJECTS_LARGE;
pc->pc_pcgpool = &pcg_large_pool;
+ pc->pc_pcgcache = &pcg_large_cache;
} else {
pc->pc_pcgsize = PCG_NOBJECTS_NORMAL;
pc->pc_pcgpool = &pcg_normal_pool;
+ pc->pc_pcgcache = &pcg_normal_cache;
}
/* Allocate per-CPU caches. */
@@ -2157,7 +2154,6 @@
pool_cache_invalidate_cpu(pc, i);
/* Finally, destroy it. */
- mutex_destroy(&pc->pc_lock);
pool_destroy(pp);
}
@@ -2177,7 +2173,6 @@
KASSERT(index < __arraycount(pc->pc_cpus));
if ((cc = pc->pc_cpus[index]) != NULL) {
- KASSERT(cc->cc_cpuindex == index);
return;
}
@@ -2189,20 +2184,19 @@
cc = &pc->pc_cpu0;
pc->pc_ncpu = 1;
} else {
- mutex_enter(&pc->pc_lock);
pc->pc_ncpu++;
- mutex_exit(&pc->pc_lock);
cc = pool_get(&cache_cpu_pool, PR_WAITOK);
}
- cc->cc_ipl = pc->pc_pool.pr_ipl;
- cc->cc_iplcookie = makeiplcookie(cc->cc_ipl);
- cc->cc_cache = pc;
- cc->cc_cpuindex = index;
+ cc->cc_current = __UNCONST(&pcg_dummy);
+ cc->cc_previous = __UNCONST(&pcg_dummy);
+ cc->cc_pcgcache = pc->pc_pcgcache;
cc->cc_hits = 0;
cc->cc_misses = 0;
- cc->cc_current = __UNCONST(&pcg_dummy);
- cc->cc_previous = __UNCONST(&pcg_dummy);
+ cc->cc_pcmisses = 0;
+ cc->cc_contended = 0;
+ cc->cc_nfull = 0;
+ cc->cc_npart = 0;
pc->pc_cpus[index] = cc;
}
@@ -2268,16 +2262,17 @@
/*
* pool_cache_invalidate_groups:
*
- * Invalidate a chain of groups and destruct all objects.
+ * Invalidate a chain of groups and destruct all objects. Return the
+ * number of groups that were invalidated.
*/
-static void
+static int
pool_cache_invalidate_groups(pool_cache_t pc, pcg_t *pcg)
{
void *object;
pcg_t *next;
- int i;
-
- for (; pcg != NULL; pcg = next) {
+ int i, n;
+
+ for (n = 0; pcg != NULL; pcg = next, n++) {
next = pcg->pcg_next;
for (i = 0; i < pcg->pcg_avail; i++) {
@@ -2292,6 +2287,7 @@
pool_put(&pcg_normal_pool, pcg);
}
}
+ return n;
}
/*
@@ -2311,7 +2307,8 @@
pool_cache_invalidate(pool_cache_t pc)
{
uint64_t where;
- pcg_t *full, *empty, *part;
+ pcg_t *pcg;
+ int n, s;
KASSERT(!cpu_intr_p() && !cpu_softintr_p());
@@ -2335,22 +2332,24 @@
xc_wait(where);
}
- /* Empty pool caches, then invalidate objects */
- mutex_enter(&pc->pc_lock);
- full = pc->pc_fullgroups;
- empty = pc->pc_emptygroups;
- part = pc->pc_partgroups;
- pc->pc_fullgroups = NULL;
- pc->pc_emptygroups = NULL;
- pc->pc_partgroups = NULL;
- pc->pc_nfull = 0;
- pc->pc_nempty = 0;
- pc->pc_npart = 0;
- mutex_exit(&pc->pc_lock);
-
- pool_cache_invalidate_groups(pc, full);
- pool_cache_invalidate_groups(pc, empty);
- pool_cache_invalidate_groups(pc, part);
+ /* Now dequeue and invalidate everything. */
+ pcg = pool_pcg_trunc(&pcg_normal_cache);
+ (void)pool_cache_invalidate_groups(pc, pcg);
+
+ pcg = pool_pcg_trunc(&pcg_large_cache);
+ (void)pool_cache_invalidate_groups(pc, pcg);
+
+ pcg = pool_pcg_trunc(&pc->pc_fullgroups);
+ n = pool_cache_invalidate_groups(pc, pcg);
+ s = splvm();
+ ((pool_cache_cpu_t *)pc->pc_cpus[curcpu()->ci_index])->cc_nfull -= n;
+ splx(s);
+
+ pcg = pool_pcg_trunc(&pc->pc_partgroups);
+ n = pool_cache_invalidate_groups(pc, pcg);
+ s = splvm();
+ ((pool_cache_cpu_t *)pc->pc_cpus[curcpu()->ci_index])->cc_npart -= n;
+ splx(s);
}
/*
@@ -2421,61 +2420,131 @@
pool_prime(&pc->pc_pool, n);
}
+/*
+ * pool_pcg_get:
+ *
+ * Get a cache group from the specified list. Return true if
+ * contention was encountered. Must be called at IPL_VM because
+ * of spin wait vs. kernel_lock.
+ */
+static int
+pool_pcg_get(pcg_t *volatile *head, pcg_t **pcgp)
+{
+ int count = SPINLOCK_BACKOFF_MIN;
+ pcg_t *o, *n;
+
+ for (o = atomic_load_relaxed(head);; o = n) {
+ if (__predict_false(o == &pcg_dummy)) {
+ /* Wait for concurrent get to complete. */
+ SPINLOCK_BACKOFF(count);
+ n = atomic_load_relaxed(head);
+ continue;
+ }