Source-Changes-HG archive
[src/trunk]: src/sys/uvm - If the hardware provided NUMA info, then use it to...
details: https://anonhg.NetBSD.org/src/rev/dafa8bbff3bb
branches: trunk
changeset: 1010190:dafa8bbff3bb
user: ad <ad@NetBSD.org>
date: Sun May 17 15:11:57 2020 +0000
description:
- If the hardware provided NUMA info, use it to decide how to set up the
allocator's buckets, instead of doing round-robin distribution (a sketch
follows below). There are open questions here, but this is better than
doing nothing.
- Kernel reserve pages are for the kernel, not realtime threads.
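For context, here is a minimal stand-alone model (not kernel code) of the new bucket-setup decision in uvm_page_rebucket(). PGFL_MAX_BUCKETS, ci_numa_id, and the one-bucket-per-node assignment mirror the diff below; the struct, the constant's value, and the scaffolding are invented here purely for illustration.

/*
 * Stand-alone model of the new bucket-setup decision (illustrative only;
 * the real code iterates CPUs with CPU_INFO_FOREACH and then calls
 * uvm_page_redim(), as the second hunk below shows).
 */
#include <stdio.h>

#define PGFL_MAX_BUCKETS 8	/* illustrative value only */

struct cpu_model {
	unsigned ci_numa_id;	/* NUMA node this CPU belongs to */
	unsigned pgflbucket;	/* free-page bucket assigned to the CPU */
};

/* Returns the bucket count under the NUMA scheme, or 0 to fall back. */
static unsigned
rebucket(struct cpu_model *cpus, unsigned ncpu)
{
	unsigned min_numa = (unsigned)-1, max_numa = 0, i;

	for (i = 0; i < ncpu; i++) {
		if (cpus[i].ci_numa_id < min_numa)
			min_numa = cpus[i].ci_numa_id;
		if (cpus[i].ci_numa_id > max_numa)
			max_numa = cpus[i].ci_numa_id;
	}
	if (min_numa != max_numa && max_numa < PGFL_MAX_BUCKETS) {
		/* NUMA scheme: one free-page bucket per node ID. */
		for (i = 0; i < ncpu; i++)
			cpus[i].pgflbucket = cpus[i].ci_numa_id;
		return max_numa + 1;
	}
	/* Otherwise use the L2/L3 cache-locality scheme (not modelled). */
	return 0;
}

int
main(void)
{
	struct cpu_model cpus[4] = {{0, 0}, {0, 0}, {1, 0}, {1, 0}};

	printf("NUMA buckets: %u\n", rebucket(cpus, 4));
	return 0;
}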
diffstat:
sys/uvm/uvm.h | 3 +--
sys/uvm/uvm_page.c | 39 ++++++++++++---------------------------
2 files changed, 13 insertions(+), 29 deletions(-)
diffs (129 lines):
diff -r 70641e07e70b -r dafa8bbff3bb sys/uvm/uvm.h
--- a/sys/uvm/uvm.h Sun May 17 15:07:22 2020 +0000
+++ b/sys/uvm/uvm.h Sun May 17 15:11:57 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm.h,v 1.76 2020/03/14 20:23:51 ad Exp $ */
+/* $NetBSD: uvm.h,v 1.77 2020/05/17 15:11:57 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -107,7 +107,6 @@
struct pgfreelist page_free[VM_NFREELIST]; /* unallocated pages */
u_int bucketcount;
bool page_init_done; /* true if uvm_page_init() finished */
- bool numa_alloc; /* use NUMA page allocation strategy */
/* page daemon trigger */
int pagedaemon; /* daemon sleeps on this */
diff -r 70641e07e70b -r dafa8bbff3bb sys/uvm/uvm_page.c
--- a/sys/uvm/uvm_page.c Sun May 17 15:07:22 2020 +0000
+++ b/sys/uvm/uvm_page.c Sun May 17 15:11:57 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_page.c,v 1.234 2020/03/17 18:31:39 ad Exp $ */
+/* $NetBSD: uvm_page.c,v 1.235 2020/05/17 15:11:57 ad Exp $ */
/*-
* Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
@@ -95,7 +95,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.234 2020/03/17 18:31:39 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.235 2020/05/17 15:11:57 ad Exp $");
#include "opt_ddb.h"
#include "opt_uvm.h"
@@ -749,7 +749,6 @@
static bool warned;
paddr_t pa;
- KASSERT(uvm.numa_alloc);
KASSERT(uvm_page_numa_region != NULL);
pa = VM_PAGE_TO_PHYS(pg);
@@ -852,7 +851,7 @@
uvm_page_get_bucket(pg) == ob);
KASSERT(fl ==
uvm_page_get_freelist(pg));
- if (uvm.numa_alloc) {
+ if (uvm_page_numa_region != NULL) {
nb = uvm_page_numa_lookup(pg);
} else {
nb = atop(VM_PAGE_TO_PHYS(pg))
@@ -919,8 +918,7 @@
/*
* If we have more than one NUMA node, and the maximum NUMA node ID
* is less than PGFL_MAX_BUCKETS, then we'll use NUMA distribution
- * for free pages. uvm_pagefree() will not reassign pages to a
- * different bucket on free.
+ * for free pages.
*/
min_numa = (u_int)-1;
max_numa = 0;
@@ -933,28 +931,19 @@
}
}
if (min_numa != max_numa && max_numa < PGFL_MAX_BUCKETS) {
-#ifdef NUMA
- /*
- * We can do this, and it seems to work well, but until
- * further experiments are done we'll stick with the cache
- * locality strategy.
- */
aprint_debug("UVM: using NUMA allocation scheme\n");
for (CPU_INFO_FOREACH(cii, ci)) {
ci->ci_data.cpu_uvm->pgflbucket = ci->ci_numa_id;
}
- uvm.numa_alloc = true;
uvm_page_redim(uvmexp.ncolors, max_numa + 1);
return;
-#endif
}
/*
* Otherwise we'll go with a scheme to maximise L2/L3 cache locality
* and minimise lock contention. Count the total number of CPU
* packages, and then try to distribute the buckets among CPU
- * packages evenly. uvm_pagefree() will reassign pages to the
- * freeing CPU's preferred bucket on free.
+ * packages evenly.
*/
npackage = curcpu()->ci_nsibling[CPUREL_PACKAGE1ST];
@@ -1220,18 +1209,13 @@
* [3] only pagedaemon "reserved" pages remain and
* the requestor isn't the pagedaemon.
* we make kernel reserve pages available if called by a
- * kernel thread or a realtime thread.
+ * kernel thread.
*/
l = curlwp;
- if (__predict_true(l != NULL) && lwp_eprio(l) >= PRI_KTHREAD) {
+ if (__predict_true(l != NULL) && (l->l_flag & LW_SYSTEM) != 0) {
flags |= UVM_PGA_USERESERVE;
}
- /* If the allocator's running in NUMA mode, go with NUMA strategy. */
- if (uvm.numa_alloc && strat == UVM_PGA_STRAT_NORMAL) {
- strat = UVM_PGA_STRAT_NUMA;
- }
-
again:
switch (strat) {
case UVM_PGA_STRAT_NORMAL:
@@ -1270,10 +1254,11 @@
case UVM_PGA_STRAT_NUMA:
/*
- * NUMA strategy: allocating from the correct bucket is more
- * important than observing freelist priority. Look only to
- * the current NUMA node; if that fails, we need to look to
- * other NUMA nodes, so retry with the normal strategy.
+ * NUMA strategy (experimental): allocating from the correct
+ * bucket is more important than observing freelist
+ * priority. Look only to the current NUMA node; if that
+ * fails, we need to look to other NUMA nodes, so retry with
+ * the normal strategy.
*/
for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
pg = uvm_pgflcache_alloc(ucpu, lcv, color);
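To make the retry behaviour in the last hunk concrete, here is a hedged stand-alone model (again, not kernel code) of the UVM_PGA_STRAT_NUMA path: look only at the local node's bucket first, and if that fails, retry with the normal strategy so other nodes' pages can satisfy the request. bucket_alloc() is a placeholder for the real per-bucket allocator, not a UVM interface.

#include <stdbool.h>
#include <stdio.h>

enum strat { STRAT_NORMAL, STRAT_NUMA };

static bool
bucket_alloc(unsigned bucket)
{
	/* Pretend bucket 0 (the local node) is out of pages. */
	return bucket != 0;
}

static bool
pagealloc(enum strat strat, unsigned local_bucket, unsigned nbuckets)
{
	unsigned b;
again:
	switch (strat) {
	case STRAT_NUMA:
		/* Correct bucket beats freelist priority: local node only. */
		if (bucket_alloc(local_bucket))
			return true;
		strat = STRAT_NORMAL;	/* local node empty: widen the search */
		goto again;
	case STRAT_NORMAL:
		for (b = 0; b < nbuckets; b++)
			if (bucket_alloc(b))
				return true;
		return false;
	}
	return false;
}

int
main(void)
{
	printf("%s\n", pagealloc(STRAT_NUMA, 0, 2) ? "allocated" : "failed");
	return 0;
}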