Source-Changes-HG archive


[src/trunk]: src/sys/arch/aarch64 Part IV of ad's performance improvements for aarch64



details:   https://anonhg.NetBSD.org/src/rev/dc1a9a035037
branches:  trunk
changeset: 974818:dc1a9a035037
user:      skrll <skrll%NetBSD.org@localhost>
date:      Wed Aug 12 13:36:36 2020 +0000

description:
Part IV of ad's performance improvements for aarch64

- Implement pmap_growkernel(), and update the kernel pmap's stats with
  atomics (see the first sketch after this list).

- Then, pmap_kenter_pa() and pmap_kremove() no longer need to allocate
  memory or take pm_lock, because they only modify L3 PTEs.

- Then, pm_lock and pp_lock can be adaptive mutexes at IPL_NONE, which
  are cheaper than spin mutexes (see the second sketch after this list).

- Take the pmap's lock in pmap_extract() unless it is the kernel's pmap;
  otherwise pmap_extract() might see inconsistent state.
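
A minimal sketch of why the kernel pmap's counters move to atomics once
pmap_kenter_pa() and pmap_kremove() run without pm_lock.  The helper name
here is illustrative (the diff's real one is _pmap_adj_wired_count);
atomic_add_long() is the standard NetBSD primitive from atomic_ops(3):

#include <sys/atomic.h>

static inline void
kpm_adj_wired(struct pmap *pm, long adj)
{
	/*
	 * Two CPUs doing a plain "count++" can lose an update: both
	 * read N, both write back N + 1.  atomic_add_long() makes the
	 * read-modify-write indivisible, so a lockless counter stays
	 * correct without taking pm_lock.
	 */
	atomic_add_long(&pm->pm_stats.wired_count, adj);
}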
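
On the adaptive-versus-spin point: in NetBSD the IPL argument passed to
mutex_init() is what selects the mutex type (see mutex(9)), which is why
the diff only needs to change IPL_VM to IPL_NONE.  A minimal sketch with
illustrative lock names:

#include <sys/mutex.h>

static kmutex_t adaptive_lk, spin_lk;

static void
init_locks(void)
{
	/*
	 * IPL_NONE gives an adaptive mutex: a contending thread
	 * sleeps, so no SPL is raised and no CPU time is burned
	 * busy-waiting.
	 */
	mutex_init(&adaptive_lk, MUTEX_DEFAULT, IPL_NONE);

	/*
	 * Any higher IPL (e.g. IPL_VM) gives a spin mutex:
	 * mutex_enter() raises the SPL and contenders busy-wait,
	 * the cost previously paid on every pm_lock/pp_lock
	 * operation.
	 */
	mutex_init(&spin_lk, MUTEX_DEFAULT, IPL_VM);
}

The trade-off is that an adaptive mutex may sleep, so it cannot be taken
from interrupt context; the pmap_kenter_pa()/pmap_kremove() change above,
which stops those paths from taking pm_lock at all, is what makes the
switch safe.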

diffstat:

 sys/arch/aarch64/aarch64/pmap.c |  346 +++++++++++++++++++++++----------------
 sys/arch/aarch64/include/pmap.h |    4 +-
 2 files changed, 201 insertions(+), 149 deletions(-)

diffs (truncated from 585 to 300 lines):

diff -r fe9505f2caf8 -r dc1a9a035037 sys/arch/aarch64/aarch64/pmap.c
--- a/sys/arch/aarch64/aarch64/pmap.c   Wed Aug 12 13:28:46 2020 +0000
+++ b/sys/arch/aarch64/aarch64/pmap.c   Wed Aug 12 13:36:36 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.c,v 1.85 2020/08/09 09:11:41 skrll Exp $  */
+/*     $NetBSD: pmap.c,v 1.86 2020/08/12 13:36:36 skrll Exp $  */
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <ryo%nerv.org@localhost>
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.85 2020/08/09 09:11:41 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.86 2020/08/12 13:36:36 skrll Exp $");
 
 #include "opt_arm_debug.h"
 #include "opt_ddb.h"
@@ -195,6 +195,8 @@
 static void _pmap_remove(struct pmap *, vaddr_t, vaddr_t, bool,
     struct pv_entry **);
 static int _pmap_enter(struct pmap *, vaddr_t, paddr_t, vm_prot_t, u_int, bool);
+static int _pmap_get_pdp(struct pmap *, vaddr_t, bool, int, paddr_t *,
+    struct vm_page **, bool *);
 
 static struct pmap kernel_pmap __cacheline_aligned;
 
@@ -215,27 +217,27 @@
 pmap_pv_lock(struct pmap_page *pp)
 {
 
-       mutex_spin_enter(&pp->pp_pvlock);
+       mutex_enter(&pp->pp_pvlock);
 }
 
 static inline void
 pmap_pv_unlock(struct pmap_page *pp)
 {
 
-       mutex_spin_exit(&pp->pp_pvlock);
+       mutex_exit(&pp->pp_pvlock);
 }
 
 
 static inline void
 pm_lock(struct pmap *pm)
 {
-       mutex_spin_enter(&pm->pm_lock);
+       mutex_enter(&pm->pm_lock);
 }
 
 static inline void
 pm_unlock(struct pmap *pm)
 {
-       mutex_spin_exit(&pm->pm_lock);
+       mutex_exit(&pm->pm_lock);
 }
 
 static bool
@@ -249,13 +251,13 @@
 
        if (pm != pmap_kernel())
                pmap_reference(pm);
-       mutex_spin_exit(&pp->pp_pvlock);
-       mutex_spin_enter(&pm->pm_lock);
+       mutex_exit(&pp->pp_pvlock);
+       mutex_enter(&pm->pm_lock);
        /* nothing, just wait for lock */
-       mutex_spin_exit(&pm->pm_lock);
+       mutex_exit(&pm->pm_lock);
        if (pm != pmap_kernel())
                pmap_destroy(pm);
-       mutex_spin_enter(&pp->pp_pvlock);
+       mutex_enter(&pp->pp_pvlock);
        return false;
 }
 
@@ -471,26 +473,34 @@
        kpm->pm_l0table_pa = l0pa;
        kpm->pm_activated = true;
        LIST_INIT(&kpm->pm_vmlist);
-       mutex_init(&kpm->pm_lock, MUTEX_DEFAULT, IPL_VM);
+       mutex_init(&kpm->pm_lock, MUTEX_DEFAULT, IPL_NONE);
 
        CTASSERT(sizeof(kpm->pm_stats.wired_count) == sizeof(long));
        CTASSERT(sizeof(kpm->pm_stats.resident_count) == sizeof(long));
-#define PMSTAT_INC_WIRED_COUNT(pm) do { \
-       KASSERT(mutex_owned(&(pm)->pm_lock)); \
-       (pm)->pm_stats.wired_count++; \
-} while (/* CONSTCOND */ 0);
-#define PMSTAT_DEC_WIRED_COUNT(pm) do{ \
-       KASSERT(mutex_owned(&(pm)->pm_lock)); \
-       (pm)->pm_stats.wired_count--; \
-} while (/* CONSTCOND */ 0);
-#define PMSTAT_INC_RESIDENT_COUNT(pm) do { \
-       KASSERT(mutex_owned(&(pm)->pm_lock)); \
-       (pm)->pm_stats.resident_count++; \
-} while (/* CONSTCOND */ 0);
-#define PMSTAT_DEC_RESIDENT_COUNT(pm) do { \
-       KASSERT(mutex_owned(&(pm)->pm_lock)); \
-       (pm)->pm_stats.resident_count--; \
-} while (/* CONSTCOND */ 0);
+}
+
+static inline void
+_pmap_adj_wired_count(struct pmap *pm, int adj)
+{
+
+       if (pm == pmap_kernel()) {
+               atomic_add_long(&pm->pm_stats.wired_count, adj);
+       } else {
+               KASSERT(mutex_owned(&pm->pm_lock));
+               pm->pm_stats.wired_count += adj;
+       }
+}
+
+static inline void
+_pmap_adj_resident_count(struct pmap *pm, int adj)
+{
+
+       if (pm == pmap_kernel()) {
+               atomic_add_long(&pm->pm_stats.resident_count, adj);
+       } else {
+               KASSERT(mutex_owned(&pm->pm_lock));
+               pm->pm_stats.resident_count += adj;
+       }
 }
 
 inline static int
@@ -709,18 +719,34 @@
 vaddr_t
 pmap_growkernel(vaddr_t maxkvaddr)
 {
+       struct pmap *pm = pmap_kernel();
+       struct vm_page *pg;
+       bool l3only = true;
+       int error;
+       vaddr_t va;
+       paddr_t pa;
+
        UVMHIST_FUNC(__func__);
        UVMHIST_CALLED(pmaphist);
 
        UVMHIST_LOG(pmaphist, "maxkvaddr=%llx, pmap_maxkvaddr=%llx",
            maxkvaddr, pmap_maxkvaddr, 0, 0);
 
+       mutex_enter(&pm->pm_lock);
+       for (va = pmap_maxkvaddr & L2_FRAME; va <= maxkvaddr; va += L2_SIZE) {
+               error = _pmap_get_pdp(pm, va, false, 0, &pa, &pg, &l3only);
+               if (error != 0) {
+                       panic("%s: cannot allocate L3 table error=%d",
+                           __func__, error);
+               }
+       }
+       aarch64_tlbi_by_asid(pm->pm_asid);
        kasan_shadow_map((void *)pmap_maxkvaddr,
-           (size_t)(maxkvaddr - pmap_maxkvaddr));
-
-       pmap_maxkvaddr = maxkvaddr;
-
-       return maxkvaddr;
+           (size_t)(va - pmap_maxkvaddr));
+       pmap_maxkvaddr = va;
+       mutex_exit(&pm->pm_lock);
+
+       return va;
 }
 
 bool
@@ -738,7 +764,7 @@
        paddr_t pa;
        vsize_t blocksize = 0;
        int space;
-       bool coherency;
+       bool coherency, valid;
        extern char __kernel_text[];
        extern char _end[];
 
@@ -781,12 +807,17 @@
         * because the page may be in an access fault state due to
         * reference bit emulation.
         */
+       if (pm != pmap_kernel())
+               mutex_enter(&pm->pm_lock);
        ptep = _pmap_pte_lookup_bs(pm, va, &blocksize);
-       if (ptep == NULL)
+       valid = (ptep != NULL && lxpde_valid(pte = *ptep));
+       if (pm != pmap_kernel())
+               mutex_exit(&pm->pm_lock);
+
+       if (!valid) {
                return false;
-       pte = *ptep;
-       if (!lxpde_valid(pte))
-               return false;
+       }
+
        pa = lxpde_pa(pte) + (va & (blocksize - 1));
 
        switch (pte & LX_BLKPAG_ATTR_MASK) {
@@ -834,6 +865,8 @@
        vsize_t blocksize;
        unsigned int idx;
 
+       KASSERT(pm == pmap_kernel() || mutex_owned(&pm->pm_lock));
+
        /*
         * traverse L0 -> L1 -> L2 -> L3
         */
@@ -1220,9 +1253,7 @@
        KDASSERT(!IN_KSEG_ADDR(va));
        KDASSERT(IN_RANGE(va, VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS));
 
-       pm_lock(kpm);
        _pmap_remove(kpm, va, va + size, true, NULL);
-       pm_unlock(kpm);
 }
 
 static void
@@ -1455,7 +1486,7 @@
        pm->pm_idlepdp = 0;
        pm->pm_asid = -1;
        LIST_INIT(&pm->pm_vmlist);
-       mutex_init(&pm->pm_lock, MUTEX_DEFAULT, IPL_VM);
+       mutex_init(&pm->pm_lock, MUTEX_DEFAULT, IPL_NONE);
 
        pm->pm_l0table_pa = pmap_alloc_pdp(pm, NULL, 0, true);
        KASSERT(pm->pm_l0table_pa != POOL_PADDR_INVALID);
@@ -1620,17 +1651,94 @@
        return removed;
 }
 
+/*
+ * traverse L0 -> L1 -> L2 -> L3 table with growing pdp if needed.
+ */
+static int
+_pmap_get_pdp(struct pmap *pm, vaddr_t va, bool kenter, int flags,
+    paddr_t *pap, struct vm_page **pgp, bool *l3only)
+{
+       pd_entry_t *l0, *l1, *l2;
+       struct vm_page *pdppg, *pdppg0;
+       paddr_t pdppa, pdppa0;
+       unsigned int idx;
+       pd_entry_t pde;
+
+       KASSERT(kenter || mutex_owned(&pm->pm_lock));
+
+       l0 = pm->pm_l0table;
+
+       idx = l0pde_index(va);
+       pde = l0[idx];
+       if (!l0pde_valid(pde)) {
+               KASSERT(!kenter);
+               /* no need to increment L0 occupancy. L0 page never freed */
+               pdppa = pmap_alloc_pdp(pm, &pdppg, flags, false);  /* L1 pdp */
+               if (pdppa == POOL_PADDR_INVALID) {
+                       return ENOMEM;
+               }
+               atomic_swap_64(&l0[idx], pdppa | L0_TABLE);
+               _pmap_pdp_setparent(pm, pdppg, &l0[idx]);
+               *l3only = false;
+       } else {
+               pdppa = l0pde_pa(pde);
+               pdppg = NULL;
+       }
+       l1 = (void *)AARCH64_PA_TO_KVA(pdppa);
+
+       idx = l1pde_index(va);
+       pde = l1[idx];
+       if (!l1pde_valid(pde)) {
+               KASSERT(!kenter);
+               pdppa0 = pdppa;
+               pdppg0 = pdppg;
+               pdppa = pmap_alloc_pdp(pm, &pdppg, flags, false);  /* L2 pdp */
+               if (pdppa == POOL_PADDR_INVALID) {
+                       return ENOMEM;
+               }
+               atomic_swap_64(&l1[idx], pdppa | L1_TABLE);
+               _pmap_pdp_addref(pm, pdppa0, pdppg0);   /* L1 occupancy++ */
+               _pmap_pdp_setparent(pm, pdppg, &l1[idx]);
+               *l3only = false;
+       } else {
+               pdppa = l1pde_pa(pde);
+               pdppg = NULL;
+       }
+       l2 = (void *)AARCH64_PA_TO_KVA(pdppa);
+
+       idx = l2pde_index(va);
+       pde = l2[idx];
+       if (!l2pde_valid(pde)) {
+               KASSERT(!kenter);
+               pdppa0 = pdppa;
+               pdppg0 = pdppg;
+               pdppa = pmap_alloc_pdp(pm, &pdppg, flags, false);  /* L3 pdp */
+               if (pdppa == POOL_PADDR_INVALID) {
+                       return ENOMEM;
+               }
+               atomic_swap_64(&l2[idx], pdppa | L2_TABLE);
+               _pmap_pdp_addref(pm, pdppa0, pdppg0);   /* L2 occupancy++ */
+               _pmap_pdp_setparent(pm, pdppg, &l2[idx]);


