Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/aarch64 Part IV of ad's performance improvements for aarch64
details: https://anonhg.NetBSD.org/src/rev/09e695686b52
branches: trunk
changeset: 1012853:09e695686b52
user: skrll <skrll%NetBSD.org@localhost>
date: Wed Aug 12 13:36:36 2020 +0000
description:
Part IV of ad's performance improvements for aarch64
- Implement pmap_growkernel(), and update the kernel pmap's stats with atomics.
- As a result, pmap_kenter_pa() and pmap_kremove() no longer need to allocate
memory or take pm_lock, because they only modify L3 PTEs.
- In turn, pm_lock and pp_pvlock can become adaptive mutexes at IPL_NONE, which
are cheaper than spin mutexes.
- Take the pmap's lock in pmap_extract() when the pmap is not the kernel's
pmap; otherwise pmap_extract() might see inconsistent state.
diffstat:
sys/arch/aarch64/aarch64/pmap.c | 346 +++++++++++++++++++++++----------------
sys/arch/aarch64/include/pmap.h | 4 +-
2 files changed, 201 insertions(+), 149 deletions(-)
diffs (truncated from 585 to 300 lines):
diff -r 880953549e67 -r 09e695686b52 sys/arch/aarch64/aarch64/pmap.c
--- a/sys/arch/aarch64/aarch64/pmap.c Wed Aug 12 13:28:46 2020 +0000
+++ b/sys/arch/aarch64/aarch64/pmap.c Wed Aug 12 13:36:36 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.c,v 1.85 2020/08/09 09:11:41 skrll Exp $ */
+/* $NetBSD: pmap.c,v 1.86 2020/08/12 13:36:36 skrll Exp $ */
/*
* Copyright (c) 2017 Ryo Shimizu <ryo%nerv.org@localhost>
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.85 2020/08/09 09:11:41 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.86 2020/08/12 13:36:36 skrll Exp $");
#include "opt_arm_debug.h"
#include "opt_ddb.h"
@@ -195,6 +195,8 @@
static void _pmap_remove(struct pmap *, vaddr_t, vaddr_t, bool,
struct pv_entry **);
static int _pmap_enter(struct pmap *, vaddr_t, paddr_t, vm_prot_t, u_int, bool);
+static int _pmap_get_pdp(struct pmap *, vaddr_t, bool, int, paddr_t *,
+ struct vm_page **, bool *);
static struct pmap kernel_pmap __cacheline_aligned;
@@ -215,27 +217,27 @@
pmap_pv_lock(struct pmap_page *pp)
{
- mutex_spin_enter(&pp->pp_pvlock);
+ mutex_enter(&pp->pp_pvlock);
}
static inline void
pmap_pv_unlock(struct pmap_page *pp)
{
- mutex_spin_exit(&pp->pp_pvlock);
+ mutex_exit(&pp->pp_pvlock);
}
static inline void
pm_lock(struct pmap *pm)
{
- mutex_spin_enter(&pm->pm_lock);
+ mutex_enter(&pm->pm_lock);
}
static inline void
pm_unlock(struct pmap *pm)
{
- mutex_spin_exit(&pm->pm_lock);
+ mutex_exit(&pm->pm_lock);
}
static bool
@@ -249,13 +251,13 @@
if (pm != pmap_kernel())
pmap_reference(pm);
- mutex_spin_exit(&pp->pp_pvlock);
- mutex_spin_enter(&pm->pm_lock);
+ mutex_exit(&pp->pp_pvlock);
+ mutex_enter(&pm->pm_lock);
/* nothing, just wait for lock */
- mutex_spin_exit(&pm->pm_lock);
+ mutex_exit(&pm->pm_lock);
if (pm != pmap_kernel())
pmap_destroy(pm);
- mutex_spin_enter(&pp->pp_pvlock);
+ mutex_enter(&pp->pp_pvlock);
return false;
}
@@ -471,26 +473,34 @@
kpm->pm_l0table_pa = l0pa;
kpm->pm_activated = true;
LIST_INIT(&kpm->pm_vmlist);
- mutex_init(&kpm->pm_lock, MUTEX_DEFAULT, IPL_VM);
+ mutex_init(&kpm->pm_lock, MUTEX_DEFAULT, IPL_NONE);
CTASSERT(sizeof(kpm->pm_stats.wired_count) == sizeof(long));
CTASSERT(sizeof(kpm->pm_stats.resident_count) == sizeof(long));
-#define PMSTAT_INC_WIRED_COUNT(pm) do { \
- KASSERT(mutex_owned(&(pm)->pm_lock)); \
- (pm)->pm_stats.wired_count++; \
-} while (/* CONSTCOND */ 0);
-#define PMSTAT_DEC_WIRED_COUNT(pm) do{ \
- KASSERT(mutex_owned(&(pm)->pm_lock)); \
- (pm)->pm_stats.wired_count--; \
-} while (/* CONSTCOND */ 0);
-#define PMSTAT_INC_RESIDENT_COUNT(pm) do { \
- KASSERT(mutex_owned(&(pm)->pm_lock)); \
- (pm)->pm_stats.resident_count++; \
-} while (/* CONSTCOND */ 0);
-#define PMSTAT_DEC_RESIDENT_COUNT(pm) do { \
- KASSERT(mutex_owned(&(pm)->pm_lock)); \
- (pm)->pm_stats.resident_count--; \
-} while (/* CONSTCOND */ 0);
+}
+
+static inline void
+_pmap_adj_wired_count(struct pmap *pm, int adj)
+{
+
+ if (pm == pmap_kernel()) {
+ atomic_add_long(&pm->pm_stats.wired_count, adj);
+ } else {
+ KASSERT(mutex_owned(&pm->pm_lock));
+ pm->pm_stats.wired_count += adj;
+ }
+}
+
+static inline void
+_pmap_adj_resident_count(struct pmap *pm, int adj)
+{
+
+ if (pm == pmap_kernel()) {
+ atomic_add_long(&pm->pm_stats.resident_count, adj);
+ } else {
+ KASSERT(mutex_owned(&pm->pm_lock));
+ pm->pm_stats.resident_count += adj;
+ }
}
inline static int
@@ -709,18 +719,34 @@
vaddr_t
pmap_growkernel(vaddr_t maxkvaddr)
{
+ struct pmap *pm = pmap_kernel();
+ struct vm_page *pg;
+ bool l3only = true;
+ int error;
+ vaddr_t va;
+ paddr_t pa;
+
UVMHIST_FUNC(__func__);
UVMHIST_CALLED(pmaphist);
UVMHIST_LOG(pmaphist, "maxkvaddr=%llx, pmap_maxkvaddr=%llx",
maxkvaddr, pmap_maxkvaddr, 0, 0);
+ mutex_enter(&pm->pm_lock);
+ for (va = pmap_maxkvaddr & L2_FRAME; va <= maxkvaddr; va += L2_SIZE) {
+ error = _pmap_get_pdp(pm, va, false, 0, &pa, &pg, &l3only);
+ if (error != 0) {
+ panic("%s: cannot allocate L3 table error=%d",
+ __func__, error);
+ }
+ }
+ aarch64_tlbi_by_asid(pm->pm_asid);
kasan_shadow_map((void *)pmap_maxkvaddr,
- (size_t)(maxkvaddr - pmap_maxkvaddr));
-
- pmap_maxkvaddr = maxkvaddr;
-
- return maxkvaddr;
+ (size_t)(va - pmap_maxkvaddr));
+ pmap_maxkvaddr = va;
+ mutex_exit(&pm->pm_lock);
+
+ return va;
}
bool
@@ -738,7 +764,7 @@
paddr_t pa;
vsize_t blocksize = 0;
int space;
- bool coherency;
+ bool coherency, valid;
extern char __kernel_text[];
extern char _end[];
@@ -781,12 +807,17 @@
* because the page may be in an access fault state due to
* reference bit emulation.
*/
+ if (pm != pmap_kernel())
+ mutex_enter(&pm->pm_lock);
ptep = _pmap_pte_lookup_bs(pm, va, &blocksize);
- if (ptep == NULL)
+ valid = (ptep != NULL && lxpde_valid(pte = *ptep));
+ if (pm != pmap_kernel())
+ mutex_exit(&pm->pm_lock);
+
+ if (!valid) {
return false;
- pte = *ptep;
- if (!lxpde_valid(pte))
- return false;
+ }
+
pa = lxpde_pa(pte) + (va & (blocksize - 1));
switch (pte & LX_BLKPAG_ATTR_MASK) {
@@ -834,6 +865,8 @@
vsize_t blocksize;
unsigned int idx;
+ KASSERT(pm == pmap_kernel() || mutex_owned(&pm->pm_lock));
+
/*
* traverse L0 -> L1 -> L2 -> L3
*/
@@ -1220,9 +1253,7 @@
KDASSERT(!IN_KSEG_ADDR(va));
KDASSERT(IN_RANGE(va, VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS));
- pm_lock(kpm);
_pmap_remove(kpm, va, va + size, true, NULL);
- pm_unlock(kpm);
}
static void
@@ -1455,7 +1486,7 @@
pm->pm_idlepdp = 0;
pm->pm_asid = -1;
LIST_INIT(&pm->pm_vmlist);
- mutex_init(&pm->pm_lock, MUTEX_DEFAULT, IPL_VM);
+ mutex_init(&pm->pm_lock, MUTEX_DEFAULT, IPL_NONE);
pm->pm_l0table_pa = pmap_alloc_pdp(pm, NULL, 0, true);
KASSERT(pm->pm_l0table_pa != POOL_PADDR_INVALID);
@@ -1620,17 +1651,94 @@
return removed;
}
+/*
+ * traverse L0 -> L1 -> L2 -> L3 table with growing pdp if needed.
+ */
+static int
+_pmap_get_pdp(struct pmap *pm, vaddr_t va, bool kenter, int flags,
+ paddr_t *pap, struct vm_page **pgp, bool *l3only)
+{
+ pd_entry_t *l0, *l1, *l2;
+ struct vm_page *pdppg, *pdppg0;
+ paddr_t pdppa, pdppa0;
+ unsigned int idx;
+ pd_entry_t pde;
+
+ KASSERT(kenter || mutex_owned(&pm->pm_lock));
+
+ l0 = pm->pm_l0table;
+
+ idx = l0pde_index(va);
+ pde = l0[idx];
+ if (!l0pde_valid(pde)) {
+ KASSERT(!kenter);
+ /* no need to increment L0 occupancy. L0 page never freed */
+ pdppa = pmap_alloc_pdp(pm, &pdppg, flags, false); /* L1 pdp */
+ if (pdppa == POOL_PADDR_INVALID) {
+ return ENOMEM;
+ }
+ atomic_swap_64(&l0[idx], pdppa | L0_TABLE);
+ _pmap_pdp_setparent(pm, pdppg, &l0[idx]);
+ *l3only = false;
+ } else {
+ pdppa = l0pde_pa(pde);
+ pdppg = NULL;
+ }
+ l1 = (void *)AARCH64_PA_TO_KVA(pdppa);
+
+ idx = l1pde_index(va);
+ pde = l1[idx];
+ if (!l1pde_valid(pde)) {
+ KASSERT(!kenter);
+ pdppa0 = pdppa;
+ pdppg0 = pdppg;
+ pdppa = pmap_alloc_pdp(pm, &pdppg, flags, false); /* L2 pdp */
+ if (pdppa == POOL_PADDR_INVALID) {
+ return ENOMEM;
+ }
+ atomic_swap_64(&l1[idx], pdppa | L1_TABLE);
+ _pmap_pdp_addref(pm, pdppa0, pdppg0); /* L1 occupancy++ */
+ _pmap_pdp_setparent(pm, pdppg, &l1[idx]);
+ *l3only = false;
+ } else {
+ pdppa = l1pde_pa(pde);
+ pdppg = NULL;
+ }
+ l2 = (void *)AARCH64_PA_TO_KVA(pdppa);
+
+ idx = l2pde_index(va);
+ pde = l2[idx];
+ if (!l2pde_valid(pde)) {
+ KASSERT(!kenter);
+ pdppa0 = pdppa;
+ pdppg0 = pdppg;
+ pdppa = pmap_alloc_pdp(pm, &pdppg, flags, false); /* L3 pdp */
+ if (pdppa == POOL_PADDR_INVALID) {
+ return ENOMEM;
+ }
+ atomic_swap_64(&l2[idx], pdppa | L2_TABLE);
+ _pmap_pdp_addref(pm, pdppa0, pdppg0); /* L2 occupancy++ */
+ _pmap_pdp_setparent(pm, pdppg, &l2[idx]);
Home |
Main Index |
Thread Index |
Old Index