Source-Changes-HG archive
[src/trunk]: src/sys/arch/sparc64 - add a kmutex_t ci_ctx_lock to struct cpu_...
details: https://anonhg.NetBSD.org/src/rev/09dff652a483
branches: trunk
changeset: 753427:09dff652a483
user: mrg <mrg%NetBSD.org@localhost>
date: Sun Mar 28 05:24:00 2010 +0000
description:
- add a kmutex_t ci_ctx_lock to struct cpu_info, initialise it in
cpu_pmap_init(), and replace pmap_ctx_lock usage with this new ci_ctx_lock
- replace smp_dcache_flush_page_all() with smp_dcache_flush_page_cpuset(),
which flushes only on the specified set of CPUs, not on all of them. add a
new dcache_flush_page_cpuset() to flush a page from the D$ only on the
specified set of CPUs.
- add a cpuset argument to pmap_free_page() and use it when freeing PTE
pages as a pmap is destroyed
- introduce pmap_free_page_noflush(), and use it when we allocated a
page for PTEs but didn't use it and don't need to flush it
- don't bother with pmap_lock in pmap_extract(); the only potential
issue is pseg_get(), which is already safe
tested on sb2000, sb2500 and ultra80 with a variety of heavy workloads;
this seems to give a clear 1-2% speedup for high-forking / short-lived
processes, such as ./configure.
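In outline, these changes replace the single global pmap_ctx_lock with one
spin mutex per CPU, and make the teardown paths remember which CPUs actually
held a context for the dying pmap, so that the subsequent D$ flushes can be
multicast to just those CPUs rather than broadcast. A minimal sketch of the
new pattern, condensed from the pmap_destroy() hunk below (the nested tests
are folded into one condition, and the per-page loop is reduced to a single
illustrative flush of a stand-in physical address pa):

	sparc64_cpuset_t pmap_cpus_active;
	struct cpu_info *ci;

	CPUSET_CLEAR(pmap_cpus_active);
	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
		/*
		 * Per-CPU lock, where the old code held the global
		 * pmap_ctx_lock around the whole loop.
		 */
		mutex_enter(&ci->ci_ctx_lock);
		if (CPUSET_HAS(cpus_active, ci->ci_index) &&
		    pm->pm_ctx[ci->ci_index] > 0) {
			/* record that this CPU held a context ... */
			CPUSET_ADD(pmap_cpus_active, ci->ci_index);
			ctx_free(pm, ci);
		}
		mutex_exit(&ci->ci_ctx_lock);
	}
	/* ... so the flush IPIs go only to those CPUs */
	dcache_flush_page_cpuset(pa, pmap_cpus_active);

(pa above is just illustrative; the real code flushes each page on the
pmap's page list, as the full diff shows.)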
diffstat:
sys/arch/sparc64/include/cpu.h | 5 +-
sys/arch/sparc64/sparc64/cache.h | 8 +-
sys/arch/sparc64/sparc64/ipifuncs.c | 10 +-
sys/arch/sparc64/sparc64/pmap.c | 97 ++++++++++++++++++++++++------------
4 files changed, 77 insertions(+), 43 deletions(-)
diffs (truncated from 345 to 300 lines):
diff -r 05d6be25b611 -r 09dff652a483 sys/arch/sparc64/include/cpu.h
--- a/sys/arch/sparc64/include/cpu.h Sun Mar 28 04:29:34 2010 +0000
+++ b/sys/arch/sparc64/include/cpu.h Sun Mar 28 05:24:00 2010 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.h,v 1.89 2010/03/06 08:08:29 mrg Exp $ */
+/* $NetBSD: cpu.h,v 1.90 2010/03/28 05:24:00 mrg Exp $ */
/*
* Copyright (c) 1992, 1993
@@ -151,7 +151,10 @@
* the right pointer and you get to the pmap segment tables. These are
* physical addresses, of course.
*
+ * ci_ctx_lock protects this CPU's context allocation/free.
+ * These are all allocated almost within the same cacheline.
*/
+ kmutex_t ci_ctx_lock;
int ci_pmap_next_ctx;
int ci_numctx;
paddr_t *ci_ctxbusy;
diff -r 05d6be25b611 -r 09dff652a483 sys/arch/sparc64/sparc64/cache.h
--- a/sys/arch/sparc64/sparc64/cache.h Sun Mar 28 04:29:34 2010 +0000
+++ b/sys/arch/sparc64/sparc64/cache.h Sun Mar 28 05:24:00 2010 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cache.h,v 1.17 2010/03/08 08:59:06 mrg Exp $ */
+/* $NetBSD: cache.h,v 1.18 2010/03/28 05:24:00 mrg Exp $ */
/*
* Copyright (c) 1996
@@ -144,14 +144,16 @@
#ifdef MULTIPROCESSOR
void smp_tlb_flush_pte(vaddr_t, struct pmap *);
-void smp_dcache_flush_page_all(paddr_t pa);
+void smp_dcache_flush_page_cpuset(paddr_t pa, sparc64_cpuset_t);
void smp_blast_dcache(sparc64_cpuset_t);
#define tlb_flush_pte(va,pm) smp_tlb_flush_pte(va, pm)
-#define dcache_flush_page_all(pa) smp_dcache_flush_page_all(pa)
+#define dcache_flush_page_all(pa) smp_dcache_flush_page_cpuset(pa, cpus_active)
+#define dcache_flush_page_cpuset(pa,cs) smp_dcache_flush_page_cpuset(pa, cs)
#define blast_dcache() smp_blast_dcache(cpus_active)
#else
#define tlb_flush_pte(va,pm) sp_tlb_flush_pte(va, (pm)->pm_ctx[0])
#define dcache_flush_page_all(pa) dcache_flush_page(pa)
+#define dcache_flush_page_cpuset(pa,cs) dcache_flush_page(pa)
#define blast_dcache() sp_blast_dcache(dcache_size, \
dcache_line_size)
#endif
diff -r 05d6be25b611 -r 09dff652a483 sys/arch/sparc64/sparc64/ipifuncs.c
--- a/sys/arch/sparc64/sparc64/ipifuncs.c Sun Mar 28 04:29:34 2010 +0000
+++ b/sys/arch/sparc64/sparc64/ipifuncs.c Sun Mar 28 05:24:00 2010 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ipifuncs.c,v 1.35 2010/03/08 08:59:06 mrg Exp $ */
+/* $NetBSD: ipifuncs.c,v 1.36 2010/03/28 05:24:00 mrg Exp $ */
/*-
* Copyright (c) 2004 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ipifuncs.c,v 1.35 2010/03/08 08:59:06 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ipifuncs.c,v 1.36 2010/03/28 05:24:00 mrg Exp $");
#include "opt_ddb.h"
@@ -412,10 +412,10 @@
}
/*
- * Make sure this page is flushed from all CPUs.
+ * Make sure this page is flushed from the specified set of CPUs.
*/
void
-smp_dcache_flush_page_all(paddr_t pa)
+smp_dcache_flush_page_cpuset(paddr_t pa, sparc64_cpuset_t activecpus)
{
ipifunc_t func;
@@ -424,7 +424,7 @@
else
func = sparc64_ipi_dcache_flush_page_us;
- sparc64_broadcast_ipi(func, pa, dcache_line_size);
+ sparc64_multicast_ipi(activecpus, func, pa, dcache_line_size);
dcache_flush_page(pa);
}
diff -r 05d6be25b611 -r 09dff652a483 sys/arch/sparc64/sparc64/pmap.c
--- a/sys/arch/sparc64/sparc64/pmap.c Sun Mar 28 04:29:34 2010 +0000
+++ b/sys/arch/sparc64/sparc64/pmap.c Sun Mar 28 05:24:00 2010 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.c,v 1.261 2010/03/21 22:38:08 mrg Exp $ */
+/* $NetBSD: pmap.c,v 1.262 2010/03/28 05:24:00 mrg Exp $ */
/*
*
* Copyright (C) 1996-1999 Eduardo Horvath.
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.261 2010/03/21 22:38:08 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.262 2010/03/28 05:24:00 mrg Exp $");
#undef NO_VCACHE /* Don't forget the locked TLB in dostart */
#define HWREF
@@ -323,14 +323,14 @@
#define pv_check()
-static int pmap_get_page(paddr_t *p);
-static void pmap_free_page(paddr_t pa);
+static int pmap_get_page(paddr_t *);
+static void pmap_free_page(paddr_t, sparc64_cpuset_t);
+static void pmap_free_page_noflush(paddr_t);
/*
- * Global pmap lock.
+ * Global pmap locks.
*/
static kmutex_t pmap_lock;
-static kmutex_t pmap_ctx_lock;
static bool lock_available = false;
/*
@@ -1226,6 +1226,7 @@
{
size_t ctxsize;
+ mutex_init(&ci->ci_ctx_lock, MUTEX_SPIN, IPL_VM);
ci->ci_pmap_next_ctx = 1;
#ifdef SUN4V
#error find out if we have 16 or 13 bit context ids
@@ -1295,7 +1296,6 @@
vm_num_phys = avail_end - avail_start;
mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_NONE);
- mutex_init(&pmap_ctx_lock, MUTEX_SPIN, IPL_VM);
#if defined(USE_LOCKSAFE_PSEG_GETSET)
mutex_init(&pseg_lock, MUTEX_SPIN, IPL_VM);
#endif
@@ -1410,6 +1410,9 @@
{
#ifdef MULTIPROCESSOR
struct cpu_info *ci;
+ sparc64_cpuset_t pmap_cpus_active;
+#else
+#define pmap_cpus_active 0
#endif
struct vm_page *pg, *nextpg;
@@ -1417,26 +1420,36 @@
return;
}
DPRINTF(PDB_DESTROY, ("pmap_destroy: freeing pmap %p\n", pm));
- mutex_enter(&pmap_ctx_lock);
#ifdef MULTIPROCESSOR
+ CPUSET_CLEAR(pmap_cpus_active);
for (ci = cpus; ci != NULL; ci = ci->ci_next) {
- if (CPUSET_HAS(cpus_active, ci->ci_index))
- ctx_free(pm, ci);
+ /* XXXMRG: Move the lock inside one or both tests? */
+ mutex_enter(&ci->ci_ctx_lock);
+ if (CPUSET_HAS(cpus_active, ci->ci_index)) {
+ if (pm->pm_ctx[ci->ci_index] > 0) {
+ CPUSET_ADD(pmap_cpus_active, ci->ci_index);
+ ctx_free(pm, ci);
+ }
+ }
+ mutex_exit(&ci->ci_ctx_lock);
}
#else
- ctx_free(pm, curcpu());
+ if (pmap_ctx(pm)) {
+ mutex_enter(&curcpu()->ci_ctx_lock);
+ ctx_free(pm, curcpu());
+ mutex_exit(&curcpu()->ci_ctx_lock);
+ }
#endif
- mutex_exit(&pmap_ctx_lock);
/* we could be a little smarter and leave pages zeroed */
for (pg = TAILQ_FIRST(&pm->pm_obj.memq); pg != NULL; pg = nextpg) {
nextpg = TAILQ_NEXT(pg, listq.queue);
TAILQ_REMOVE(&pm->pm_obj.memq, pg, listq.queue);
KASSERT(pg->mdpage.mdpg_pvh.pv_pmap == NULL);
- dcache_flush_page_all(VM_PAGE_TO_PHYS(pg));
+ dcache_flush_page_cpuset(VM_PAGE_TO_PHYS(pg), pmap_cpus_active);
uvm_pagefree(pg);
}
- pmap_free_page((paddr_t)(u_long)pm->pm_segs);
+ pmap_free_page((paddr_t)(u_long)pm->pm_segs, pmap_cpus_active);
UVM_OBJ_DESTROY(&pm->pm_obj);
pool_cache_put(&pmap_cache, pm);
}
@@ -1555,7 +1568,7 @@
/* We allocated a spare page but didn't use it. Free it. */
printf("pmap_kenter_pa: freeing unused page %llx\n",
(long long)ptp);
- pmap_free_page(ptp);
+ pmap_free_page_noflush(ptp);
}
#ifdef DEBUG
i = ptelookup_va(va);
@@ -1826,7 +1839,7 @@
/* We allocated a spare page but didn't use it. Free it. */
printf("pmap_enter: freeing unused page %llx\n",
(long long)ptp);
- pmap_free_page(ptp);
+ pmap_free_page_noflush(ptp);
}
if (dopv) {
pmap_enter_pv(pm, va, pa, pg, npv);
@@ -1924,22 +1937,36 @@
write_user_windows();
pm->pm_refs = 0;
- mutex_enter(&pmap_ctx_lock);
+ /*
+ * XXXMRG: pmap_destroy() does exactly the same dance here.
+ * surely one of them isn't necessary?
+ */
#ifdef MULTIPROCESSOR
CPUSET_CLEAR(pmap_cpus_active);
for (ci = cpus; ci != NULL; ci = ci->ci_next) {
+ /* XXXMRG: Move the lock inside one or both tests? */
+ mutex_enter(&ci->ci_ctx_lock);
if (CPUSET_HAS(cpus_active, ci->ci_index)) {
- if (pm->pm_ctx[ci->ci_index] > 0)
+ if (pm->pm_ctx[ci->ci_index] > 0) {
CPUSET_ADD(pmap_cpus_active, ci->ci_index);
- ctx_free(pm, ci);
+ ctx_free(pm, ci);
+ }
}
+ mutex_exit(&ci->ci_ctx_lock);
}
#else
- ctx_free(pm, curcpu());
+ if (pmap_ctx(pm)) {
+ mutex_enter(&curcpu()->ci_ctx_lock);
+ ctx_free(pm, curcpu());
+ mutex_exit(&curcpu()->ci_ctx_lock);
+ }
#endif
- mutex_exit(&pmap_ctx_lock);
REMOVE_STAT(flushes);
+ /*
+ * XXXMRG: couldn't we do something less severe here, and
+ * only flush the right context on each CPU?
+ */
#ifdef MULTIPROCESSOR
smp_blast_dcache(pmap_cpus_active);
#else
@@ -2021,7 +2048,8 @@
continue;
/*
- * if the pmap is being torn down, don't bother flushing.
+ * if the pmap is being torn down, don't bother flushing,
+ * we have already done so.
*/
if (!pm->pm_refs)
@@ -2166,9 +2194,6 @@
*pap = pa;
return TRUE;
} else {
- if (pm != pmap_kernel()) {
- mutex_enter(&pmap_lock);
- }
data = pseg_get(pm, va);
pa = data & TLB_PA_MASK;
#ifdef DEBUG
@@ -2200,9 +2225,6 @@
printf(" pseg_get: %lx\n", (long)pa);
}
#endif
- if (pm != pmap_kernel()) {
- mutex_exit(&pmap_lock);
- }
}
if ((data & TLB_V) == 0)
return (FALSE);
@@ -3073,7 +3095,7 @@
KASSERT(pm != pmap_kernel());
KASSERT(pm == curproc->p_vmspace->vm_map.pmap);
- mutex_enter(&pmap_ctx_lock);
+ mutex_enter(&curcpu()->ci_ctx_lock);
ctx = curcpu()->ci_pmap_next_ctx++;
/*
@@ -3108,7 +3130,7 @@
curcpu()->ci_ctxbusy[ctx] = pm->pm_physaddr;
LIST_INSERT_HEAD(&curcpu()->ci_pmap_ctxlist, pm, pm_list[cpu_number()]);
pmap_ctx(pm) = ctx;
- mutex_exit(&pmap_ctx_lock);
+ mutex_exit(&curcpu()->ci_ctx_lock);