Source-Changes-HG archive
[src/trunk]: src/sys Another locking issue in NVMM: the {svm, vmx}_tlb_flush f...
details: https://anonhg.NetBSD.org/src/rev/a626767ad19a
branches: trunk
changeset: 449080:a626767ad19a
user: maxv <maxv%NetBSD.org@localhost>
date: Thu Feb 21 12:17:52 2019 +0000
description:
Another locking issue in NVMM: the {svm,vmx}_tlb_flush functions take VCPU
mutexes, which can sleep, but they are called from a context where sleeping is not allowed.
Rewrite the TLB handling code to fix that. It becomes a bit complex. In
short, we use a per-VM generation number, which we increase on each TLB
flush, before sending a broadcast IPI to everybody. The IPIs cause a
#VMEXIT of each VCPU, and each VCPU loop will synchronize the per-VM gen
with a per-VCPU copy, and apply the flushes lazily as needed.
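Roughly, the generation scheme works as in the minimal sketch below; the
names (machine, mach_gen, vcpu_gen, tlb_flush_request, vcpu_tlb_catchup)
are illustrative placeholders, not the actual NVMM identifiers.

/*
 * Minimal sketch of the per-VM generation-number scheme, with
 * illustrative names rather than the real NVMM fields.
 */
#include <stdatomic.h>
#include <stdint.h>

struct machine {
        _Atomic uint64_t mach_gen;      /* bumped on each requested flush */
};

struct vcpu {
        uint64_t vcpu_gen;              /* last generation this VCPU applied */
};

/* pmap callback: must not sleep, so only bump the gen and send IPIs. */
static void
tlb_flush_request(struct machine *mach)
{
        atomic_fetch_add(&mach->mach_gen, 1);
        /* broadcast_ipi();  forces a #VMEXIT on every running VCPU */
}

/* VCPU run loop, preemption disabled: flush lazily if behind. */
static void
vcpu_tlb_catchup(struct machine *mach, struct vcpu *vcpu)
{
        uint64_t machgen = atomic_load(&mach->mach_gen);

        if (machgen != vcpu->vcpu_gen) {
                /* flush the guest/host TLB for this VCPU here */
                vcpu->vcpu_gen = machgen;   /* ack once the flush took effect */
        }
}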
The behavior differs between AMD and Intel; in short, on Intel we don't
flush the hTLB (EPT cache) when a VCPU gets context-switched, so we now
need to maintain a kcpuset to know on which hCPUs each VCPU's hTLB is
still active. This creates some redundancy on Intel, i.e. there are cases
where we flush the hTLB several times unnecessarily; but hTLB flushes are
very rare, so there is no real performance regression.
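The Intel-side tracking is roughly as sketched below, with a plain 64-bit
mask standing in for the kcpuset(9) used in the real code; the names are
again illustrative, not the actual NVMM identifiers.

/*
 * Intel-side sketch: track, per VCPU, which hCPUs may still hold stale
 * EPT entries (bit i corresponds to hCPU i).
 */
#include <stdint.h>

struct vcpu_vmx {
        uint64_t htlb_want_flush;       /* bit i set => hCPU i needs INVEPT */
};

/* Generation mismatch detected in the run loop on hcpu. */
static void
vmx_htlb_flush_sketch(struct vcpu_vmx *vcpu, int hcpu)
{
        /* Any other hCPU may still cache this VCPU's EPT entries. */
        vcpu->htlb_want_flush = ~UINT64_C(0);
        /* invept();  flush on the current hCPU */
        vcpu->htlb_want_flush &= ~(UINT64_C(1) << hcpu);
}

/* Entering the run loop on hcpu: catch up if this hCPU is still stale. */
static void
vmx_htlb_catchup_sketch(struct vcpu_vmx *vcpu, int hcpu)
{
        if (vcpu->htlb_want_flush & (UINT64_C(1) << hcpu)) {
                /* invept();  flush stale EPT entries on this hCPU */
                vcpu->htlb_want_flush &= ~(UINT64_C(1) << hcpu);
        }
}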
The thing is lock-less and non-blocking, so it solves our problem.
diffstat:
sys/arch/x86/x86/x86_tlb.c | 5 +-
sys/dev/nvmm/x86/nvmm_x86_svm.c | 75 ++++++++++++++++++++++++------
sys/dev/nvmm/x86/nvmm_x86_vmx.c | 99 +++++++++++++++++++++++++++-------------
3 files changed, 128 insertions(+), 51 deletions(-)
diffs (truncated from 398 to 300 lines):
diff -r 16f17060507c -r a626767ad19a sys/arch/x86/x86/x86_tlb.c
--- a/sys/arch/x86/x86/x86_tlb.c Thu Feb 21 11:58:04 2019 +0000
+++ b/sys/arch/x86/x86/x86_tlb.c Thu Feb 21 12:17:52 2019 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: x86_tlb.c,v 1.5 2019/02/11 14:59:33 cherry Exp $ */
+/* $NetBSD: x86_tlb.c,v 1.6 2019/02/21 12:17:52 maxv Exp $ */
/*-
* Copyright (c) 2008-2012 The NetBSD Foundation, Inc.
@@ -40,7 +40,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: x86_tlb.c,v 1.5 2019/02/11 14:59:33 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: x86_tlb.c,v 1.6 2019/02/21 12:17:52 maxv Exp $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -229,6 +229,7 @@
if (__predict_false(pm->pm_tlb_flush != NULL)) {
(*pm->pm_tlb_flush)(pm);
+ return;
}
/*
diff -r 16f17060507c -r a626767ad19a sys/dev/nvmm/x86/nvmm_x86_svm.c
--- a/sys/dev/nvmm/x86/nvmm_x86_svm.c Thu Feb 21 11:58:04 2019 +0000
+++ b/sys/dev/nvmm/x86/nvmm_x86_svm.c Thu Feb 21 12:17:52 2019 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: nvmm_x86_svm.c,v 1.28 2019/02/21 11:58:04 maxv Exp $ */
+/* $NetBSD: nvmm_x86_svm.c,v 1.29 2019/02/21 12:17:52 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.28 2019/02/21 11:58:04 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.29 2019/02/21 12:17:52 maxv Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -493,6 +493,7 @@
struct svm_machdata {
bool cpuidpresent[SVM_NCPUIDS];
struct nvmm_x86_conf_cpuid cpuid[SVM_NCPUIDS];
+ volatile uint64_t mach_htlb_gen;
};
static const size_t svm_conf_sizes[NVMM_X86_NCONF] = {
@@ -503,6 +504,7 @@
/* General */
bool shared_asid;
bool gtlb_want_flush;
+ uint64_t vcpu_htlb_gen;
/* VMCB */
struct vmcb *vmcb;
@@ -1101,6 +1103,8 @@
svm_inject_gp(mach, vcpu);
}
+/* -------------------------------------------------------------------------- */
+
static void
svm_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu)
{
@@ -1197,18 +1201,57 @@
}
}
+static inline void
+svm_htlb_catchup(struct nvmm_cpu *vcpu, int hcpu)
+{
+ /*
+ * Nothing to do. If an hTLB flush was needed, either the VCPU was
+ * executing on this hCPU and the hTLB already got flushed, or it
+ * was executing on another hCPU in which case the catchup is done
+ * in svm_gtlb_catchup().
+ */
+}
+
+static inline uint64_t
+svm_htlb_flush(struct svm_machdata *machdata, struct svm_cpudata *cpudata)
+{
+ struct vmcb *vmcb = cpudata->vmcb;
+ uint64_t machgen;
+
+ machgen = machdata->mach_htlb_gen;
+ if (__predict_true(machgen == cpudata->vcpu_htlb_gen)) {
+ return machgen;
+ }
+
+ vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush;
+ return machgen;
+}
+
+static inline void
+svm_htlb_flush_ack(struct svm_cpudata *cpudata, uint64_t machgen)
+{
+ struct vmcb *vmcb = cpudata->vmcb;
+
+ if (__predict_true(vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID)) {
+ cpudata->vcpu_htlb_gen = machgen;
+ }
+}
+
static int
svm_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
struct nvmm_exit *exit)
{
+ struct svm_machdata *machdata = mach->machdata;
struct svm_cpudata *cpudata = vcpu->cpudata;
struct vmcb *vmcb = cpudata->vmcb;
+ uint64_t machgen;
int hcpu, s;
kpreempt_disable();
hcpu = cpu_number();
svm_gtlb_catchup(vcpu, hcpu);
+ svm_htlb_catchup(vcpu, hcpu);
if (vcpu->hcpu_last != hcpu) {
vmcb->ctrl.tsc_offset = cpudata->tsc_offset +
@@ -1227,9 +1270,11 @@
}
s = splhigh();
+ machgen = svm_htlb_flush(machdata, cpudata);
svm_vcpu_guest_fpu_enter(vcpu);
svm_vmrun(cpudata->vmcb_pa, cpudata->gprs);
svm_vcpu_guest_fpu_leave(vcpu);
+ svm_htlb_flush_ack(cpudata, machgen);
splx(s);
svm_vmcb_cache_default(vmcb);
@@ -1982,30 +2027,28 @@
svm_tlb_flush(struct pmap *pm)
{
struct nvmm_machine *mach = pm->pm_data;
- struct svm_cpudata *cpudata;
- struct nvmm_cpu *vcpu;
- int error;
- size_t i;
+ struct svm_machdata *machdata = mach->machdata;
- /* Request TLB flushes. */
- for (i = 0; i < NVMM_MAX_VCPUS; i++) {
- error = nvmm_vcpu_get(mach, i, &vcpu);
- if (error)
- continue;
- cpudata = vcpu->cpudata;
- cpudata->gtlb_want_flush = true;
- nvmm_vcpu_put(vcpu);
- }
+ atomic_inc_64(&machdata->mach_htlb_gen);
+
+ /* Generates IPIs, which cause #VMEXITs. */
+ pmap_tlb_shootdown(pmap_kernel(), -1, PG_G, TLBSHOOT_UPDATE);
}
static void
svm_machine_create(struct nvmm_machine *mach)
{
+ struct svm_machdata *machdata;
+
/* Fill in pmap info. */
mach->vm->vm_map.pmap->pm_data = (void *)mach;
mach->vm->vm_map.pmap->pm_tlb_flush = svm_tlb_flush;
- mach->machdata = kmem_zalloc(sizeof(struct svm_machdata), KM_SLEEP);
+ machdata = kmem_zalloc(sizeof(struct svm_machdata), KM_SLEEP);
+ mach->machdata = machdata;
+
+ /* Start with an hTLB flush everywhere. */
+ machdata->mach_htlb_gen = 1;
}
static void
diff -r 16f17060507c -r a626767ad19a sys/dev/nvmm/x86/nvmm_x86_vmx.c
--- a/sys/dev/nvmm/x86/nvmm_x86_vmx.c Thu Feb 21 11:58:04 2019 +0000
+++ b/sys/dev/nvmm/x86/nvmm_x86_vmx.c Thu Feb 21 12:17:52 2019 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: nvmm_x86_vmx.c,v 1.8 2019/02/21 11:58:04 maxv Exp $ */
+/* $NetBSD: nvmm_x86_vmx.c,v 1.9 2019/02/21 12:17:52 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_vmx.c,v 1.8 2019/02/21 11:58:04 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_vmx.c,v 1.9 2019/02/21 12:17:52 maxv Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -627,7 +627,7 @@
struct vmx_machdata {
bool cpuidpresent[VMX_NCPUIDS];
struct nvmm_x86_conf_cpuid cpuid[VMX_NCPUIDS];
- kcpuset_t *ept_want_flush;
+ volatile uint64_t mach_htlb_gen;
};
static const size_t vmx_conf_sizes[NVMM_X86_NCONF] = {
@@ -638,6 +638,8 @@
/* General */
uint64_t asid;
bool gtlb_want_flush;
+ uint64_t vcpu_htlb_gen;
+ kcpuset_t *htlb_want_flush;
/* VMCS */
struct vmcs *vmcs;
@@ -1510,6 +1512,8 @@
exit->u.mem.inst_len = 0;
}
+/* -------------------------------------------------------------------------- */
+
static void
vmx_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu)
{
@@ -1601,7 +1605,7 @@
wrmsr(MSR_KERNELGSBASE, cpudata->kernelgsbase);
}
-/* --------------------------------------------------------------------- */
+/* -------------------------------------------------------------------------- */
#define VMX_INVVPID_ADDRESS 0
#define VMX_INVVPID_CONTEXT 1
@@ -1621,6 +1625,49 @@
}
}
+static inline void
+vmx_htlb_catchup(struct nvmm_cpu *vcpu, int hcpu)
+{
+ struct vmx_cpudata *cpudata = vcpu->cpudata;
+ struct ept_desc ept_desc;
+
+ if (__predict_true(!kcpuset_isset(cpudata->htlb_want_flush, hcpu))) {
+ return;
+ }
+
+ vmx_vmread(VMCS_EPTP, &ept_desc.eptp);
+ ept_desc.mbz = 0;
+ vmx_invept(vmx_ept_flush_op, &ept_desc);
+ kcpuset_clear(cpudata->htlb_want_flush, hcpu);
+}
+
+static inline uint64_t
+vmx_htlb_flush(struct vmx_machdata *machdata, struct vmx_cpudata *cpudata)
+{
+ struct ept_desc ept_desc;
+ uint64_t machgen;
+
+ machgen = machdata->mach_htlb_gen;
+ if (__predict_true(machgen == cpudata->vcpu_htlb_gen)) {
+ return machgen;
+ }
+
+ kcpuset_copy(cpudata->htlb_want_flush, kcpuset_running);
+
+ vmx_vmread(VMCS_EPTP, &ept_desc.eptp);
+ ept_desc.mbz = 0;
+ vmx_invept(vmx_ept_flush_op, &ept_desc);
+
+ return machgen;
+}
+
+static inline void
+vmx_htlb_flush_ack(struct vmx_cpudata *cpudata, uint64_t machgen)
+{
+ cpudata->vcpu_htlb_gen = machgen;
+ kcpuset_clear(cpudata->htlb_want_flush, cpu_number());
+}
+
static int
vmx_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
struct nvmm_exit *exit)
@@ -1628,10 +1675,10 @@
struct vmx_machdata *machdata = mach->machdata;
struct vmx_cpudata *cpudata = vcpu->cpudata;
struct vpid_desc vpid_desc;
- struct ept_desc ept_desc;
struct cpu_info *ci;
uint64_t exitcode;
uint64_t intstate;
+ uint64_t machgen;
int hcpu, s, ret;
bool launched = false;
@@ -1640,13 +1687,7 @@
hcpu = cpu_number();