Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys Add Intel-VMX support in NVMM. This allows us to run har...
details: https://anonhg.NetBSD.org/src/rev/afcde54c95c5
branches: trunk
changeset: 996929:afcde54c95c5
user: maxv <maxv%NetBSD.org@localhost>
date: Wed Feb 13 16:03:16 2019 +0000
description:
Add Intel-VMX support in NVMM. This allows us to run hardware-accelerated
VMs on Intel CPUs. Overall this implementation is fast and reliable, I am
able to run NetBSD VMs with many VCPUs on a quad-core Intel i5.
NVMM-Intel applies several optimizations already present in NVMM-AMD, and
has a code structure similar to it. No change was needed in the NVMM MI
frontend, or in libnvmm.
Some differences exist against AMD:
- On Intel the ASID space is big, so we don't fall back to a shared ASID
when there are more VCPUs executing than available ASIDs in the host,
contrary to AMD. There are enough ASIDs for the maximum number of VCPUs
supported by NVMM.
- On Intel there are two TLBs we need to take care of, one for the host
(EPT) and one for the guest (VPID). Changes in EPT paging flush the
host TLB, changes to the guest mode flush the guest TLB.
- On Intel there is no easy way to set/fetch the VTPR, so we intercept
reads/writes to CR8 and maintain a software TPR, that we give to the
virtualizer as if it was the effective TPR in the guest.
- On Intel, because of SVS, the host CR4 and LSTAR are not static, so
we're forced to save them on each VMENTRY.
- There is extra Intel weirdness we need to take care of, for example the
reserved bits in CR0 and CR4 when accesses trap.
While this implementation is functional and can already run many OSes, we
likely have a problem on 32bit-PAE guests, because they require special
care on Intel CPUs, and currently we don't handle that correctly; such
guests may misbehave for now (without altering the host stability). I
expect to fix that soon.
diffstat:
sys/dev/nvmm/nvmm.c | 7 +-
sys/dev/nvmm/nvmm_internal.h | 3 +-
sys/dev/nvmm/x86/nvmm_x86_vmx.c | 2823 +++++++++++++++++++++++++++++++++++
sys/dev/nvmm/x86/nvmm_x86_vmxfunc.S | 357 ++++
sys/modules/nvmm/Makefile | 3 +-
5 files changed, 3188 insertions(+), 5 deletions(-)
diffs (truncated from 3249 to 300 lines):
diff -r a20dd2ab1c74 -r afcde54c95c5 sys/dev/nvmm/nvmm.c
--- a/sys/dev/nvmm/nvmm.c Wed Feb 13 14:55:29 2019 +0000
+++ b/sys/dev/nvmm/nvmm.c Wed Feb 13 16:03:16 2019 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: nvmm.c,v 1.6 2019/01/26 15:25:51 maxv Exp $ */
+/* $NetBSD: nvmm.c,v 1.7 2019/02/13 16:03:16 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.6 2019/01/26 15:25:51 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.7 2019/02/13 16:03:16 maxv Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -54,7 +54,8 @@
static struct nvmm_machine machines[NVMM_MAX_MACHINES];
static const struct nvmm_impl *nvmm_impl_list[] = {
- &nvmm_x86_svm /* x86 AMD SVM */
+ &nvmm_x86_svm, /* x86 AMD SVM */
+ &nvmm_x86_vmx /* x86 Intel VMX */
};
static const struct nvmm_impl *nvmm_impl = NULL;
diff -r a20dd2ab1c74 -r afcde54c95c5 sys/dev/nvmm/nvmm_internal.h
--- a/sys/dev/nvmm/nvmm_internal.h Wed Feb 13 14:55:29 2019 +0000
+++ b/sys/dev/nvmm/nvmm_internal.h Wed Feb 13 16:03:16 2019 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: nvmm_internal.h,v 1.4 2019/02/11 07:07:37 maxv Exp $ */
+/* $NetBSD: nvmm_internal.h,v 1.5 2019/02/13 16:03:16 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -109,5 +109,6 @@
void nvmm_vcpu_put(struct nvmm_cpu *);
extern const struct nvmm_impl nvmm_x86_svm;
+extern const struct nvmm_impl nvmm_x86_vmx;
#endif /* _NVMM_INTERNAL_H_ */
diff -r a20dd2ab1c74 -r afcde54c95c5 sys/dev/nvmm/x86/nvmm_x86_vmx.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/dev/nvmm/x86/nvmm_x86_vmx.c Wed Feb 13 16:03:16 2019 +0000
@@ -0,0 +1,2823 @@
+/* $NetBSD: nvmm_x86_vmx.c,v 1.1 2019/02/13 16:03:16 maxv Exp $ */
+
+/*
+ * Copyright (c) 2018 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Maxime Villard.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_vmx.c,v 1.1 2019/02/13 16:03:16 maxv Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/kmem.h>
+#include <sys/cpu.h>
+#include <sys/xcall.h>
+
+#include <uvm/uvm.h>
+#include <uvm/uvm_page.h>
+
+#include <x86/cputypes.h>
+#include <x86/specialreg.h>
+#include <x86/pmap.h>
+#include <x86/dbregs.h>
+#include <machine/cpuvar.h>
+
+#include <dev/nvmm/nvmm.h>
+#include <dev/nvmm/nvmm_internal.h>
+#include <dev/nvmm/x86/nvmm_x86.h>
+
+int _vmx_vmxon(paddr_t *pa); /* _vmx_*: unchecked primitives; the vmx_*() macros below panic when one returns non-zero */
+int _vmx_vmxoff(void);
+int _vmx_invept(uint64_t op, void *desc);
+int _vmx_invvpid(uint64_t op, void *desc);
+int _vmx_vmread(uint64_t op, uint64_t *val);
+int _vmx_vmwrite(uint64_t op, uint64_t val);
+int _vmx_vmptrld(paddr_t *pa);
+int _vmx_vmptrst(paddr_t *pa);
+int _vmx_vmclear(paddr_t *pa);
+int vmx_vmlaunch(uint64_t *gprs); /* NOTE(review): no checked wrapper is visible for vmlaunch/vmresume; presumably the caller inspects the result -- confirm */
+int vmx_vmresume(uint64_t *gprs);
+
+#define vmx_vmxon(a) \
+ if (__predict_false(_vmx_vmxon(a) != 0)) { \
+ panic("%s: VMXON failed", __func__); \
+ }
+#define vmx_vmxoff() \
+ if (__predict_false(_vmx_vmxoff() != 0)) { \
+ panic("%s: VMXOFF failed", __func__); \
+ }
+#define vmx_invept(a, b) \
+ if (__predict_false(_vmx_invept(a, b) != 0)) { \
+ panic("%s: INVEPT failed", __func__); \
+ }
+#define vmx_invvpid(a, b) \
+ if (__predict_false(_vmx_invvpid(a, b) != 0)) { \
+ panic("%s: INVVPID failed", __func__); \
+ }
+#define vmx_vmread(a, b) \
+ if (__predict_false(_vmx_vmread(a, b) != 0)) { \
+ panic("%s: VMREAD failed", __func__); \
+ }
+#define vmx_vmwrite(a, b) \
+ if (__predict_false(_vmx_vmwrite(a, b) != 0)) { \
+ panic("%s: VMWRITE failed", __func__); \
+ }
+#define vmx_vmptrld(a) \
+ if (__predict_false(_vmx_vmptrld(a) != 0)) { \
+ panic("%s: VMPTRLD failed", __func__); \
+ }
+#define vmx_vmptrst(a) \
+ if (__predict_false(_vmx_vmptrst(a) != 0)) { \
+ panic("%s: VMPTRST failed", __func__); \
+ }
+#define vmx_vmclear(a) \
+ if (__predict_false(_vmx_vmclear(a) != 0)) { \
+ panic("%s: VMCLEAR failed", __func__); \
+ }
+
+#define MSR_IA32_FEATURE_CONTROL 0x003A
+#define IA32_FEATURE_CONTROL_LOCK __BIT(0)
+#define IA32_FEATURE_CONTROL_IN_SMX __BIT(1)
+#define IA32_FEATURE_CONTROL_OUT_SMX __BIT(2)
+
+#define MSR_IA32_VMX_BASIC 0x0480
+#define IA32_VMX_BASIC_IDENT __BITS(30,0)
+#define IA32_VMX_BASIC_DATA_SIZE __BITS(44,32)
+#define IA32_VMX_BASIC_MEM_WIDTH __BIT(48)
+#define IA32_VMX_BASIC_DUAL __BIT(49)
+#define IA32_VMX_BASIC_MEM_TYPE __BITS(53,50)
+#define MEM_TYPE_UC 0
+#define MEM_TYPE_WB 6
+#define IA32_VMX_BASIC_IO_REPORT __BIT(54)
+#define IA32_VMX_BASIC_TRUE_CTLS __BIT(55)
+
+#define MSR_IA32_VMX_PINBASED_CTLS 0x0481
+#define MSR_IA32_VMX_PROCBASED_CTLS 0x0482
+#define MSR_IA32_VMX_EXIT_CTLS 0x0483
+#define MSR_IA32_VMX_ENTRY_CTLS 0x0484
+#define MSR_IA32_VMX_PROCBASED_CTLS2 0x048B
+
+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x048D
+#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x048E
+#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x048F
+#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x0490
+
+#define MSR_IA32_VMX_CR0_FIXED0 0x0486
+#define MSR_IA32_VMX_CR0_FIXED1 0x0487
+#define MSR_IA32_VMX_CR4_FIXED0 0x0488
+#define MSR_IA32_VMX_CR4_FIXED1 0x0489
+
+#define MSR_IA32_VMX_EPT_VPID_CAP 0x048C
+#define IA32_VMX_EPT_VPID_WALKLENGTH_4 __BIT(6)
+#define IA32_VMX_EPT_VPID_UC __BIT(8)
+#define IA32_VMX_EPT_VPID_WB __BIT(14)
+#define IA32_VMX_EPT_VPID_INVEPT __BIT(20)
+#define IA32_VMX_EPT_VPID_FLAGS_AD __BIT(21)
+#define IA32_VMX_EPT_VPID_INVEPT_CONTEXT __BIT(25)
+#define IA32_VMX_EPT_VPID_INVEPT_ALL __BIT(26)
+#define IA32_VMX_EPT_VPID_INVVPID __BIT(32)
+#define IA32_VMX_EPT_VPID_INVVPID_ADDR __BIT(40)
+#define IA32_VMX_EPT_VPID_INVVPID_CONTEXT __BIT(41)
+#define IA32_VMX_EPT_VPID_INVVPID_ALL __BIT(42)
+#define IA32_VMX_EPT_VPID_INVVPID_CONTEXT_NOG __BIT(43)
+
+/* -------------------------------------------------------------------------- */
+
+/* 16-bit control fields */
+#define VMCS_VPID 0x00000000
+#define VMCS_PIR_VECTOR 0x00000002
+#define VMCS_EPTP_INDEX 0x00000004
+/* 16-bit guest-state fields */
+#define VMCS_GUEST_ES_SELECTOR 0x00000800
+#define VMCS_GUEST_CS_SELECTOR 0x00000802
+#define VMCS_GUEST_SS_SELECTOR 0x00000804
+#define VMCS_GUEST_DS_SELECTOR 0x00000806
+#define VMCS_GUEST_FS_SELECTOR 0x00000808
+#define VMCS_GUEST_GS_SELECTOR 0x0000080A
+#define VMCS_GUEST_LDTR_SELECTOR 0x0000080C
+#define VMCS_GUEST_TR_SELECTOR 0x0000080E
+#define VMCS_GUEST_INTR_STATUS 0x00000810
+#define VMCS_PML_INDEX 0x00000812
+/* 16-bit host-state fields */
+#define VMCS_HOST_ES_SELECTOR 0x00000C00
+#define VMCS_HOST_CS_SELECTOR 0x00000C02
+#define VMCS_HOST_SS_SELECTOR 0x00000C04
+#define VMCS_HOST_DS_SELECTOR 0x00000C06
+#define VMCS_HOST_FS_SELECTOR 0x00000C08
+#define VMCS_HOST_GS_SELECTOR 0x00000C0A
+#define VMCS_HOST_TR_SELECTOR 0x00000C0C
+/* 64-bit control fields */
+#define VMCS_IO_BITMAP_A 0x00002000
+#define VMCS_IO_BITMAP_B 0x00002002
+#define VMCS_MSR_BITMAP 0x00002004
+#define VMCS_EXIT_MSR_STORE_ADDRESS 0x00002006
+#define VMCS_EXIT_MSR_LOAD_ADDRESS 0x00002008
+#define VMCS_ENTRY_MSR_LOAD_ADDRESS 0x0000200A
+#define VMCS_EXECUTIVE_VMCS 0x0000200C
+#define VMCS_PML_ADDRESS 0x0000200E
+#define VMCS_TSC_OFFSET 0x00002010
+#define VMCS_VIRTUAL_APIC 0x00002012
+#define VMCS_APIC_ACCESS 0x00002014
+#define VMCS_PIR_DESC 0x00002016
+#define VMCS_VM_CONTROL 0x00002018
+#define VMCS_EPTP 0x0000201A
+#define EPTP_TYPE __BITS(2,0)
+#define EPTP_TYPE_UC 0
+#define EPTP_TYPE_WB 6
+#define EPTP_WALKLEN __BITS(5,3)
+#define EPTP_FLAGS_AD __BIT(6)
+#define EPTP_PHYSADDR __BITS(63,12)
+#define VMCS_EOI_EXIT0 0x0000201C
+#define VMCS_EOI_EXIT1 0x0000201E
+#define VMCS_EOI_EXIT2 0x00002020
+#define VMCS_EOI_EXIT3 0x00002022
+#define VMCS_EPTP_LIST 0x00002024
+#define VMCS_VMREAD_BITMAP 0x00002026
+#define VMCS_VMWRITE_BITMAP 0x00002028
+#define VMCS_VIRTUAL_EXCEPTION 0x0000202A
+#define VMCS_XSS_EXIT_BITMAP 0x0000202C
+#define VMCS_ENCLS_EXIT_BITMAP 0x0000202E
+#define VMCS_TSC_MULTIPLIER 0x00002032
+/* 64-bit read-only fields */
+#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400
+/* 64-bit guest-state fields */
+#define VMCS_LINK_POINTER 0x00002800
+#define VMCS_GUEST_IA32_DEBUGCTL 0x00002802
+#define VMCS_GUEST_IA32_PAT 0x00002804
+#define VMCS_GUEST_IA32_EFER 0x00002806
+#define VMCS_GUEST_IA32_PERF_GLOBAL_CTRL 0x00002808
+#define VMCS_GUEST_PDPTE0 0x0000280A
+#define VMCS_GUEST_PDPTE1 0x0000280C
+#define VMCS_GUEST_PDPTE2 0x0000280E
+#define VMCS_GUEST_PDPTE3 0x00002810
+#define VMCS_GUEST_BNDCFGS 0x00002812
+/* 64-bit host-state fields */
+#define VMCS_HOST_IA32_PAT 0x00002C00
+#define VMCS_HOST_IA32_EFER 0x00002C02
+#define VMCS_HOST_IA32_PERF_GLOBAL_CTRL 0x00002C04
+/* 32-bit control fields */
+/* Pin-based controls field, followed by its PIN_CTLS_* bit flags. */
+#define VMCS_PINBASED_CTLS 0x00004000
+#define PIN_CTLS_INT_EXITING __BIT(0)
+#define PIN_CTLS_NMI_EXITING __BIT(3)
+#define PIN_CTLS_VIRTUAL_NMIS __BIT(5)
+#define PIN_CTLS_ACTIVATE_PREEMPT_TIMER __BIT(6)
+#define PIN_CTLS_PROCESS_POSTED_INTS __BIT(7)
+/* Misspelled original name, kept so any existing user still compiles. */
+#define PIN_CTLS_PROCESS_POSTEd_INTS PIN_CTLS_PROCESS_POSTED_INTS
+#define VMCS_PROCBASED_CTLS 0x00004002
+#define PROC_CTLS_INT_WINDOW_EXITING __BIT(2)
+#define PROC_CTLS_USE_TSC_OFFSETTING __BIT(3)
+#define PROC_CTLS_HLT_EXITING __BIT(7)
+#define PROC_CTLS_INVLPG_EXITING __BIT(9)
+#define PROC_CTLS_MWAIT_EXITING __BIT(10)
+#define PROC_CTLS_RDPMC_EXITING __BIT(11)
+#define PROC_CTLS_RDTSC_EXITING __BIT(12)
+#define PROC_CTLS_RCR3_EXITING __BIT(15)
+#define PROC_CTLS_LCR3_EXITING __BIT(16)
+#define PROC_CTLS_RCR8_EXITING __BIT(19)
+#define PROC_CTLS_LCR8_EXITING __BIT(20)
+#define PROC_CTLS_USE_TPR_SHADOW __BIT(21)
+#define PROC_CTLS_NMI_WINDOW_EXITING __BIT(22)
+#define PROC_CTLS_DR_EXITING __BIT(23)
+#define PROC_CTLS_UNCOND_IO_EXITING __BIT(24)
+#define PROC_CTLS_USE_IO_BITMAPS __BIT(25)
+#define PROC_CTLS_MONITOR_TRAP_FLAG __BIT(27)
+#define PROC_CTLS_USE_MSR_BITMAPS __BIT(28)
+#define PROC_CTLS_MONITOR_EXITING __BIT(29)
+#define PROC_CTLS_PAUSE_EXITING __BIT(30)
+#define PROC_CTLS_ACTIVATE_CTLS2 __BIT(31)
Home |
Main Index |
Thread Index |
Old Index