Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src Improvements and fixes in NVMM.
details: https://anonhg.NetBSD.org/src/rev/68cee890c922
branches: trunk
changeset: 447283:68cee890c922
user: maxv <maxv%NetBSD.org@localhost>
date: Sun Jan 06 16:10:51 2019 +0000
description:
Improvements and fixes in NVMM.
Kernel driver:
* Don't take an extra (unneeded) reference to the UAO.
* Provide npc for HLT. I'm not really happy with it right now; it will
likely be revisited.
* Add the INT_SHADOW, INT_WINDOW_EXIT and NMI_WINDOW_EXIT states. Provide
them in the exitstate too.
* Don't take the TPR into account when processing INTs. The virtualizer
can do that itself (Qemu already does).
* Provide a hypervisor signature in CPUID, and hide SVM.
* Ignore certain MSRs. One special case is MSR_NB_CFG, in which we set
NB_CFG_INITAPICCPUIDLO. Allow reads of MSR_TSC.
* If the LWP has pending signals or softints, leave, rather than waiting
for a rescheduling to happen later. This reduces interrupt processing
time in the guest (Qemu sends a signal to the thread, and now we leave
right away). This could be improved even more by sending an actual IPI
to the CPU, but I'll see later.
Libnvmm:
* Fix the MMU translation of large pages: we need to add the lower bits
of the GVA too.
* Change the IO and Mem structures to take a pointer rather than a
static array. This provides more flexibility.
* Batch together the str+rep IO transactions. We do one big memory
read/write, and then send the IO commands to the hypervisor all at
once. This considerably increases performance.
* Decode MOVZX.
With these changes in place, Qemu+NVMM works. I can install NetBSD 8.0
in a VM with multiple VCPUs, connect to the network, etc.
diffstat:
lib/libnvmm/libnvmm.3 | 12 +-
lib/libnvmm/libnvmm_x86.c | 206 ++++++++++++++++++++++++++++++++++++++-
lib/libnvmm/nvmm.h | 6 +-
sys/dev/nvmm/nvmm.c | 7 +-
sys/dev/nvmm/nvmm.h | 7 +-
sys/dev/nvmm/x86/nvmm_x86.h | 14 +-
sys/dev/nvmm/x86/nvmm_x86_svm.c | 146 +++++++++++++++++++++++----
7 files changed, 344 insertions(+), 54 deletions(-)
diffs (truncated from 877 to 300 lines):
diff -r b2f389174e42 -r 68cee890c922 lib/libnvmm/libnvmm.3
--- a/lib/libnvmm/libnvmm.3 Sun Jan 06 15:37:17 2019 +0000
+++ b/lib/libnvmm/libnvmm.3 Sun Jan 06 16:10:51 2019 +0000
@@ -1,4 +1,4 @@
-.\" $NetBSD: libnvmm.3,v 1.6 2018/12/27 07:22:31 maxv Exp $
+.\" $NetBSD: libnvmm.3,v 1.7 2019/01/06 16:10:51 maxv Exp $
.\"
.\" Copyright (c) 2018 The NetBSD Foundation, Inc.
.\" All rights reserved.
@@ -27,7 +27,7 @@
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd December 26, 2018
+.Dd January 06, 2019
.Dt LIBNVMM 3
.Os
.Sh NAME
@@ -242,8 +242,6 @@
.Fa cpuid
from machine
.Fa mach .
-.Fa cb
-will be called to handle the transaction.
See
.Sx I/O Assist
below for details.
@@ -255,8 +253,6 @@
.Fa cpuid
from machine
.Fa mach .
-.Fa cb
-will be called to handle the transaction.
See
.Sx Mem Assist
below for details.
@@ -415,7 +411,7 @@
uint64_t port;
bool in;
size_t size;
- uint8_t data[8];
+ uint8_t *data;
};
.Ed
.Pp
@@ -463,7 +459,7 @@
gpaddr_t gpa;
bool write;
size_t size;
- uint8_t data[8];
+ uint8_t *data;
};
.Ed
.Pp
diff -r b2f389174e42 -r 68cee890c922 lib/libnvmm/libnvmm_x86.c
--- a/lib/libnvmm/libnvmm_x86.c Sun Jan 06 15:37:17 2019 +0000
+++ b/lib/libnvmm/libnvmm_x86.c Sun Jan 06 16:10:51 2019 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: libnvmm_x86.c,v 1.9 2019/01/04 10:25:39 maxv Exp $ */
+/* $NetBSD: libnvmm_x86.c,v 1.10 2019/01/06 16:10:51 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -45,6 +45,8 @@
#include "nvmm.h"
+#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+
#include <x86/specialreg.h>
extern struct nvmm_callbacks __callbacks;
@@ -83,6 +85,11 @@
(void *)state.segs[i].limit,
state.segs[i].attrib.p, state.segs[i].attrib.def32);
}
+ printf("| -> MSR_EFER=%p\n", (void *)state.msrs[NVMM_X64_MSR_EFER]);
+ printf("| -> CR0=%p\n", (void *)state.crs[NVMM_X64_CR_CR0]);
+ printf("| -> CR3=%p\n", (void *)state.crs[NVMM_X64_CR_CR3]);
+ printf("| -> CR4=%p\n", (void *)state.crs[NVMM_X64_CR_CR4]);
+ printf("| -> CR8=%p\n", (void *)state.crs[NVMM_X64_CR_CR8]);
printf("| -> CPL=%p\n", (void *)state.misc[NVMM_X64_MISC_CPL]);
return 0;
@@ -131,6 +138,7 @@
return -1;
if (pte & PG_PS) {
*gpa = (pte & PTE32_L2_FRAME);
+ *gpa = *gpa + (gva & PTE32_L1_MASK);
return 0;
}
@@ -215,6 +223,7 @@
return -1;
if (pte & PG_PS) {
*gpa = (pte & PTE32_PAE_L2_FRAME);
+ *gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
return 0;
}
@@ -320,6 +329,7 @@
return -1;
if (pte & PG_PS) {
*gpa = (pte & PTE64_L3_FRAME);
+ *gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
return 0;
}
@@ -341,6 +351,7 @@
return -1;
if (pte & PG_PS) {
*gpa = (pte & PTE64_L2_FRAME);
+ *gpa = *gpa + (gva & PTE64_L1_MASK);
return 0;
}
@@ -500,13 +511,34 @@
}
static uint64_t
+rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
+{
+ uint64_t mask, cnt;
+
+ mask = mask_from_adsize(adsize);
+ cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
+
+ return cnt;
+}
+
+static void
+rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
+{
+ uint64_t mask;
+
+ mask = mask_from_adsize(adsize);
+ state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
+ state->gprs[NVMM_X64_GPR_RCX] |= cnt;
+}
+
+static uint64_t
rep_dec_apply(struct nvmm_x64_state *state, size_t adsize)
{
uint64_t mask, cnt;
mask = mask_from_adsize(adsize);
- cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
+ cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
cnt -= 1;
cnt &= mask;
@@ -521,6 +553,7 @@
gvaddr_t gva, uint8_t *data, size_t size)
{
struct nvmm_mem mem;
+ uint8_t membuf[8];
nvmm_prot_t prot;
gpaddr_t gpa;
uintptr_t hva;
@@ -547,6 +580,7 @@
is_mmio = (ret == -1);
if (is_mmio) {
+ mem.data = membuf;
mem.gva = gva;
mem.gpa = gpa;
mem.write = false;
@@ -572,6 +606,7 @@
gvaddr_t gva, uint8_t *data, size_t size)
{
struct nvmm_mem mem;
+ uint8_t membuf[8];
nvmm_prot_t prot;
gpaddr_t gpa;
uintptr_t hva;
@@ -598,6 +633,7 @@
is_mmio = (ret == -1);
if (is_mmio) {
+ mem.data = membuf;
mem.gva = gva;
mem.gpa = gpa;
mem.write = true;
@@ -622,16 +658,55 @@
static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);
+#define NVMM_IO_BATCH_SIZE 32
+
+static int
+assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
+ struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
+{
+ uint8_t iobuf[NVMM_IO_BATCH_SIZE];
+ size_t i, iosize, iocnt;
+ int ret;
+
+ cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
+ iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
+ iocnt = iosize / io->size;
+
+ io->data = iobuf;
+
+ if (!io->in) {
+ ret = read_guest_memory(mach, state, gva, iobuf, iosize);
+ if (ret == -1)
+ return -1;
+ }
+
+ for (i = 0; i < iocnt; i++) {
+ (*__callbacks.io)(io);
+ io->data += io->size;
+ }
+
+ if (io->in) {
+ ret = write_guest_memory(mach, state, gva, iobuf, iosize);
+ if (ret == -1)
+ return -1;
+ }
+
+ return iocnt;
+}
+
int
nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
struct nvmm_exit *exit)
{
struct nvmm_x64_state state;
struct nvmm_io io;
- uint64_t cnt;
+ uint64_t cnt = 0; /* GCC */
+ uint8_t iobuf[8];
+ int iocnt = 1;
gvaddr_t gva;
int reg = 0; /* GCC */
int ret, seg;
+ bool psld = false;
if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
errno = EINVAL;
@@ -641,6 +716,7 @@
io.port = exit->u.io.port;
io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
io.size = exit->u.io.operand_size;
+ io.data = iobuf;
ret = nvmm_vcpu_getstate(mach, cpuid, &state,
NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
@@ -648,6 +724,17 @@
if (ret == -1)
return -1;
+ if (exit->u.io.rep) {
+ cnt = rep_get_cnt(&state, exit->u.io.address_size);
+ if (__predict_false(cnt == 0)) {
+ return 0;
+ }
+ }
+
+ if (__predict_false(state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
+ psld = true;
+ }
+
/*
* Determine GVA.
*/
@@ -678,6 +765,13 @@
if (ret == -1)
return -1;
}
+
+ if (exit->u.io.rep && !psld) {
+ iocnt = assist_io_batch(mach, &state, &io, gva, cnt);
+ if (iocnt == -1)
+ return -1;
+ goto done;
+ }
}
if (!io.in) {
@@ -704,16 +798,18 @@
}
}
+done:
if (exit->u.io.str) {
- if (state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
- state.gprs[reg] -= io.size;
+ if (__predict_false(psld)) {
+ state.gprs[reg] -= iocnt * io.size;
} else {
- state.gprs[reg] += io.size;
+ state.gprs[reg] += iocnt * io.size;
}
}
if (exit->u.io.rep) {
- cnt = rep_dec_apply(&state, exit->u.io.address_size);
+ cnt -= iocnt;
Home |
Main Index |
Thread Index |
Old Index