Source-Changes-HG archive
[src/trunk]: src/sys/dev/nvmm nvmm(4): Add suspend/resume support.
details: https://anonhg.NetBSD.org/src/rev/1b6f5bdaa449
branches: trunk
changeset: 370059:1b6f5bdaa449
user: riastradh <riastradh%NetBSD.org@localhost>
date: Tue Sep 13 20:10:04 2022 +0000
description:
nvmm(4): Add suspend/resume support.
New MD nvmm_impl callbacks:
- .suspend_interrupt forces all VMs on all physical CPUs to exit.
- .vcpu_suspend suspends an individual vCPU on a machine.
- .machine_suspend suspends an individual machine.
- .suspend suspends the whole system.
- .resume resumes the whole system.
- .machine_resume resumes an individual machine.
- .vcpu_resume resumes an individual vCPU on a machine.
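The nvmm_internal.h hunk falls beyond the truncation point below; judging
from the call sites in nvmm.c, the new struct nvmm_impl members presumably
look like this (a sketch inferred from the callers, not the literal
header):
	/* MD suspend/resume hooks, as called from nvmm.c below. */
	void (*suspend_interrupt)(void);
	void (*vcpu_suspend)(struct nvmm_machine *, struct nvmm_cpu *);
	void (*machine_suspend)(struct nvmm_machine *);
	void (*suspend)(void);
	void (*resume)(void);
	void (*machine_resume)(struct nvmm_machine *);
	void (*vcpu_resume)(struct nvmm_machine *, struct nvmm_cpu *);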
Suspending nvmm:
1. causes new VM operations (ioctl and close) to block until resumed,
2. uses .suspend_interrupt to interrupt any concurrent VM operations and
force them to return early, and then
3. uses the various suspend callbacks to suspend all vCPUs, machines,
and the whole system -- all vCPUs before the machine they're on,
and all machines before the system.
Resuming nvmm does the reverse of (3) -- resume system, resume each
machine and then the vCPUs on that machine -- and then unblocks
operations.
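Condensed from the nvmm.c changes below: every user-facing operation
brackets its work with the suspension gate, so step (1) is just this
wrapper around the old ioctl path (comments added here for explanation):
	static int
	nvmm_ioctl(struct file *fp, u_long cmd, void *data)
	{
		int error;

		error = nvmm_enter_sig();	/* blocks while suspended */
		if (error)
			return error;		/* interrupted by a signal */
		error = nvmm_ioctl_internal(fp, cmd, data);
		nvmm_exit();	/* last user out wakes nvmm_suspend() */

		return error;
	}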
Implemented only for x86-vmx for now:
- suspend_interrupt triggers a TLB IPI to cause VM exits;
- vcpu_suspend issues VMCLEAR to force any in-CPU state to be written
to memory;
- machine_suspend does nothing;
- suspend does VMXOFF on all CPUs;
- resume does VMXON on all CPUs;
- machine_resume does nothing; and
- vcpu_resume just marks each vCPU as valid but inactive so
subsequent use will clear it and load it with vmptrld.
x86-svm left as an exercise for the reader.
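The nvmm_x86_vmx.c hunks are also beyond the truncation point, so the
following is a rough sketch only: the helper names (vmx_vmclear,
vmx_vmxoff) and the cpudata layout are assumptions, not the committed
code; only the xc_broadcast(9) cross-call API is standard NetBSD.
	/* VMCLEAR forces any VMCS state cached in the CPU out to memory. */
	static void
	vmx_vcpu_suspend(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
	{
		struct vmx_cpudata *cpudata = vcpu->cpudata;	/* assumed layout */

		vmx_vmclear(&cpudata->vmcs_pa);
	}

	/* Cross-call (sys/xcall.h) run once on every CPU at suspend time. */
	static void
	vmx_suspend_xcall(void *arg1, void *arg2)
	{

		vmx_vmxoff();	/* leave VMX operation on this CPU */
	}

	static void
	vmx_suspend(void)
	{
		uint64_t xc;

		xc = xc_broadcast(0, vmx_suspend_xcall, NULL, NULL);
		xc_wait(xc);
	}
vmx_resume would mirror this with VMXON and each CPU's VMXON region, and
vcpu_resume only marks each vCPU as valid but inactive so the next entry
reloads it with vmptrld.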
diffstat:
sys/dev/nvmm/nvmm.c | 223 +++++++++++++++++++++++++++++++++++++++-
sys/dev/nvmm/nvmm_internal.h | 15 ++-
sys/dev/nvmm/x86/nvmm_x86_vmx.c | 85 +++++++++++++-
3 files changed, 310 insertions(+), 13 deletions(-)
diffs (truncated from 505 to 300 lines):
diff -r 2e7e4a1bf7fa -r 1b6f5bdaa449 sys/dev/nvmm/nvmm.c
--- a/sys/dev/nvmm/nvmm.c Tue Sep 13 13:09:16 2022 +0000
+++ b/sys/dev/nvmm/nvmm.c Tue Sep 13 20:10:04 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: nvmm.c,v 1.46 2022/07/07 23:50:33 pgoyette Exp $ */
+/* $NetBSD: nvmm.c,v 1.47 2022/09/13 20:10:04 riastradh Exp $ */
 /*
  * Copyright (c) 2018-2020 Maxime Villard, m00nbsd.net
@@ -29,7 +29,7 @@
  */
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.46 2022/07/07 23:50:33 pgoyette Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.47 2022/09/13 20:10:04 riastradh Exp $");
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -59,6 +59,15 @@
 static struct nvmm_machine machines[NVMM_MAX_MACHINES];
 static volatile unsigned int nmachines __cacheline_aligned;
+static struct {
+	kmutex_t lock;
+	kcondvar_t suspendcv;
+	kcondvar_t resumecv;
+	unsigned users;
+} suspension;
+
+volatile bool nvmm_suspending;
+
 static const struct nvmm_impl *nvmm_impl_list[] = {
 #if defined(__x86_64__)
 	&nvmm_x86_svm,	/* x86 AMD SVM */
@@ -73,6 +82,50 @@
 /* -------------------------------------------------------------------------- */
 static int
+nvmm_enter_sig(void)
+{
+	int error;
+
+	mutex_enter(&suspension.lock);
+	while (nvmm_suspending) {
+		error = cv_wait_sig(&suspension.resumecv, &suspension.lock);
+		if (error)
+			goto out;
+	}
+	KASSERT(suspension.users < UINT_MAX);
+	suspension.users++;
+	error = 0;
+out:	mutex_exit(&suspension.lock);
+
+	return error;
+}
+
+static void
+nvmm_enter(void)
+{
+
+	mutex_enter(&suspension.lock);
+	while (nvmm_suspending)
+		cv_wait(&suspension.resumecv, &suspension.lock);
+	KASSERT(suspension.users < UINT_MAX);
+	suspension.users++;
+	mutex_exit(&suspension.lock);
+}
+
+static void
+nvmm_exit(void)
+{
+
+	mutex_enter(&suspension.lock);
+	KASSERT(suspension.users > 0);
+	if (--suspension.users == 0)
+		cv_signal(&suspension.suspendcv);
+	mutex_exit(&suspension.lock);
+}
+
+/* -------------------------------------------------------------------------- */
+
+static int
 nvmm_machine_alloc(struct nvmm_machine **ret)
 {
 	struct nvmm_machine *mach;
@@ -989,6 +1042,11 @@
 		}
 	}
+	mutex_init(&suspension.lock, MUTEX_DEFAULT, IPL_NONE);
+	cv_init(&suspension.suspendcv, "nvmmsus");
+	cv_init(&suspension.resumecv, "nvmmres");
+	suspension.users = 0;
+
 	(*nvmm_impl->init)();
 	return 0;
@@ -1080,7 +1138,11 @@
 	struct nvmm_owner *owner = fp->f_data;
 	KASSERT(owner != NULL);
+
+	nvmm_enter();
 	nvmm_kill_machines(owner);
+	nvmm_exit();
+
 	if (owner != &root_owner) {
 		kmem_free(owner, sizeof(*owner));
 	}
@@ -1126,7 +1188,7 @@
 }
 static int
-nvmm_ioctl(file_t *fp, u_long cmd, void *data)
+nvmm_ioctl_internal(file_t *fp, u_long cmd, void *data)
 {
 	struct nvmm_owner *owner = fp->f_data;
@@ -1170,11 +1232,27 @@
 	}
 }
+static int
+nvmm_ioctl(struct file *fp, u_long cmd, void *data)
+{
+	int error;
+
+	error = nvmm_enter_sig();
+	if (error)
+		return error;
+	error = nvmm_ioctl_internal(fp, cmd, data);
+	nvmm_exit();
+
+	return error;
+}
+
 /* -------------------------------------------------------------------------- */
 static int nvmm_match(device_t, cfdata_t, void *);
 static void nvmm_attach(device_t, device_t, void *);
 static int nvmm_detach(device_t, int);
+static bool nvmm_suspend(device_t, const pmf_qual_t *);
+static bool nvmm_resume(device_t, const pmf_qual_t *);
 extern struct cfdriver nvmm_cd;
@@ -1209,6 +1287,8 @@
panic("%s: impossible", __func__);
aprint_normal_dev(self, "attached, using backend %s\n",
nvmm_impl->name);
+ if (nvmm_impl->suspend != NULL && nvmm_impl->resume != NULL)
+ pmf_device_register(self, nvmm_suspend, nvmm_resume);
}
 static int
@@ -1216,10 +1296,147 @@
 {
 	if (atomic_load_relaxed(&nmachines) > 0)
 		return EBUSY;
+	pmf_device_deregister(self);
 	nvmm_fini();
 	return 0;
 }
+static void
+nvmm_suspend_vcpu(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
+{
+
+	mutex_enter(&vcpu->lock);
+	if (vcpu->present && nvmm_impl->vcpu_suspend)
+		(*nvmm_impl->vcpu_suspend)(mach, vcpu);
+	mutex_exit(&vcpu->lock);
+}
+
+static void
+nvmm_resume_vcpu(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
+{
+
+	mutex_enter(&vcpu->lock);
+	if (vcpu->present && nvmm_impl->vcpu_resume)
+		(*nvmm_impl->vcpu_resume)(mach, vcpu);
+	mutex_exit(&vcpu->lock);
+}
+
+static void
+nvmm_suspend_machine(struct nvmm_machine *mach)
+{
+
+	rw_enter(&mach->lock, RW_WRITER);
+	if (mach->present) {
+		if (nvmm_impl->vcpu_suspend) {
+			size_t cpuid;
+
+			for (cpuid = 0; cpuid < NVMM_MAX_VCPUS; cpuid++)
+				nvmm_suspend_vcpu(mach, &mach->cpus[cpuid]);
+		}
+		if (nvmm_impl->machine_suspend)
+			(*nvmm_impl->machine_suspend)(mach);
+	}
+	rw_exit(&mach->lock);
+}
+
+static void
+nvmm_resume_machine(struct nvmm_machine *mach)
+{
+
+	rw_enter(&mach->lock, RW_WRITER);
+	if (mach->present) {
+		if (nvmm_impl->vcpu_resume) {
+			size_t cpuid;
+
+			for (cpuid = 0; cpuid < NVMM_MAX_VCPUS; cpuid++)
+				nvmm_resume_vcpu(mach, &mach->cpus[cpuid]);
+		}
+		if (nvmm_impl->machine_resume)
+			(*nvmm_impl->machine_resume)(mach);
+	}
+	rw_exit(&mach->lock);
+}
+
+static bool
+nvmm_suspend(device_t self, const pmf_qual_t *qual)
+{
+	size_t i;
+
+	/*
+	 * Prevent new users (via ioctl) from starting.
+	 */
+	mutex_enter(&suspension.lock);
+	KASSERT(!nvmm_suspending);
+	atomic_store_relaxed(&nvmm_suspending, true);
+	mutex_exit(&suspension.lock);
+
+	/*
+	 * Interrupt any running VMs so they will break out of run
+	 * loops or anything else and not start up again until we've
+	 * resumed.
+	 */
+	if (nvmm_impl->suspend_interrupt)
+		(*nvmm_impl->suspend_interrupt)();
+
+	/*
+	 * Wait for any running VMs or other ioctls to finish running
+	 * or handling any other ioctls.
+	 */
+	mutex_enter(&suspension.lock);
+	while (suspension.users)
+		cv_wait(&suspension.suspendcv, &suspension.lock);
+	mutex_exit(&suspension.lock);
+
+	/*
+	 * Suspend all the machines.
+	 */
+	if (nvmm_impl->machine_suspend || nvmm_impl->vcpu_suspend) {
+		for (i = 0; i < NVMM_MAX_MACHINES; i++)
+			nvmm_suspend_machine(&machines[i]);
+	}
+
+	/*
+	 * Take any systemwide suspend action.
+	 */
+	if (nvmm_impl->suspend)
+		(*nvmm_impl->suspend)();
+
+	return true;
+}
+
+static bool
+nvmm_resume(device_t self, const pmf_qual_t *qual)
+{
+	size_t i;
+
+	KASSERT(atomic_load_relaxed(&nvmm_suspending));
+	KASSERT(suspension.users == 0);
+
+	/*
+	 * Take any systemwide resume action.
+	 */
+	if (nvmm_impl->resume)
+		(*nvmm_impl->resume)();
+
+	/*
+	 * Resume all the machines.
+	 */
+	if (nvmm_impl->machine_resume || nvmm_impl->vcpu_resume) {
+		for (i = 0; i < NVMM_MAX_MACHINES; i++)
+			nvmm_resume_machine(&machines[i]);
+	}
+
+	/*
+	 * Allow new users (via ioctl) to start again.
+	 */
+	mutex_enter(&suspension.lock);
+	atomic_store_relaxed(&nvmm_suspending, false);
+	cv_broadcast(&suspension.resumecv);
+	mutex_exit(&suspension.lock);
+
+	return true;
+}
+