CVS commit: pkgsrc/sysutils/xenkernel411
Module Name: pkgsrc
Committed By: bouyer
Date: Thu Nov 12 11:29:25 UTC 2020
Modified Files:
pkgsrc/sysutils/xenkernel411: Makefile distinfo
pkgsrc/sysutils/xenkernel411/patches: patch-XSA286
Added Files:
pkgsrc/sysutils/xenkernel411/patches: patch-XSA351
Log Message:
Update patch for XSA286 from upstream
Add upstream patch for XSA351
Bump PKGREVISION
To generate a diff of this commit:
cvs rdiff -u -r1.17 -r1.18 pkgsrc/sysutils/xenkernel411/Makefile
cvs rdiff -u -r1.15 -r1.16 pkgsrc/sysutils/xenkernel411/distinfo
cvs rdiff -u -r1.1 -r1.2 pkgsrc/sysutils/xenkernel411/patches/patch-XSA286
cvs rdiff -u -r0 -r1.1 pkgsrc/sysutils/xenkernel411/patches/patch-XSA351
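The rdiff output is a unified diff rooted at the top of the repository, so it can be applied to a pkgsrc working copy with patch(1). For example (a usage sketch, assuming CVSROOT points at the pkgsrc repository and the command is run from the directory containing pkgsrc/):
cvs rdiff -u -r1.15 -r1.16 pkgsrc/sysutils/xenkernel411/distinfo | patch -p0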
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: pkgsrc/sysutils/xenkernel411/Makefile
diff -u pkgsrc/sysutils/xenkernel411/Makefile:1.17 pkgsrc/sysutils/xenkernel411/Makefile:1.18
--- pkgsrc/sysutils/xenkernel411/Makefile:1.17 Wed Oct 21 09:03:05 2020
+++ pkgsrc/sysutils/xenkernel411/Makefile Thu Nov 12 11:29:25 2020
@@ -1,8 +1,8 @@
-# $NetBSD: Makefile,v 1.17 2020/10/21 09:03:05 bouyer Exp $
+# $NetBSD: Makefile,v 1.18 2020/11/12 11:29:25 bouyer Exp $
VERSION= 4.11.4
#keep >= 1 if we have security patches
-PKGREVISION= 3
+PKGREVISION= 4
DISTNAME= xen-${VERSION}
PKGNAME= xenkernel411-${VERSION}
CATEGORIES= sysutils
Index: pkgsrc/sysutils/xenkernel411/distinfo
diff -u pkgsrc/sysutils/xenkernel411/distinfo:1.15 pkgsrc/sysutils/xenkernel411/distinfo:1.16
--- pkgsrc/sysutils/xenkernel411/distinfo:1.15 Wed Oct 21 09:03:05 2020
+++ pkgsrc/sysutils/xenkernel411/distinfo Thu Nov 12 11:29:25 2020
@@ -1,11 +1,11 @@
-$NetBSD: distinfo,v 1.15 2020/10/21 09:03:05 bouyer Exp $
+$NetBSD: distinfo,v 1.16 2020/11/12 11:29:25 bouyer Exp $
SHA1 (xen411/xen-4.11.4.tar.gz) = 6c8cdf441621c14dc5345196b48df6982c060c4f
RMD160 (xen411/xen-4.11.4.tar.gz) = 49819fcd1de3985d4dea370be962548c862f2933
SHA512 (xen411/xen-4.11.4.tar.gz) = 8383f0b369fa08c8ecfdd68f902a2aaad140146a183131c50c020fe04c2f1e829c219b9bd9923fa8f1c180e1e7c6e73d0d68b7015fc39fd3b7f59e55c680cedb
Size (xen411/xen-4.11.4.tar.gz) = 25184564 bytes
SHA1 (patch-Config.mk) = 9372a09efd05c9fbdbc06f8121e411fcb7c7ba65
-SHA1 (patch-XSA286) = c7c5cc192be821721919cc035515ddf55d2c0658
+SHA1 (patch-XSA286) = de645acb85378b884e280be3dba8c5479334fbf8
SHA1 (patch-XSA317) = 3a3e7bf8f115bebaf56001afcf68c2bd501c00a5
SHA1 (patch-XSA319) = 4954bdc849666e1c735c3281256e4850c0594ee8
SHA1 (patch-XSA320) = 38d84a2ded4ccacee455ba64eb3b369e5661fbfd
@@ -23,6 +23,7 @@ SHA1 (patch-XSA344) = cf7184ac9263b41830
SHA1 (patch-XSA345) = 14ab754703af1045b2d049de1c6ba1c5baca5d81
SHA1 (patch-XSA346) = c1962c037c5ab62c2f7e9a558c4565331c981be0
SHA1 (patch-XSA347) = f3f98a794584d5d4321b95c2b1b9c88821fa567e
+SHA1 (patch-XSA351) = fca8d8c5c77ba8d6007d7643330be7f8835bbc5a
SHA1 (patch-xen_Makefile) = 465388d80de414ca3bb84faefa0f52d817e423a6
SHA1 (patch-xen_Rules.mk) = c743dc63f51fc280d529a7d9e08650292c171dac
SHA1 (patch-xen_arch_x86_Rules.mk) = 0bedfc53a128a87b6a249ae04fbdf6a053bfb70b
Index: pkgsrc/sysutils/xenkernel411/patches/patch-XSA286
diff -u pkgsrc/sysutils/xenkernel411/patches/patch-XSA286:1.1 pkgsrc/sysutils/xenkernel411/patches/patch-XSA286:1.2
--- pkgsrc/sysutils/xenkernel411/patches/patch-XSA286:1.1 Wed Oct 21 09:03:05 2020
+++ pkgsrc/sysutils/xenkernel411/patches/patch-XSA286 Thu Nov 12 11:29:25 2020
@@ -1,4 +1,4 @@
-$NetBSD: patch-XSA286,v 1.1 2020/10/21 09:03:05 bouyer Exp $
+$NetBSD: patch-XSA286,v 1.2 2020/11/12 11:29:25 bouyer Exp $
From: Jan Beulich <jbeulich%suse.com@localhost>
Subject: x86: don't allow clearing of TF_kernel_mode for other than 64-bit PV
@@ -776,3 +776,227 @@ index c1e92937c0..e72c277b9f 100644
extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
extern l2_pgentry_t *compat_idle_pg_table_l2;
extern unsigned int m2p_compat_vstart;
+From 1d021db3c8712d25e25f078833baa160c90f260f Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Date: Thu, 22 Oct 2020 11:28:58 +0100
+Subject: [PATCH 1/2] x86/pv: Drop FLUSH_TLB_GLOBAL in do_mmu_update() for XPTI
+
+c/s 9d1d31ad9498 "x86: slightly reduce Meltdown band-aid overhead" removed the
+use of Global TLB flushes on the Xen entry path, but added a FLUSH_TLB_GLOBAL
+to the L4 path in do_mmu_update().
+
+However, this was unnecessary.
+
+It is the guest's responsibility to perform appropriate TLB flushing if the L4
+modification altered an established mapping in a flush-relevant way. In this
+case, an MMUEXT_OP hypercall will follow. The case which Xen needs to cover
+is when new mappings are created, and the resync on the exit-to-guest path
+covers this correctly.
+
+There is a corner case with multiple vCPUs in hypercalls at the same time,
+which 9d1d31ad9498 changed, and this patch changes back to its original XPTI
+behaviour.
+
+Architecturally, established TLB entries can continue to be used until the
+broadcast flush has completed. Therefore, even with concurrent hypercalls,
+the guest cannot depend on older mappings not being used until an MMUEXT_OP
+hypercall completes. Xen's implementation of guest-initiated flushes will
+take correct effect on top of an in-progress hypercall, picking up new
+mappings set before the other vCPU's MMUEXT_OP completes.
+
+Note: The correctness of this change is not impacted by whether XPTI uses
+global mappings or not. Correctness there depends on the behaviour of Xen on
+the entry/exit paths when switching to/from the XPTI "shadow" pagetables.
+
+This is (not really) XSA-286 (but necessary to simplify the logic).
+
+Fixes: 9d1d31ad9498 ("x86: slightly reduce Meltdown band-aid overhead")
+Signed-off-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Reviewed-by: Jan Beulich <jbeulich%suse.com@localhost>
+(cherry picked from commit 055e1c3a3d95b1e753148369fbc4ba48782dd602)
+---
+ xen/arch/x86/mm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 5ca5c8c9a2..129da1e648 100644
+--- xen/arch/x86/mm.c.orig
++++ xen/arch/x86/mm.c
+@@ -4279,7 +4279,7 @@ long do_mmu_update(
+
+ cpumask_andnot(mask, pt_owner->dirty_cpumask, cpumask_of(cpu));
+ if ( !cpumask_empty(mask) )
+- flush_mask(mask, FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL);
++ flush_mask(mask, FLUSH_ROOT_PGTBL);
+ }
+
+ perfc_add(num_page_updates, i);
+--
+2.20.1
+
+From e274c8bdc12eb596e55233040e8b49da27150f31 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Date: Mon, 19 Oct 2020 15:51:22 +0100
+Subject: [PATCH 2/2] x86/pv: Flush TLB in response to paging structure changes
+
+With MMU_UPDATE, a PV guest can make changes to higher level pagetables. This
+is safe from Xen's point of view (as the update only affects guest mappings),
+and the guest is required to flush (if necessary) after making updates.
+
+However, Xen's use of linear pagetables (UPDATE_VA_MAPPING, GNTTABOP_map,
+writeable pagetables, etc.) is an implementation detail outside of the
+API/ABI.
+
+Changes in the paging structure require invalidations in the linear pagetable
+range for subsequent accesses into the linear pagetables to access non-stale
+mappings. Xen must provide suitable flushing to prevent intermixed guest
+actions from accidentally accessing/modifying the wrong pagetable.
+
+For all L2 and higher modifications, flush the TLB. PV guests cannot create
+L2 or higher entries with the Global bit set, so no mappings established in
+the linear range can be global. (This could in principle be an order 39 flush
+starting at LINEAR_PT_VIRT_START, but no such mechanism exists in practice.)
+
+Express the necessary flushes as a set of booleans which accumulate across the
+operation. Comment the flushing logic extensively.
+
+This is XSA-286.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Reviewed-by: Jan Beulich <jbeulich%suse.com@localhost>
+(cherry picked from commit 16a20963b3209788f2c0d3a3eebb7d92f03f5883)
+---
+ xen/arch/x86/mm.c | 69 ++++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 59 insertions(+), 10 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 129da1e648..3528cf6b85 100644
+--- xen/arch/x86/mm.c.orig
++++ xen/arch/x86/mm.c
+@@ -3983,7 +3983,8 @@ long do_mmu_update(
+ struct vcpu *curr = current, *v = curr;
+ struct domain *d = v->domain, *pt_owner = d, *pg_owner;
+ mfn_t map_mfn = INVALID_MFN;
+- bool sync_guest = false;
++ bool flush_linear_pt = false, flush_root_pt_local = false,
++ flush_root_pt_others = false;
+ uint32_t xsm_needed = 0;
+ uint32_t xsm_checked = 0;
+ int rc = put_old_guest_table(curr);
+@@ -4133,6 +4134,8 @@ long do_mmu_update(
+ break;
+ rc = mod_l2_entry(va, l2e_from_intpte(req.val), mfn,
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
++ if ( !rc )
++ flush_linear_pt = true;
+ break;
+
+ case PGT_l3_page_table:
+@@ -4140,6 +4143,8 @@ long do_mmu_update(
+ break;
+ rc = mod_l3_entry(va, l3e_from_intpte(req.val), mfn,
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
++ if ( !rc )
++ flush_linear_pt = true;
+ break;
+
+ case PGT_l4_page_table:
+@@ -4147,6 +4152,8 @@ long do_mmu_update(
+ break;
+ rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
++ if ( !rc )
++ flush_linear_pt = true;
+ if ( !rc && pt_owner->arch.pv_domain.xpti )
+ {
+ bool local_in_use = false;
+@@ -4154,7 +4161,7 @@ long do_mmu_update(
+ if ( pagetable_get_pfn(curr->arch.guest_table) == mfn )
+ {
+ local_in_use = true;
+- get_cpu_info()->root_pgt_changed = true;
++ flush_root_pt_local = true;
+ }
+
+ /*
+@@ -4166,7 +4173,7 @@ long do_mmu_update(
+ (1 + !!(page->u.inuse.type_info & PGT_pinned) +
+ (pagetable_get_pfn(curr->arch.guest_table_user) ==
+ mfn) + local_in_use) )
+- sync_guest = true;
++ flush_root_pt_others = true;
+ }
+ break;
+
+@@ -4268,19 +4275,61 @@ long do_mmu_update(
+ if ( va )
+ unmap_domain_page(va);
+
+- if ( sync_guest )
++ /*
++ * Perform required TLB maintenance.
++ *
++ * This logic currently depends on flush_linear_pt being a superset of the
++ * flush_root_pt_* conditions.
++ *
++ * pt_owner may not be current->domain. This may occur during
++ * construction of 32bit PV guests, or debugging of PV guests. The
++ * behaviour cannot be correct with the domain unpaused. We therefore expect
++ * pt_owner->dirty_cpumask to be empty, but it is a waste of effort to
++ * explicitly check for, and exclude, this corner case.
++ *
++ * flush_linear_pt requires a FLUSH_TLB to all dirty CPUs. The flush must
++ * be performed now to maintain correct behaviour across a multicall.
++ * i.e. we cannot relax FLUSH_TLB to FLUSH_ROOT_PGTBL, given that the
++ * former is a side effect of the latter, because the resync (which is in
++ * the return-to-guest path) happens too late.
++ *
++ * flush_root_pt_* requires FLUSH_ROOT_PGTBL on the local CPU
++ * (implies pt_owner == current->domain and current->processor set in
++ * pt_owner->dirty_cpumask), and/or all *other* dirty CPUs as there are
++ * references we can't account for locally.
++ */
++ if ( flush_linear_pt /* || flush_root_pt_local || flush_root_pt_others */ )
+ {
++ unsigned int cpu = smp_processor_id();
++ cpumask_t *mask = pt_owner->dirty_cpumask;
++
+ /*
+- * Force other vCPU-s of the affected guest to pick up L4 entry
+- * changes (if any).
++ * Always handle local flushing separately (if applicable), to
++ * separate the flush invocations appropriately for scope of the two
++ * flush_root_pt_* variables.
+ */
+- unsigned int cpu = smp_processor_id();
+- cpumask_t *mask = per_cpu(scratch_cpumask, cpu);
++ if ( likely(cpumask_test_cpu(cpu, mask)) )
++ {
++ mask = per_cpu(scratch_cpumask, cpu);
+
+- cpumask_andnot(mask, pt_owner->dirty_cpumask, cpumask_of(cpu));
++ cpumask_copy(mask, pt_owner->dirty_cpumask);
++ __cpumask_clear_cpu(cpu, mask);
++
++ flush_local(FLUSH_TLB |
++ (flush_root_pt_local ? FLUSH_ROOT_PGTBL : 0));
++ }
++ else
++ /* Sanity check. flush_root_pt_local implies local cpu is dirty. */
++ ASSERT(!flush_root_pt_local);
++
++ /* Flush the remote dirty CPUs. Does not include the local CPU. */
+ if ( !cpumask_empty(mask) )
+- flush_mask(mask, FLUSH_ROOT_PGTBL);
++ flush_mask(mask, FLUSH_TLB |
++ (flush_root_pt_others ? FLUSH_ROOT_PGTBL : 0));
+ }
++ else
++ /* Sanity check. flush_root_pt_* implies flush_linear_pt. */
++ ASSERT(!flush_root_pt_local && !flush_root_pt_others);
+
+ perfc_add(num_page_updates, i);
+
+--
+2.20.1
+
Added files:
Index: pkgsrc/sysutils/xenkernel411/patches/patch-XSA351
diff -u /dev/null pkgsrc/sysutils/xenkernel411/patches/patch-XSA351:1.1
--- /dev/null Thu Nov 12 11:29:25 2020
+++ pkgsrc/sysutils/xenkernel411/patches/patch-XSA351 Thu Nov 12 11:29:25 2020
@@ -0,0 +1,283 @@
+$NetBSD: patch-XSA351,v 1.1 2020/11/12 11:29:25 bouyer Exp $
+
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau%citrix.com@localhost>
+Subject: x86/msr: fix handling of MSR_IA32_PERF_{STATUS/CTL}
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Currently a PV hardware domain can also be given control over the CPU
+frequency, and such a guest is allowed to write to MSR_IA32_PERF_CTL.
+However, since commit 322ec7c89f6 the default behavior has been changed
+to reject accesses to MSRs that are not explicitly handled, preventing PV
+guests that manage CPU frequency from reading
+MSR_IA32_PERF_{STATUS/CTL}.
+
+Additionally, some HVM guests (Windows at least) will attempt to read
+MSR_IA32_PERF_CTL and will panic if given back a #GP fault:
+
+ vmx.c:3035:d8v0 RDMSR 0x00000199 unimplemented
+ d8v0 VIRIDIAN CRASH: 3b c0000096 fffff806871c1651 ffffda0253683720 0
+
+Move the handling of MSR_IA32_PERF_{STATUS/CTL} to the common MSR
+handling shared between HVM and PV guests, and add an explicit case
+for reads to MSR_IA32_PERF_{STATUS/CTL}.
+
+Restore previous behavior and allow PV guests with the required
+permissions to read the contents of the mentioned MSRs. Non privileged
+guests will get 0 when trying to read those registers, as writes to
+MSR_IA32_PERF_CTL by such guest will already be silently dropped.
+
+Fixes: 322ec7c89f6 ('x86/pv: disallow access to unknown MSRs')
+Fixes: 84e848fd7a1 ('x86/hvm: disallow access to unknown MSRs')
+Signed-off-by: Roger Pau Monné <roger.pau%citrix.com@localhost>
+Signed-off-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Reviewed-by: Roger Pau Monné <roger.pau%citrix.com@localhost>
+Reviewed-by: Jan Beulich <jbeulich%suse.com@localhost>
+(cherry picked from commit 3059178798a23ba870ff86ff54d442a07e6651fc)
+
+diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
+index 256e58d82b..3495ac9f4a 100644
+--- xen/arch/x86/msr.c.orig
++++ xen/arch/x86/msr.c
+@@ -141,6 +141,7 @@ int init_vcpu_msr_policy(struct vcpu *v)
+
+ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
+ {
++ const struct domain *d = v->domain;
+ const struct cpuid_policy *cp = v->domain->arch.cpuid;
+ const struct msr_domain_policy *dp = v->domain->arch.msr;
+ const struct msr_vcpu_policy *vp = v->arch.msr;
+@@ -212,6 +213,25 @@ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
+ break;
+
+ /*
++ * These MSRs are not enumerated in CPUID. They have been around
++ * since the Pentium 4, and are implemented by other vendors.
++ *
++ * Some versions of Windows try reading these before setting up a #GP
++ * handler, and Linux has several unguarded reads as well. Provide
++ * RAZ semantics, in general, but permit a cpufreq controller dom0 to
++ * have full access.
++ */
++ case MSR_IA32_PERF_STATUS:
++ case MSR_IA32_PERF_CTL:
++ if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
++ goto gp_fault;
++
++ *val = 0;
++ if ( likely(!is_cpufreq_controller(d)) || rdmsr_safe(msr, *val) == 0 )
++ break;
++ goto gp_fault;
++
++ /*
+ * TODO: Implement when we have better topology representation.
+ case MSR_INTEL_CORE_THREAD_COUNT:
+ */
+@@ -241,6 +261,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
+ case MSR_INTEL_CORE_THREAD_COUNT:
+ case MSR_INTEL_PLATFORM_INFO:
+ case MSR_ARCH_CAPABILITIES:
++ case MSR_IA32_PERF_STATUS:
+ /* Read-only */
+ case MSR_TSX_FORCE_ABORT:
+ case MSR_TSX_CTRL:
+@@ -345,6 +366,21 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
+ break;
+ }
+
++ /*
++ * This MSR is not enumerated in CPUID. It has been around since the
++ * Pentium 4, and is implemented by other vendors.
++ *
++ * To match the RAZ semantics, implement as write-discard, except for
++ * a cpufreq controller dom0 which has full access.
++ */
++ case MSR_IA32_PERF_CTL:
++ if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
++ goto gp_fault;
++
++ if ( likely(!is_cpufreq_controller(d)) || wrmsr_safe(msr, val) == 0 )
++ break;
++ goto gp_fault;
++
+ default:
+ return X86EMUL_UNHANDLEABLE;
+ }
+diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
+index 8120ded330..755f00db33 100644
+--- xen/arch/x86/pv/emul-priv-op.c.orig
++++ xen/arch/x86/pv/emul-priv-op.c
+@@ -816,12 +816,6 @@ static inline uint64_t guest_misc_enable(uint64_t val)
+ return val;
+ }
+
+-static inline bool is_cpufreq_controller(const struct domain *d)
+-{
+- return ((cpufreq_controller == FREQCTL_dom0_kernel) &&
+- is_hardware_domain(d));
+-}
+-
+ static int read_msr(unsigned int reg, uint64_t *val,
+ struct x86_emulate_ctxt *ctxt)
+ {
+@@ -1096,14 +1090,6 @@ static int write_msr(unsigned int reg, uint64_t val,
+ return X86EMUL_OKAY;
+ break;
+
+- case MSR_IA32_PERF_CTL:
+- if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+- break;
+- if ( likely(!is_cpufreq_controller(currd)) ||
+- wrmsr_safe(reg, val) == 0 )
+- return X86EMUL_OKAY;
+- break;
+-
+ case MSR_IA32_THERM_CONTROL:
+ case MSR_IA32_ENERGY_PERF_BIAS:
+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
+index c0cc5d9336..7e4ad5d51b 100644
+--- xen/include/xen/sched.h.orig
++++ xen/include/xen/sched.h
+@@ -920,6 +920,22 @@ extern enum cpufreq_controller {
+ FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
+ } cpufreq_controller;
+
++static always_inline bool is_cpufreq_controller(const struct domain *d)
++{
++ /*
++ * A PV dom0 can be nominated as the cpufreq controller, instead of using
++ * Xen's cpufreq driver, at which point dom0 gets direct access to certain
++ * MSRs.
++ *
++ * This interface only works when dom0 is identity pinned and has the same
++ * number of vCPUs as pCPUs on the system.
++ *
++ * It would be far better to paravirtualise the interface.
++ */
++ return (is_pv_domain(d) && is_hardware_domain(d) &&
++ cpufreq_controller == FREQCTL_dom0_kernel);
++}
++
+ #define CPUPOOLID_NONE -1
+
+ struct cpupool *cpupool_get_by_id(int poolid);
+From: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Subject: x86/msr: Disallow guest access to the RAPL MSRs
+
+Researchers have demonstrated using the RAPL interface to perform a
+differential power analysis attack to recover AES keys used by other cores in
+the system.
+
+Furthermore, even privileged guests cannot use this interface correctly, due
+to MSR scope and vcpu scheduling issues. The interface would want to be
+paravirtualised to be used sensibly.
+
+Disallow access to the RAPL MSRs completely, as well as other MSRs which
+potentially access fine grain power information.
+
+This is part of XSA-351.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3%citrix.com@localhost>
+Reviewed-by: Jan Beulich <jbeulich%suse.com@localhost>
+
+diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
+index 3495ac9f4a..99c848ff41 100644
+--- xen/arch/x86/msr.c.orig
++++ xen/arch/x86/msr.c
+@@ -156,6 +156,15 @@ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
+ case MSR_TSX_FORCE_ABORT:
+ case MSR_TSX_CTRL:
+ case MSR_MCU_OPT_CTRL:
++ case MSR_RAPL_POWER_UNIT:
++ case MSR_PKG_POWER_LIMIT ... MSR_PKG_POWER_INFO:
++ case MSR_DRAM_POWER_LIMIT ... MSR_DRAM_POWER_INFO:
++ case MSR_PP0_POWER_LIMIT ... MSR_PP0_POLICY:
++ case MSR_PP1_POWER_LIMIT ... MSR_PP1_POLICY:
++ case MSR_PLATFORM_ENERGY_COUNTER:
++ case MSR_PLATFORM_POWER_LIMIT:
++ case MSR_F15H_CU_POWER ... MSR_F15H_CU_MAX_POWER:
++ case MSR_AMD_RAPL_POWER_UNIT ... MSR_AMD_PKG_ENERGY_STATUS:
+ /* Not offered to guests. */
+ goto gp_fault;
+
+@@ -266,6 +275,15 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
+ case MSR_TSX_FORCE_ABORT:
+ case MSR_TSX_CTRL:
+ case MSR_MCU_OPT_CTRL:
++ case MSR_RAPL_POWER_UNIT:
++ case MSR_PKG_POWER_LIMIT ... MSR_PKG_POWER_INFO:
++ case MSR_DRAM_POWER_LIMIT ... MSR_DRAM_POWER_INFO:
++ case MSR_PP0_POWER_LIMIT ... MSR_PP0_POLICY:
++ case MSR_PP1_POWER_LIMIT ... MSR_PP1_POLICY:
++ case MSR_PLATFORM_ENERGY_COUNTER:
++ case MSR_PLATFORM_POWER_LIMIT:
++ case MSR_F15H_CU_POWER ... MSR_F15H_CU_MAX_POWER:
++ case MSR_AMD_RAPL_POWER_UNIT ... MSR_AMD_PKG_ENERGY_STATUS:
+ /* Not offered to guests. */
+ goto gp_fault;
+
+diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
+index 480d1d8102..a685dcdcca 100644
+--- xen/include/asm-x86/msr-index.h.orig
++++ xen/include/asm-x86/msr-index.h
+@@ -96,6 +96,38 @@
+ /* Lower 6 bits define the format of the address in the LBR stack */
+ #define MSR_IA32_PERF_CAP_LBR_FORMAT 0x3f
+
++/*
++ * Intel Running Average Power Limit (RAPL) interface. Power plane base
++ * addresses (MSR_*_POWER_LIMIT) are model specific, but have so far been
++ * consistent since their introduction in SandyBridge.
++ *
++ * Offsets of functionality from the power plane base are architectural, but
++ * not all power planes support all functionality.
++ */
++#define MSR_RAPL_POWER_UNIT 0x00000606
++
++#define MSR_PKG_POWER_LIMIT 0x00000610
++#define MSR_PKG_ENERGY_STATUS 0x00000611
++#define MSR_PKG_PERF_STATUS 0x00000613
++#define MSR_PKG_POWER_INFO 0x00000614
++
++#define MSR_DRAM_POWER_LIMIT 0x00000618
++#define MSR_DRAM_ENERGY_STATUS 0x00000619
++#define MSR_DRAM_PERF_STATUS 0x0000061b
++#define MSR_DRAM_POWER_INFO 0x0000061c
++
++#define MSR_PP0_POWER_LIMIT 0x00000638
++#define MSR_PP0_ENERGY_STATUS 0x00000639
++#define MSR_PP0_POLICY 0x0000063a
++
++#define MSR_PP1_POWER_LIMIT 0x00000640
++#define MSR_PP1_ENERGY_STATUS 0x00000641
++#define MSR_PP1_POLICY 0x00000642
++
++/* Intel Platform-wide power interface. */
++#define MSR_PLATFORM_ENERGY_COUNTER 0x0000064d
++#define MSR_PLATFORM_POWER_LIMIT 0x0000065c
++
+ #define MSR_IA32_BNDCFGS 0x00000d90
+ #define IA32_BNDCFGS_ENABLE 0x00000001
+ #define IA32_BNDCFGS_PRESERVE 0x00000002
+@@ -218,6 +250,8 @@
+ #define MSR_K8_VM_CR 0xc0010114
+ #define MSR_K8_VM_HSAVE_PA 0xc0010117
+
++#define MSR_F15H_CU_POWER 0xc001007a
++#define MSR_F15H_CU_MAX_POWER 0xc001007b
+ #define MSR_AMD_FAM15H_EVNTSEL0 0xc0010200
+ #define MSR_AMD_FAM15H_PERFCTR0 0xc0010201
+ #define MSR_AMD_FAM15H_EVNTSEL1 0xc0010202
+@@ -231,6 +265,10 @@
+ #define MSR_AMD_FAM15H_EVNTSEL5 0xc001020a
+ #define MSR_AMD_FAM15H_PERFCTR5 0xc001020b
+
++#define MSR_AMD_RAPL_POWER_UNIT 0xc0010299
++#define MSR_AMD_CORE_ENERGY_STATUS 0xc001029a
++#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b
++
+ #define MSR_AMD_L7S0_FEATURE_MASK 0xc0011002
+ #define MSR_AMD_THRM_FEATURE_MASK 0xc0011003
+ #define MSR_K8_FEATURE_MASK 0xc0011004
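For context on the guest-visible effect of the MSR_IA32_PERF_{STATUS/CTL} change above: an unprivileged guest now reads 0 from these MSRs (read-as-zero) instead of taking a #GP fault. A minimal sketch of such a read from a guest kernel's perspective (illustrative only, not part of this commit; 0x198/0x199 are the architectural MSR indexes):

#include <stdint.h>

#define MSR_IA32_PERF_STATUS 0x00000198
#define MSR_IA32_PERF_CTL    0x00000199

/* Execute RDMSR; needs ring 0, i.e. guest kernel context. */
static inline uint64_t rdmsr(uint32_t msr)
{
    uint32_t lo, hi;

    asm volatile ("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
    return ((uint64_t)hi << 32) | lo;
}

void perf_msr_example(void)
{
    /*
     * On a hypervisor with this patch applied, a guest that is not the
     * nominated cpufreq controller reads 0 here (on Intel or Centaur
     * parts; other vendors still see #GP). Before the patch the access
     * raised #GP, which crashed e.g. Windows via the Viridian path
     * quoted in the commit message above.
     */
    uint64_t status = rdmsr(MSR_IA32_PERF_STATUS);

    (void)status;
}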