Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch Unmap the kernel heap from the user page tables (SVS).
details: https://anonhg.NetBSD.org/src/rev/24462ba85264
branches: trunk
changeset: 358822:24462ba85264
user: maxv <maxv%NetBSD.org@localhost>
date: Thu Jan 18 07:25:34 2018 +0000
description:
Unmap the kernel heap from the user page tables (SVS).
This implementation is optimized and organized in such a way that we
don't need to copy the kernel stack to a safe place during user<->kernel
transitions. We create two VAs that point to the same physical page: one
is mapped in userland and is offset so that it contains only the
trapframe; the other is mapped in the kernel and maps the entire stack.
Sent on tech-kern@ a week ago.
diffstat:
sys/arch/amd64/amd64/amd64_trap.S | 8 +-
sys/arch/amd64/amd64/genassym.cf | 5 +-
sys/arch/amd64/amd64/locore.S | 29 +++++-
sys/arch/amd64/amd64/machdep.c | 164 ++++++++++++++++++++++++++++++++-----
sys/arch/amd64/include/frameasm.h | 55 +++++++----
sys/arch/x86/include/cpu.h | 7 +-
sys/arch/x86/include/pmap.h | 6 +-
sys/arch/x86/x86/vm_machdep.c | 10 +-
8 files changed, 224 insertions(+), 60 deletions(-)
diffs (truncated from 527 to 300 lines):
diff -r a53deb0c2ac3 -r 24462ba85264 sys/arch/amd64/amd64/amd64_trap.S
--- a/sys/arch/amd64/amd64/amd64_trap.S Thu Jan 18 00:34:05 2018 +0000
+++ b/sys/arch/amd64/amd64/amd64_trap.S Thu Jan 18 07:25:34 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: amd64_trap.S,v 1.17 2018/01/07 16:10:16 maxv Exp $ */
+/* $NetBSD: amd64_trap.S,v 1.18 2018/01/18 07:25:34 maxv Exp $ */
/*
* Copyright (c) 1998, 2007, 2008, 2017 The NetBSD Foundation, Inc.
@@ -66,7 +66,7 @@
#if 0
#include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: amd64_trap.S,v 1.17 2018/01/07 16:10:16 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: amd64_trap.S,v 1.18 2018/01/18 07:25:34 maxv Exp $");
#endif
/*
@@ -120,8 +120,8 @@
#else
ZTRAP_NJ(T_NMI)
subq $TF_REGSIZE,%rsp
- SVS_ENTER
INTR_SAVE_GPRS
+ SVS_ENTER_ALTSTACK
cld
SMAP_ENABLE
movw %gs,TF_GS(%rsp)
@@ -138,7 +138,7 @@
movq %rsp,%rdi
incq CPUVAR(NTRAP)
call _C_LABEL(nmitrap)
- SVS_LEAVE
+ SVS_LEAVE_ALTSTACK
swapgs
jmp .Lnmileave
diff -r a53deb0c2ac3 -r 24462ba85264 sys/arch/amd64/amd64/genassym.cf
--- a/sys/arch/amd64/amd64/genassym.cf Thu Jan 18 00:34:05 2018 +0000
+++ b/sys/arch/amd64/amd64/genassym.cf Thu Jan 18 07:25:34 2018 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: genassym.cf,v 1.66 2018/01/07 16:47:22 christos Exp $
+# $NetBSD: genassym.cf,v 1.67 2018/01/18 07:25:34 maxv Exp $
#
# Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -239,6 +239,9 @@
ifdef SVS
define CPU_INFO_UPDIRPA offsetof(struct cpu_info, ci_svs_updirpa)
define CPU_INFO_KPDIRPA offsetof(struct cpu_info, ci_svs_kpdirpa)
+define CPU_INFO_RSP0 offsetof(struct cpu_info, ci_svs_rsp0)
+define CPU_INFO_URSP0 offsetof(struct cpu_info, ci_svs_ursp0)
+define CPU_INFO_KRSP0 offsetof(struct cpu_info, ci_svs_krsp0)
endif
define CPU_INFO_NSYSCALL offsetof(struct cpu_info, ci_data.cpu_nsyscall)
define CPU_INFO_NTRAP offsetof(struct cpu_info, ci_data.cpu_ntrap)
diff -r a53deb0c2ac3 -r 24462ba85264 sys/arch/amd64/amd64/locore.S
--- a/sys/arch/amd64/amd64/locore.S Thu Jan 18 00:34:05 2018 +0000
+++ b/sys/arch/amd64/amd64/locore.S Thu Jan 18 07:25:34 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: locore.S,v 1.146 2018/01/11 09:00:04 maxv Exp $ */
+/* $NetBSD: locore.S,v 1.147 2018/01/18 07:25:34 maxv Exp $ */
/*
* Copyright-o-rama!
@@ -1112,7 +1112,11 @@
jnz .Lswitch_return
/* Switch ring0 stack */
-#ifndef XEN
+#ifdef SVS
+ movq CPUVAR(RSP0),%rax
+ movq CPUVAR(TSS),%rdi
+ movq %rax,TSS_RSP0(%rdi)
+#elif !defined(XEN)
movq PCB_RSP0(%r14),%rax
movq CPUVAR(TSS),%rdi
movq %rax,TSS_RSP0(%rdi)
@@ -1268,14 +1272,20 @@
* is ignored as well.
*/
swapgs
- SVS_ENTER_NOSTACK
+
+#ifdef SVS
+ movq %rax,SVS_UTLS+UTLS_SCRATCH
+ movq SVS_UTLS+UTLS_RSP0,%rax
+#define SP(x) (x)-(TF_SS+8)(%rax)
+#else
movq %r15,CPUVAR(SCRATCH)
movq CPUVAR(CURLWP),%r15
movq L_PCB(%r15),%r15
movq PCB_RSP0(%r15),%r15 /* LWP's kernel stack pointer */
+#define SP(x) (x)-(TF_SS+8)(%r15)
+#endif
/* Make stack look like an 'int nn' frame */
-#define SP(x) (x)-(TF_SS+8)(%r15)
movq $(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS) /* user %ss */
movq %rsp,SP(TF_RSP) /* user %rsp */
movq %r11,SP(TF_RFLAGS) /* user %rflags */
@@ -1283,8 +1293,11 @@
movq %rcx,SP(TF_RIP) /* user %rip */
leaq SP(0),%rsp /* %rsp now valid after frame */
+#ifdef SVS
+ movq SVS_UTLS+UTLS_SCRATCH,%rax
+#else
movq CPUVAR(SCRATCH),%r15
-#undef SP
+#endif
movq $2,TF_ERR(%rsp) /* syscall instruction size */
movq $T_ASTFLT,TF_TRAPNO(%rsp)
@@ -1301,6 +1314,7 @@
movw $GSEL(GUDATA_SEL, SEL_UPL),TF_ES(%rsp)
movw $0,TF_FS(%rsp)
movw $0,TF_GS(%rsp)
+ SVS_ENTER
STI(si)
.Ldo_syscall:
@@ -1339,8 +1353,8 @@
testl $(MDL_IRET|MDL_COMPAT32),L_MD_FLAGS(%r14)
jnz intrfastexit
+ SVS_LEAVE
INTR_RESTORE_GPRS
- SVS_LEAVE
SWAPGS
#ifndef XEN
movq TF_RIP(%rsp),%rcx /* %rip for sysret */
@@ -1483,6 +1497,8 @@
END(pagezero)
ENTRY(intrfastexit)
+ NOT_XEN(cli;)
+ SVS_LEAVE
INTR_RESTORE_GPRS
testw $SEL_UPL,TF_CS(%rsp) /* interrupted %cs */
jz .Lkexit
@@ -1513,7 +1529,6 @@
.Luexit64:
NOT_XEN(cli;)
- SVS_LEAVE
SWAPGS
.Lkexit:
diff -r a53deb0c2ac3 -r 24462ba85264 sys/arch/amd64/amd64/machdep.c
--- a/sys/arch/amd64/amd64/machdep.c Thu Jan 18 00:34:05 2018 +0000
+++ b/sys/arch/amd64/amd64/machdep.c Thu Jan 18 07:25:34 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: machdep.c,v 1.290 2018/01/12 09:12:01 maxv Exp $ */
+/* $NetBSD: machdep.c,v 1.291 2018/01/18 07:25:34 maxv Exp $ */
/*
* Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@@ -110,7 +110,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.290 2018/01/12 09:12:01 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.291 2018/01/18 07:25:34 maxv Exp $");
/* #define XENDEBUG_LOW */
@@ -2265,10 +2265,16 @@
* PTE Space [OK]
* Direct Map [OK]
* Remote PCPU Areas [OK]
- * Kernel Heap [TODO]
+ * Kernel Heap [OK]
* Kernel Image [TODO]
*/
+struct svs_utls {
+ paddr_t kpdirpa;
+ uint64_t scratch;
+ vaddr_t rsp0;
+};
+
static pd_entry_t *
svs_tree_add(struct cpu_info *ci, vaddr_t va)
{
@@ -2334,6 +2340,84 @@
}
static void
+svs_rsp0_init(struct cpu_info *ci)
+{
+ const cpuid_t cid = cpu_index(ci);
+ vaddr_t va, rsp0;
+ pd_entry_t *pd;
+ size_t pidx;
+
+ rsp0 = (vaddr_t)&pcpuarea->ent[cid].rsp0;
+
+ /* The first page is a redzone. */
+ va = rsp0 + PAGE_SIZE;
+
+ /* Create levels L4, L3 and L2. */
+ pd = svs_tree_add(ci, va);
+
+ /* Get the info for L1. */
+ pidx = pl1_i(va % NBPD_L2);
+ if (pmap_valid_entry(pd[pidx])) {
+ panic("%s: rsp0 page already mapped", __func__);
+ }
+
+ ci->ci_svs_rsp0_pte = (pt_entry_t *)&pd[pidx];
+ ci->ci_svs_rsp0 = rsp0 + PAGE_SIZE + sizeof(struct trapframe);
+ ci->ci_svs_ursp0 = ci->ci_svs_rsp0 - sizeof(struct trapframe);
+ ci->ci_svs_krsp0 = 0;
+}
+
+static void
+svs_utls_init(struct cpu_info *ci)
+{
+ const vaddr_t utlsva = (vaddr_t)&pcpuarea->utls;
+ struct svs_utls *utls;
+ struct vm_page *pg;
+ pd_entry_t *pd;
+ size_t pidx;
+ paddr_t pa;
+ vaddr_t va;
+
+ /* Create levels L4, L3 and L2. */
+ pd = svs_tree_add(ci, utlsva);
+
+ /* Allocate L1. */
+ pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
+ if (pg == 0)
+ panic("%s: failed to allocate PA for CPU %d\n", __func__,
+ cpu_index(ci));
+ pa = VM_PAGE_TO_PHYS(pg);
+
+ /* Enter L1. */
+ if (pmap_valid_entry(L1_BASE[pl1_i(utlsva)])) {
+ panic("%s: local page already mapped", __func__);
+ }
+ pidx = pl1_i(utlsva % NBPD_L2);
+ if (pmap_valid_entry(pd[pidx])) {
+ panic("%s: L1 page already mapped", __func__);
+ }
+ pd[pidx] = PG_V | PG_RW | pmap_pg_nx | pa;
+
+ /*
+ * Now, allocate a VA in the kernel map, that points to the UTLS
+ * page.
+ */
+ va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
+ UVM_KMF_VAONLY|UVM_KMF_NOWAIT);
+ if (va == 0) {
+ panic("%s: unable to allocate VA\n", __func__);
+ }
+ pmap_kenter_pa(va, pa, VM_PROT_READ|VM_PROT_WRITE, 0);
+ pmap_update(pmap_kernel());
+
+ ci->ci_svs_utls = va;
+
+ /* Initialize the constant fields of the UTLS page */
+ utls = (struct svs_utls *)ci->ci_svs_utls;
+ utls->rsp0 = ci->ci_svs_rsp0;
+}
+
+static void
svs_range_add(struct cpu_info *ci, vaddr_t va, size_t size)
{
size_t i, n;
@@ -2377,7 +2461,10 @@
svs_page_add(ci, (vaddr_t)&pcpuarea->idt);
svs_page_add(ci, (vaddr_t)&pcpuarea->ldt);
svs_range_add(ci, (vaddr_t)&pcpuarea->ent[cid],
- sizeof(struct pcpu_entry));
+ offsetof(struct pcpu_entry, rsp0));
+
+ svs_rsp0_init(ci);
+ svs_utls_init(ci);
}
void
@@ -2412,7 +2499,43 @@
void
svs_lwp_switch(struct lwp *oldlwp, struct lwp *newlwp)
{
- /* Switch rsp0 */
+ struct cpu_info *ci = curcpu();
+ struct pcb *pcb;
+ pt_entry_t *pte;
+ uintptr_t rsp0;
+ vaddr_t va;
+
+ if (newlwp->l_flag & LW_SYSTEM) {
+ return;
+ }
+
+#ifdef DIAGNOSTIC
+ if (oldlwp != NULL && !(oldlwp->l_flag & LW_SYSTEM)) {
+ pcb = lwp_getpcb(oldlwp);
Home |
Main Index |
Thread Index |
Old Index