Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/amd64/amd64 Fix handling of segment register faults...
details: https://anonhg.NetBSD.org/src/rev/34ccd9eee411
branches: trunk
changeset: 830077:34ccd9eee411
user: maxv <maxv%NetBSD.org@localhost>
date: Sun Feb 25 12:37:16 2018 +0000
description:
Fix handling of segment register faults when running with SVS. The behavior
also changes in the non-SVS case.
I've documented this in amd64_trap.S. Basically, the problem with SVS
is that if iret faults, we already have a full trapframe pushed on the
stack and the CPU will push another frame on this stack (nested), but it
hits the redzone below the stack since it is still running with the user
page table loaded.
To fix that, we pop a good part of the trapframe earlier in intrfastexit.
If iret faults, the current %rsp has enough room for an iret frame, and
the CPU can push that without problem. We then switch back to the outer
iret frame (the frame the CPU was trying to pop by executing iret, but that
it didn't pop for real because iret faulted), call INTRENTRY, and handle
the trap as if it had been received from userland directly.
diffstat:
sys/arch/amd64/amd64/amd64_trap.S | 152 +++++++++++++++++++++++++++----------
sys/arch/amd64/amd64/locore.S | 32 +++++--
sys/arch/amd64/amd64/trap.c | 92 +----------------------
3 files changed, 134 insertions(+), 142 deletions(-)
diffs (truncated from 380 to 300 lines):
diff -r 0404391e7fc6 -r 34ccd9eee411 sys/arch/amd64/amd64/amd64_trap.S
--- a/sys/arch/amd64/amd64/amd64_trap.S Sun Feb 25 12:28:18 2018 +0000
+++ b/sys/arch/amd64/amd64/amd64_trap.S Sun Feb 25 12:37:16 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: amd64_trap.S,v 1.36 2018/02/25 11:57:44 maxv Exp $ */
+/* $NetBSD: amd64_trap.S,v 1.37 2018/02/25 12:37:16 maxv Exp $ */
/*
* Copyright (c) 1998, 2007, 2008, 2017 The NetBSD Foundation, Inc.
@@ -368,69 +368,139 @@
#ifndef kernuser_reenter
/*
* We need to worry about traps in kernel mode while the kernel %gs isn't
- * loaded. These are either faults on iretq during return to user or loads to
- * %gs.
+ * loaded. When such traps happen, we have CPL=0 and %gs=userland, and we
+ * must perform an additional swapgs to get %gs=kernel.
+ */
+
+#define TF_SMALL(val, reg) (val - TF_REGSIZE)(reg)
+#define TF_SMALL_REGPUSHED(val, reg) (val - (TF_REGSIZE - 8))(reg)
+
+/*
+ * It is possible that we received a trap in kernel mode, but with the user
+ * context loaded. There are six cases where this can happen:
+ *
+ * o Execution of SYSRETQ.
+ * o Execution of IRETQ.
+ * o Reload of ES.
+ * o Reload of DS.
+ * o Reload of FS.
+ * o Reload of GS.
+ *
+ * When this happens, the kernel is re-entered in kernel mode, but the
+ * previous context is in kernel mode too.
+ *
+ * We have two iret frames in the stack. In the first one, the 'rsp' field
+ * points to the outer iret frame:
*
- * When such traps happen, we have CPL=0 and %gs=userland, and we must perform
- * an additional swapgs to get %gs=kernel.
+ * +---------------------------------------------------+
+ * | trapno | err | rip | cs=ring0 | rflags | rsp | ss |
+ * +-------------------------------------------|-------+
+ * |
+ * +---------------------------------+
+ * |
+ * | +---------------------------------------------------+
+ * +--> | trapno | err | rip | cs=ring3 | rflags | rsp | ss |
+ * +---------------------------------------------------+
+ *
+ * We perform a three-step procedure:
+ *
+ * o We copy the 'trapno' field of the current frame into the 'trapno'
+ * field of the outer frame.
+ *
+ * o We update RSP to point to the outer frame. This outer frame is in the
+ * same stack as the current frame, and likely just after the current
+ * frame.
+ *
+ * o We do a normal INTRENTRY. Now that RSP points to the outer frame,
+ * everything behaves as if we had received a trap from the outer frame,
+ * that is to say, from userland directly.
+ *
+ * Finally, we jump to 'calltrap' and handle the trap smoothly.
+ *
+ * Two notes regarding SVS:
+ *
+ * o With SVS, we will receive the trap while the user page tables are
+ * loaded. That's not a problem, we don't touch anything unmapped here.
+ *
+ * o With SVS, when the user page tables are loaded, the stack is really
+ * small, and can contain only one trapframe structure. Therefore, in
+ * intrfastexit, we must save the GPRs and pop their part of the stack
+ * right away. If we weren't doing that, and the reload of ES faulted for
+ * example, then the CPU would try to push an iret frame on the current
+ * stack (nested), and would double-fault because it touches the redzone
+ * below the stack (see the documentation in x86/x86/svs.c). By popping
+ * the GPR part of the stack, we leave enough stack for the CPU to push
+ * an iret frame, and for us to push two 8-byte registers too.
*/
_ALIGN_TEXT
LABEL(kernuser_reenter)
- INTRENTRY_L(3f,1:)
-2:
+ testb $SEL_UPL,TF_SMALL(TF_CS, %rsp)
+ jz .Lkernelmode
+
+.Lnormal_entry:
+ INTRENTRY
sti
jmp calltrap
-3:
- /*
- * Trap in kernel mode.
- */
+
+.Lkernelmode:
+ /* We will clobber %rdi */
+ pushq %rdi
/* Case 1: fault on sysretq? */
leaq do_sysret(%rip),%rdi
- cmpq %rdi,TF_RIP(%rsp)
- je 1b
+ cmpq %rdi,TF_SMALL_REGPUSHED(TF_RIP, %rsp)
+ je .Lkernelmode_but_user
/* Case 2: fault on iretq? */
leaq do_iret(%rip),%rdi
- cmpq %rdi,TF_RIP(%rsp)
+ cmpq %rdi,TF_SMALL_REGPUSHED(TF_RIP, %rsp)
jne 5f
- movq TF_RSP(%rsp),%rdi /* Must read %rsp, may be a pad word */
- testb $SEL_UPL,8(%rdi) /* Check %cs of outer iret frame */
- je 2b /* jump if iret was to kernel */
- jmp 1b /* to user - must restore %gs */
+ movq TF_SMALL_REGPUSHED(TF_RSP, %rsp),%rdi /* get %rsp */
+ testb $SEL_UPL,8(%rdi) /* check %cs of outer iret frame */
+ je .Lnormal_entry /* jump if iret was to kernel */
+ jmp .Lkernelmode_but_user /* to user - must restore %gs */
5:
- /* Case 3: move to %gs? */
+ /* Case 3: move to %es? */
+ leaq do_mov_es(%rip),%rdi
+ cmpq %rdi,TF_SMALL_REGPUSHED(TF_RIP, %rsp)
+ je .Lkernelmode_but_user
+
+ /* Case 4: move to %ds? */
+ leaq do_mov_ds(%rip),%rdi
+ cmpq %rdi,TF_SMALL_REGPUSHED(TF_RIP, %rsp)
+ je .Lkernelmode_but_user
+
+ /* Case 5: move to %fs? */
+ leaq do_mov_fs(%rip),%rdi
+ cmpq %rdi,TF_SMALL_REGPUSHED(TF_RIP, %rsp)
+ je .Lkernelmode_but_user
+
+ /* Case 6: move to %gs? */
leaq do_mov_gs(%rip),%rdi
- cmpq %rdi,TF_RIP(%rsp)
- je 1b
+ cmpq %rdi,TF_SMALL_REGPUSHED(TF_RIP, %rsp)
+ je .Lkernelmode_but_user
+
+ /* None of the above cases: normal kernel fault */
+ popq %rdi
+ jmp .Lnormal_entry
- /* None of the above cases */
- jmp 2b /* normal kernel fault */
+.Lkernelmode_but_user:
+ movq TF_SMALL_REGPUSHED(TF_RSP, %rsp),%rdi
+
+ pushq %rax
+ movq 16(%rsp),%rax /* 16(%rsp) = current TF_TRAPNO */
+ movq %rax,(%rdi) /* (%rdi) = outer TF_TRAPNO */
+ popq %rax
+
+ movq %rdi,%rsp
+ jmp .Lnormal_entry
END(kernuser_reenter)
#endif
TEXT_USER_END
/*
- * trap() calls here when it detects a fault in INTRFASTEXIT (loading the
- * segment registers or during the iret itself). The address of the (possibly
- * reconstructed) user trap frame is passed as an argument.
- *
- * Typically the code will have raised a SIGSEGV which will be actioned
- * by the code below.
- */
- .type _C_LABEL(trap_return_fault_return), @function
-LABEL(trap_return_fault_return)
- mov %rdi,%rsp /* frame for user return */
-#ifdef DIAGNOSTIC
- /* We can't recover the saved %rbx, so suppress warning */
- movl CPUVAR(ILEVEL),%ebx
-#endif
- jmp .Lalltraps_checkusr
-END(trap_return_fault_return)
-
-/*
* All traps go through here. Call the generic trap handler, and
* check for ASTs afterwards.
*/
diff -r 0404391e7fc6 -r 34ccd9eee411 sys/arch/amd64/amd64/locore.S
--- a/sys/arch/amd64/amd64/locore.S Sun Feb 25 12:28:18 2018 +0000
+++ b/sys/arch/amd64/amd64/locore.S Sun Feb 25 12:37:16 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: locore.S,v 1.156 2018/02/24 17:12:10 maxv Exp $ */
+/* $NetBSD: locore.S,v 1.157 2018/02/25 12:37:16 maxv Exp $ */
/*
* Copyright-o-rama!
@@ -1544,33 +1544,43 @@
TEXT_USER_BEGIN
+/*
+ * In intrfastexit, we advance %rsp at the beginning. We then access the
+ * segment registers in the trapframe with TF_BACKW (backwards). See the
+ * documentation in amd64_trap.S for an explanation.
+ */
+
+#define TF_BACKW(val, reg) (val - TF_REGSIZE)(reg)
+
_ALIGN_TEXT
LABEL(intrfastexit)
NOT_XEN(cli;)
SVS_LEAVE
INTR_RESTORE_GPRS
- testb $SEL_UPL,TF_CS(%rsp) /* interrupted %cs */
+ addq $TF_REGSIZE,%rsp /* iret frame */
+
+ testb $SEL_UPL,TF_BACKW(TF_CS, %rsp)
jz .Lkexit
- cmpw $LSEL(LUCODE_SEL, SEL_UPL),TF_CS(%rsp)
+ cmpw $LSEL(LUCODE_SEL, SEL_UPL),TF_BACKW(TF_CS, %rsp)
je .Luexit64
- cmpw $GSEL(GUCODE_SEL, SEL_UPL),TF_CS(%rsp)
+ cmpw $GSEL(GUCODE_SEL, SEL_UPL),TF_BACKW(TF_CS, %rsp)
je .Luexit64
#ifdef XEN
- cmpw $FLAT_RING3_CS64,TF_CS(%rsp)
+ cmpw $FLAT_RING3_CS64,TF_BACKW(TF_CS, %rsp)
je .Luexit64
#endif
.Luexit32:
+ SWAPGS
do_mov_es:
- movw TF_ES(%rsp),%es
+ movw TF_BACKW(TF_ES, %rsp),%es
do_mov_ds:
- movw TF_DS(%rsp),%ds
+ movw TF_BACKW(TF_DS, %rsp),%ds
do_mov_fs:
- movw TF_FS(%rsp),%fs
- SWAPGS
+ movw TF_BACKW(TF_FS, %rsp),%fs
#ifndef XEN
do_mov_gs:
- movw TF_GS(%rsp),%gs
+ movw TF_BACKW(TF_GS, %rsp),%gs
#endif
jmp .Lkexit
@@ -1578,7 +1588,7 @@
SWAPGS
.Lkexit:
- addq $TF_REGSIZE+16,%rsp /* + T_xxx and error code */
+ addq $16,%rsp /* 16 = T_xxx + error code */
do_iret:
iretq
END(intrfastexit)
diff -r 0404391e7fc6 -r 34ccd9eee411 sys/arch/amd64/amd64/trap.c
--- a/sys/arch/amd64/amd64/trap.c Sun Feb 25 12:28:18 2018 +0000
+++ b/sys/arch/amd64/amd64/trap.c Sun Feb 25 12:37:16 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: trap.c,v 1.111 2018/01/20 08:30:53 maxv Exp $ */
+/* $NetBSD: trap.c,v 1.112 2018/02/25 12:37:16 maxv Exp $ */
/*
* Copyright (c) 1998, 2000, 2017 The NetBSD Foundation, Inc.
@@ -64,7 +64,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.111 2018/01/20 08:30:53 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.112 2018/02/25 12:37:16 maxv Exp $");
#include "opt_ddb.h"
#include "opt_kgdb.h"
@@ -246,93 +246,6 @@
}
/*
- * Did we receive in kernel mode a trap that ought to be considered as a user
- * trap? If this function returns, the answer is no.
- *
- * Such traps can be triggered when the kernel fails to return to userland,
- * because of incorrect segment registers.
- */
-#ifndef XEN
-static void trap_user_kernelmode(struct trapframe *, int, lwp_t *, proc_t *);
-
-static void
-trap_user_kernelmode(struct trapframe *frame, int type, lwp_t *l, proc_t *p)
-{
- extern uint64_t do_mov_es, do_mov_ds, do_mov_fs, do_mov_gs;
- extern uint64_t do_iret;
- struct trapframe *vframe;
- ksiginfo_t ksi;
-
- if (frame->tf_rip == 0) {
Home |
Main Index |
Thread Index |
Old Index