Port-xen archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
TLS/lwp_private support
Hi all,
Attached is a patch to properly implement %fs/%gs handling on amd64.
It is good enough to run a simple TLS test case under the Linux
emulation once the faked kernel version is raised to 2.6.19. I would like
to get both review and testing.
Some code in cpu_switchto is disabled (under #ifdef notyet) because it can
potentially trap. As a result, the selector values of 32-bit
applications are leaked. Fixing this means either restructuring
cpu_switchto (and potentially adding a cli/sti pair there) or penalizing
all system calls. I don't think this is a security concern, so I am opting
for the faster approach here.
A few test cases can be found in
http://www.netbsd.org/~joerg/fsgs-test.tar.bz2.
Joerg
Index: arch/amd64/amd64/genassym.cf
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/amd64/amd64/genassym.cf,v
retrieving revision 1.44
diff -u -p -r1.44 genassym.cf
--- arch/amd64/amd64/genassym.cf 28 Apr 2010 19:17:03 -0000 1.44
+++ arch/amd64/amd64/genassym.cf 4 May 2010 01:24:19 -0000
@@ -188,6 +188,10 @@ define PCB_RSP0 offsetof(struct pcb, pc
define PCB_CR0 offsetof(struct pcb, pcb_cr0)
define PCB_ONFAULT offsetof(struct pcb, pcb_onfault)
define PCB_FPCPU offsetof(struct pcb, pcb_fpcpu)
+define PCB_FLAGS offsetof(struct pcb, pcb_flags)
+define PCB_COMPAT32 PCB_COMPAT32
+define PCB_FS offsetof(struct pcb, pcb_fs)
+define PCB_GS offsetof(struct pcb, pcb_gs)
define TF_RDI offsetof(struct trapframe, tf_rdi)
define TF_RSI offsetof(struct trapframe, tf_rsi)
@@ -227,6 +231,7 @@ define CPU_INFO_RESCHED offsetof(struct
define CPU_INFO_WANT_PMAPLOAD offsetof(struct cpu_info, ci_want_pmapload)
define CPU_INFO_PMAP_CPU offsetof(struct cpu_info, ci_pmap_cpu)
define CPU_INFO_TLBSTATE offsetof(struct cpu_info, ci_tlbstate)
+define CPU_INFO_NEED_COMPAT32 offsetof(struct cpu_info, ci_need_compat32)
define TLBSTATE_VALID TLBSTATE_VALID
define TLBSTATE_LAZY TLBSTATE_LAZY
define TLBSTATE_STALE TLBSTATE_STALE
Index: arch/amd64/amd64/locore.S
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/amd64/amd64/locore.S,v
retrieving revision 1.57
diff -u -p -r1.57 locore.S
--- arch/amd64/amd64/locore.S 18 Apr 2010 23:47:50 -0000 1.57
+++ arch/amd64/amd64/locore.S 4 May 2010 19:39:11 -0000
@@ -976,10 +976,48 @@ ENTRY(cpu_switchto)
/* Reloading CR0 is very expensive - avoid if possible. */
3: cmpq %rdx,%rcx
- je 4f
+ je 6f
movq %rcx,%cr0
#endif
+6: movl $1, CPUVAR(NEED_COMPAT32)
+ testl $PCB_COMPAT32, PCB_FLAGS(%r14)
+ jne 4f
+ movl $0, CPUVAR(NEED_COMPAT32)
+#ifdef notyet
+ movq L_MD_REGS(%r12), %rbx
+ movw TF_FS(%rbx), %fs
+#endif
+
+ movl $MSR_FSBASE, %ecx
+ rdmsr
+ cmpl PCB_FS(%r14), %eax
+ jne 7f
+ cmpl 4+PCB_FS(%r14), %edx
+ je 8f
+7: movl PCB_FS(%r14), %eax
+ movl 4+PCB_FS(%r14), %edx
+ wrmsr
+8:
+
+#ifdef notyet
+ movw %gs, %ax
+ cmpw %ax, TF_GS(%rbx)
+ je 9f
+ swapgs
+ movw TF_GS(%rbx), %gs
+ swapgs
+9:
+#endif
+ movl $MSR_KERNELGSBASE, %ecx
+ cmpl PCB_GS(%r14), %eax
+ jne 10f
+ cmpl 4+PCB_GS(%r14), %edx
+ je 4f
+10: movl PCB_GS(%r14), %eax
+ movl 4+PCB_GS(%r14), %edx
+ wrmsr
+
/* Return to the new LWP, returning 'oldlwp' in %rax. */
4: movq %r13,%rax
popq %r15
@@ -1088,14 +1126,17 @@ syscall_return:
cmpl $IPL_NONE,CPUVAR(ILEVEL)
jne 3f
#endif
+ cmpl $1, CPUVAR(NEED_COMPAT32)
#ifndef XEN
swapgs
#endif
movw TF_ES(%rsp),%es
+ jne 11f
movw TF_FS(%rsp),%fs
#ifndef XEN
movw TF_GS(%rsp),%gs
#endif
+11:
INTR_RESTORE_GPRS
movw $(LSEL(LUDATA_SEL, SEL_UPL)),%r11
movw %r11,%ds
Index: arch/amd64/amd64/netbsd32_machdep.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/amd64/amd64/netbsd32_machdep.c,v
retrieving revision 1.62
diff -u -p -r1.62 netbsd32_machdep.c
--- arch/amd64/amd64/netbsd32_machdep.c 23 Apr 2010 19:18:09 -0000 1.62
+++ arch/amd64/amd64/netbsd32_machdep.c 4 May 2010 01:48:15 -0000
@@ -145,7 +145,7 @@ netbsd32_setregs(struct lwp *l, struct e
netbsd32_adjust_limits(p);
l->l_md.md_flags &= ~MDP_USEDFPU;
- pcb->pcb_flags = 0;
+ pcb->pcb_flags = PCB_COMPAT32;
pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__;
pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
Index: arch/amd64/amd64/vector.S
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/amd64/amd64/vector.S,v
retrieving revision 1.33
diff -u -p -r1.33 vector.S
--- arch/amd64/amd64/vector.S 23 Feb 2010 06:27:40 -0000 1.33
+++ arch/amd64/amd64/vector.S 4 May 2010 13:16:09 -0000
@@ -154,9 +154,12 @@ IDTVEC(trap02)
movq %rsp,%rdi
incl CPUVAR(NTRAP)
call _C_LABEL(trap)
+ cmpl $1, CPUVAR(NEED_COMPAT32)
swapgs
+ jne 3f
movw TF_GS(%rsp),%gs
movw TF_FS(%rsp),%fs
+3:
movw TF_ES(%rsp),%es
movw TF_DS(%rsp),%ds
jmp 2f
Index: arch/amd64/include/frameasm.h
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/amd64/include/frameasm.h,v
retrieving revision 1.13
diff -u -p -r1.13 frameasm.h
--- arch/amd64/include/frameasm.h 21 Nov 2008 10:05:41 -0000 1.13
+++ arch/amd64/include/frameasm.h 4 May 2010 13:15:04 -0000
@@ -74,10 +74,12 @@
testq $SEL_UPL,TF_CS(%rsp) /* Interrupted %cs */ ; \
je 99f ; \
cli ; \
+ cmpl $1, CPUVAR(NEED_COMPAT32); \
swapgs ; \
+ jne 98f ; \
movw TF_GS(%rsp),%gs ; \
movw TF_FS(%rsp),%fs ; \
- movw TF_ES(%rsp),%es ; \
+98: movw TF_ES(%rsp),%es ; \
movw TF_DS(%rsp),%ds ; \
99: addq $TF_REGSIZE+16,%rsp /* + T_xxx and error code */ ; \
iretq
@@ -101,8 +103,10 @@
INTR_RESTORE_GPRS ; \
testq $SEL_UPL,TF_CS(%rsp) ; \
je 99f ; \
+ cmpl $1, CPUVAR(NEED_COMPAT32); \
+ jne 98f ; \
movw TF_FS(%rsp),%fs ; \
- movw TF_ES(%rsp),%es ; \
+98: movw TF_ES(%rsp),%es ; \
movw TF_DS(%rsp),%ds ; \
99: addq $TF_REGSIZE+16,%rsp /* + T_xxx and error code */ ; \
iretq
Index: arch/amd64/include/pcb.h
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/amd64/include/pcb.h,v
retrieving revision 1.16
diff -u -p -r1.16 pcb.h
--- arch/amd64/include/pcb.h 27 Oct 2009 03:05:28 -0000 1.16
+++ arch/amd64/include/pcb.h 4 May 2010 15:02:36 -0000
@@ -88,8 +88,7 @@
struct pcb {
int pcb_flags;
#define PCB_USER_LDT 0x01 /* has user-set LDT */
-#define PCB_GS64 0x02
-#define PCB_FS64 0x04
+#define PCB_COMPAT32 0x02
u_int pcb_cr0; /* saved image of CR0 */
uint64_t pcb_rsp0;
uint64_t pcb_cr2; /* page fault address (CR2) */
@@ -101,8 +100,8 @@ struct pcb {
struct savefpu pcb_savefpu __aligned(16); /* floating point state */
void *pcb_onfault; /* copyin/out fault recovery */
struct cpu_info *pcb_fpcpu; /* cpu holding our fp state. */
- uint64_t pcb_gs;
- uint64_t pcb_fs;
+ uint64_t pcb_fs; /* FSbase. Must be canonical. */
+ uint64_t pcb_gs; /* GSbase. Must be canonical. */
int pcb_iopl;
};
Index: arch/x86/include/cpu.h
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/x86/include/cpu.h,v
retrieving revision 1.21
diff -u -p -r1.21 cpu.h
--- arch/x86/include/cpu.h 18 Apr 2010 23:47:51 -0000 1.21
+++ arch/x86/include/cpu.h 4 May 2010 01:25:04 -0000
@@ -107,6 +107,9 @@ struct cpu_info {
struct pmap *ci_pmap; /* current pmap */
int ci_need_tlbwait; /* need to wait for TLB invalidations */
int ci_want_pmapload; /* pmap_load() is needed */
+#ifdef __x86_64__
+ int ci_need_compat32; /* %fs and %gs should be restored */
+#endif
volatile int ci_tlbstate; /* one of TLBSTATE_ states. see below */
#define TLBSTATE_VALID 0 /* all user tlbs are valid */
#define TLBSTATE_LAZY 1 /* tlbs are valid but won't be kept
uptodate */
Index: arch/x86/x86/pmap.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/x86/x86/pmap.c,v
retrieving revision 1.107
diff -u -p -r1.107 pmap.c
--- arch/x86/x86/pmap.c 18 Apr 2010 23:47:51 -0000 1.107
+++ arch/x86/x86/pmap.c 4 May 2010 01:19:01 -0000
@@ -2566,13 +2566,6 @@ pmap_activate(struct lwp *l)
pcb = lwp_getpcb(l);
ci->ci_want_pmapload = 1;
-
-#if defined(__x86_64__)
- if (pcb->pcb_flags & PCB_GS64)
- wrmsr(MSR_KERNELGSBASE, pcb->pcb_gs);
- if (pcb->pcb_flags & PCB_FS64)
- wrmsr(MSR_FSBASE, pcb->pcb_fs);
-#endif /* defined(__x86_64__) */
}
}
Index: arch/x86/x86/sys_machdep.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/x86/x86/sys_machdep.c,v
retrieving revision 1.23
diff -u -p -r1.23 sys_machdep.c
--- arch/x86/x86/sys_machdep.c 23 Apr 2010 16:07:33 -0000 1.23
+++ arch/x86/x86/sys_machdep.c 4 May 2010 01:51:11 -0000
@@ -628,7 +628,40 @@ x86_set_sdbase(void *arg, char which, lw
return 0;
#else
- return EINVAL;
+ struct pcb *pcb;
+ vaddr_t base;
+ int error;
+
+ if (direct) {
+ base = (vaddr_t)arg;
+ } else {
+ error = copyin(arg, &base, sizeof(base));
+ if (error != 0)
+ return error;
+ }
+
+ if (base >= VM_MAXUSER_ADDRESS)
+ return EINVAL;
+
+ pcb = lwp_getpcb(l);
+
+ switch(which) {
+ case 'f':
+ pcb->pcb_fs = base;
+ if (l == curlwp)
+ wrmsr(MSR_FSBASE, pcb->pcb_fs);
+ break;
+ case 'g':
+ pcb->pcb_gs = base;
+ if (l == curlwp)
+ wrmsr(MSR_KERNELGSBASE, pcb->pcb_gs);
+ break;
+ default:
+ panic("x86_get_sdbase");
+ }
+
+
+ return 0;
#endif
}
@@ -653,7 +686,23 @@ x86_get_sdbase(void *arg, char which)
base = sd->sd_hibase << 24 | sd->sd_lobase;
return copyout(&base, arg, sizeof(base));
#else
- return EINVAL;
+ vaddr_t base;
+ struct pcb *pcb;
+
+ pcb = lwp_getpcb(curlwp);
+
+ switch(which) {
+ case 'f':
+ base = pcb->pcb_fs;
+ break;
+ case 'g':
+ base = pcb->pcb_gs;
+ break;
+ default:
+ panic("x86_get_sdbase");
+ }
+
+ return copyout(&base, arg, sizeof(base));
#endif
}
Index: arch/x86/x86/vm_machdep.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/x86/x86/vm_machdep.c,v
retrieving revision 1.9
diff -u -p -r1.9 vm_machdep.c
--- arch/x86/x86/vm_machdep.c 23 Apr 2010 16:07:33 -0000 1.9
+++ arch/x86/x86/vm_machdep.c 4 May 2010 14:48:46 -0000
@@ -179,23 +179,16 @@ cpu_lwp_fork(struct lwp *l1, struct lwp
* newly-created child process to go directly to user level with a
* parent return value of 0 from fork(), while the parent process
* returns normally.
- *
- * Also, copy PCB %fs/%gs base from parent.
*/
uv = uvm_lwp_getuarea(l2);
#ifdef __x86_64__
pcb2->pcb_rsp0 = (uv + KSTACK_SIZE - 16) & ~0xf;
tf = (struct trapframe *)pcb2->pcb_rsp0 - 1;
-
- pcb2->pcb_fs = pcb1->pcb_fs;
- pcb2->pcb_gs = pcb1->pcb_gs;
#else
pcb2->pcb_esp0 = (uv + KSTACK_SIZE - 16);
tf = (struct trapframe *)pcb2->pcb_esp0 - 1;
- memcpy(&pcb2->pcb_fsd, &pcb1->pcb_fsd, sizeof(pcb2->pcb_fsd));
- memcpy(&pcb2->pcb_gsd, &pcb1->pcb_gsd, sizeof(pcb2->pcb_gsd));
pcb2->pcb_iomap = NULL;
#endif
l2->l_md.md_regs = tf;
Index: compat/linux/arch/amd64/linux_machdep.c
===================================================================
RCS file:
/home/joerg/repo/netbsd/src/sys/compat/linux/arch/amd64/linux_machdep.c,v
retrieving revision 1.37
diff -u -p -r1.37 linux_machdep.c
--- compat/linux/arch/amd64/linux_machdep.c 23 Nov 2009 00:46:06 -0000
1.37
+++ compat/linux/arch/amd64/linux_machdep.c 4 May 2010 01:51:44 -0000
@@ -490,7 +490,6 @@ linux_sys_arch_prctl(struct lwp *l,
syscallarg(unsigned long) addr;
} */
struct pcb *pcb = lwp_getpcb(l);
- struct trapframe *tf = l->l_md.md_regs;
int error;
uint64_t taddr;
@@ -500,19 +499,12 @@ linux_sys_arch_prctl(struct lwp *l,
if (taddr >= VM_MAXUSER_ADDRESS)
return EINVAL;
pcb->pcb_gs = taddr;
- pcb->pcb_flags |= PCB_GS64;
if (l == curlwp)
wrmsr(MSR_KERNELGSBASE, taddr);
break;
case LINUX_ARCH_GET_GS:
- if (pcb->pcb_flags & PCB_GS64)
- taddr = pcb->pcb_gs;
- else {
- error = memseg_baseaddr(l, tf->tf_fs, NULL, 0, &taddr);
- if (error != 0)
- return error;
- }
+ taddr = pcb->pcb_gs;
error = copyout(&taddr, (char *)SCARG(uap, addr), 8);
if (error != 0)
return error;
@@ -523,19 +515,12 @@ linux_sys_arch_prctl(struct lwp *l,
if (taddr >= VM_MAXUSER_ADDRESS)
return EINVAL;
pcb->pcb_fs = taddr;
- pcb->pcb_flags |= PCB_FS64;
if (l == curlwp)
wrmsr(MSR_FSBASE, taddr);
break;
case LINUX_ARCH_GET_FS:
- if (pcb->pcb_flags & PCB_FS64)
- taddr = pcb->pcb_fs;
- else {
- error = memseg_baseaddr(l, tf->tf_fs, NULL, 0, &taddr);
- if (error != 0)
- return error;
- }
+ taddr = pcb->pcb_fs;
error = copyout(&taddr, (char *)SCARG(uap, addr), 8);
if (error != 0)
return error;
Index: compat/linux32/arch/amd64/linux32_machdep.c
===================================================================
RCS file:
/home/joerg/repo/netbsd/src/sys/compat/linux32/arch/amd64/linux32_machdep.c,v
retrieving revision 1.23
diff -u -p -r1.23 linux32_machdep.c
--- compat/linux32/arch/amd64/linux32_machdep.c 23 Nov 2009 00:46:07 -0000
1.23
+++ compat/linux32/arch/amd64/linux32_machdep.c 4 May 2010 01:48:40 -0000
@@ -296,7 +296,7 @@ linux32_setregs(struct lwp *l, struct ex
netbsd32_adjust_limits(p);
l->l_md.md_flags &= ~MDP_USEDFPU;
- pcb->pcb_flags = 0;
+ pcb->pcb_flags = PCB_COMPAT32;
pcb->pcb_savefpu.fp_fxsave.fx_fcw = __Linux_NPXCW__;
pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
Home |
Main Index |
Thread Index |
Old Index