Port-xen archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

TLS/lwp_private support



Hi all,
attached is a patch to properly implement %fs/%gs handling on amd64.
It is good enough to run a simple TLS test case under the Linux
emulation after raising the faked kernel version to 2.6.19. I would like
to get both review and testing.

There is some code in cpu_switchto that is disabled (#ifdef notyet) as
it can potentially trap. This means that the selector values of 32-bit
applications are leaked. Fixing this would require either restructuring
cpu_switchto, potentially adding a cli/sti pair, or penalizing all
system calls. I don't think this is a security concern, so I am opting
for the faster approach here.

A few test cases can be found at:
http://www.netbsd.org/~joerg/fsgs-test.tar.bz2

Joerg
Index: arch/amd64/amd64/genassym.cf
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/amd64/amd64/genassym.cf,v
retrieving revision 1.44
diff -u -p -r1.44 genassym.cf
--- arch/amd64/amd64/genassym.cf        28 Apr 2010 19:17:03 -0000      1.44
+++ arch/amd64/amd64/genassym.cf        4 May 2010 01:24:19 -0000
@@ -188,6 +188,10 @@ define     PCB_RSP0                offsetof(struct pcb, pc
 define PCB_CR0                 offsetof(struct pcb, pcb_cr0)
 define PCB_ONFAULT             offsetof(struct pcb, pcb_onfault)
 define PCB_FPCPU               offsetof(struct pcb, pcb_fpcpu)
+define PCB_FLAGS               offsetof(struct pcb, pcb_flags)
+define PCB_COMPAT32            PCB_COMPAT32
+define PCB_FS                  offsetof(struct pcb, pcb_fs)
+define PCB_GS                  offsetof(struct pcb, pcb_gs)
 
 define TF_RDI                  offsetof(struct trapframe, tf_rdi)
 define TF_RSI                  offsetof(struct trapframe, tf_rsi)
@@ -227,6 +231,7 @@ define      CPU_INFO_RESCHED        offsetof(struct 
 define CPU_INFO_WANT_PMAPLOAD  offsetof(struct cpu_info, ci_want_pmapload)
 define CPU_INFO_PMAP_CPU       offsetof(struct cpu_info, ci_pmap_cpu)
 define CPU_INFO_TLBSTATE       offsetof(struct cpu_info, ci_tlbstate)
+define CPU_INFO_NEED_COMPAT32  offsetof(struct cpu_info, ci_need_compat32)
 define TLBSTATE_VALID          TLBSTATE_VALID
 define TLBSTATE_LAZY           TLBSTATE_LAZY
 define TLBSTATE_STALE          TLBSTATE_STALE
Index: arch/amd64/amd64/locore.S
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/amd64/amd64/locore.S,v
retrieving revision 1.57
diff -u -p -r1.57 locore.S
--- arch/amd64/amd64/locore.S   18 Apr 2010 23:47:50 -0000      1.57
+++ arch/amd64/amd64/locore.S   4 May 2010 19:39:11 -0000
@@ -976,10 +976,48 @@ ENTRY(cpu_switchto)
 
        /* Reloading CR0 is very expensive - avoid if possible. */
 3:     cmpq    %rdx,%rcx
-       je      4f
+       je      6f
        movq    %rcx,%cr0
 #endif
 
+6:     movl    $1, CPUVAR(NEED_COMPAT32)
+       testl   $PCB_COMPAT32, PCB_FLAGS(%r14)
+       jne     4f
+       movl    $0, CPUVAR(NEED_COMPAT32)
+#ifdef notyet
+       movq    L_MD_REGS(%r12), %rbx
+       movw    TF_FS(%rbx), %fs
+#endif
+
+       movl    $MSR_FSBASE, %ecx
+       rdmsr
+       cmpl    PCB_FS(%r14), %eax
+       jne     7f
+       cmpl    4+PCB_FS(%r14), %edx
+       je      8f
+7:     movl    PCB_FS(%r14), %eax
+       movl    4+PCB_FS(%r14), %edx
+       wrmsr
+8:
+
+#ifdef notyet
+       movw    %gs, %ax
+       cmpw    %ax, TF_GS(%rbx)
+       je 9f
+       swapgs
+       movw    TF_GS(%rbx), %gs
+       swapgs
+9:
+#endif
+       movl    $MSR_KERNELGSBASE, %ecx
+       cmpl    PCB_GS(%r14), %eax
+       jne     10f
+       cmpl    4+PCB_GS(%r14), %edx
+       je      4f
+10:    movl    PCB_GS(%r14), %eax
+       movl    4+PCB_GS(%r14), %edx
+       wrmsr
+
        /* Return to the new LWP, returning 'oldlwp' in %rax. */
 4:     movq    %r13,%rax
        popq    %r15
@@ -1088,14 +1126,17 @@ syscall_return:
        cmpl    $IPL_NONE,CPUVAR(ILEVEL)
        jne     3f
 #endif
+       cmpl    $1, CPUVAR(NEED_COMPAT32)
 #ifndef XEN
        swapgs
 #endif
        movw    TF_ES(%rsp),%es
+       jne     11f
        movw    TF_FS(%rsp),%fs
 #ifndef XEN
        movw    TF_GS(%rsp),%gs
 #endif
+11:
        INTR_RESTORE_GPRS
        movw    $(LSEL(LUDATA_SEL, SEL_UPL)),%r11
        movw    %r11,%ds
Index: arch/amd64/amd64/netbsd32_machdep.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/amd64/amd64/netbsd32_machdep.c,v
retrieving revision 1.62
diff -u -p -r1.62 netbsd32_machdep.c
--- arch/amd64/amd64/netbsd32_machdep.c 23 Apr 2010 19:18:09 -0000      1.62
+++ arch/amd64/amd64/netbsd32_machdep.c 4 May 2010 01:48:15 -0000
@@ -145,7 +145,7 @@ netbsd32_setregs(struct lwp *l, struct e
        netbsd32_adjust_limits(p);
 
        l->l_md.md_flags &= ~MDP_USEDFPU;
-       pcb->pcb_flags = 0;
+       pcb->pcb_flags = PCB_COMPAT32;
         pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__;
         pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;  
        pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
Index: arch/amd64/amd64/vector.S
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/amd64/amd64/vector.S,v
retrieving revision 1.33
diff -u -p -r1.33 vector.S
--- arch/amd64/amd64/vector.S   23 Feb 2010 06:27:40 -0000      1.33
+++ arch/amd64/amd64/vector.S   4 May 2010 13:16:09 -0000
@@ -154,9 +154,12 @@ IDTVEC(trap02)
        movq    %rsp,%rdi
        incl    CPUVAR(NTRAP)
        call    _C_LABEL(trap)
+       cmpl    $1, CPUVAR(NEED_COMPAT32)
        swapgs
+       jne 3f
        movw    TF_GS(%rsp),%gs
        movw    TF_FS(%rsp),%fs
+3:
        movw    TF_ES(%rsp),%es
        movw    TF_DS(%rsp),%ds
        jmp     2f
Index: arch/amd64/include/frameasm.h
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/amd64/include/frameasm.h,v
retrieving revision 1.13
diff -u -p -r1.13 frameasm.h
--- arch/amd64/include/frameasm.h       21 Nov 2008 10:05:41 -0000      1.13
+++ arch/amd64/include/frameasm.h       4 May 2010 13:15:04 -0000
@@ -74,10 +74,12 @@
        testq   $SEL_UPL,TF_CS(%rsp)    /* Interrupted %cs */ ; \
        je      99f                     ; \
        cli                             ; \
+       cmpl    $1, CPUVAR(NEED_COMPAT32); \
        swapgs                          ; \
+       jne 98f                         ; \
        movw    TF_GS(%rsp),%gs         ; \
        movw    TF_FS(%rsp),%fs         ; \
-       movw    TF_ES(%rsp),%es         ; \
+98:    movw    TF_ES(%rsp),%es         ; \
        movw    TF_DS(%rsp),%ds         ; \
 99:    addq    $TF_REGSIZE+16,%rsp     /* + T_xxx and error code */ ; \
        iretq
@@ -101,8 +103,10 @@
        INTR_RESTORE_GPRS               ; \
        testq   $SEL_UPL,TF_CS(%rsp)    ; \
        je      99f                     ; \
+       cmpl    $1, CPUVAR(NEED_COMPAT32); \
+       jne 98f                         ; \
        movw    TF_FS(%rsp),%fs         ; \
-       movw    TF_ES(%rsp),%es         ; \
+98:    movw    TF_ES(%rsp),%es         ; \
        movw    TF_DS(%rsp),%ds         ; \
 99:    addq    $TF_REGSIZE+16,%rsp     /* + T_xxx and error code */ ; \
        iretq
Index: arch/amd64/include/pcb.h
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/amd64/include/pcb.h,v
retrieving revision 1.16
diff -u -p -r1.16 pcb.h
--- arch/amd64/include/pcb.h    27 Oct 2009 03:05:28 -0000      1.16
+++ arch/amd64/include/pcb.h    4 May 2010 15:02:36 -0000
@@ -88,8 +88,7 @@
 struct pcb {
        int       pcb_flags;
 #define        PCB_USER_LDT    0x01            /* has user-set LDT */
-#define PCB_GS64       0x02
-#define PCB_FS64       0x04
+#define        PCB_COMPAT32    0x02
        u_int     pcb_cr0;              /* saved image of CR0 */
        uint64_t pcb_rsp0;
        uint64_t pcb_cr2;               /* page fault address (CR2) */
@@ -101,8 +100,8 @@ struct pcb {
        struct  savefpu pcb_savefpu __aligned(16); /* floating point state */
        void     *pcb_onfault;          /* copyin/out fault recovery */
        struct cpu_info *pcb_fpcpu;     /* cpu holding our fp state. */
-       uint64_t  pcb_gs;
-       uint64_t  pcb_fs;
+       uint64_t  pcb_fs;               /* FSbase. Must be canonical. */
+       uint64_t  pcb_gs;               /* GSbase. Must be canonical. */
        int pcb_iopl;
 };
 
Index: arch/x86/include/cpu.h
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/x86/include/cpu.h,v
retrieving revision 1.21
diff -u -p -r1.21 cpu.h
--- arch/x86/include/cpu.h      18 Apr 2010 23:47:51 -0000      1.21
+++ arch/x86/include/cpu.h      4 May 2010 01:25:04 -0000
@@ -107,6 +107,9 @@ struct cpu_info {
        struct pmap *ci_pmap;           /* current pmap */
        int ci_need_tlbwait;            /* need to wait for TLB invalidations */
        int ci_want_pmapload;           /* pmap_load() is needed */
+#ifdef __x86_64__
+       int ci_need_compat32;           /* %fs and %gs should be restored */
+#endif
        volatile int ci_tlbstate;       /* one of TLBSTATE_ states. see below */
 #define        TLBSTATE_VALID  0       /* all user tlbs are valid */
 #define        TLBSTATE_LAZY   1       /* tlbs are valid but won't be kept uptodate */
Index: arch/x86/x86/pmap.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/x86/x86/pmap.c,v
retrieving revision 1.107
diff -u -p -r1.107 pmap.c
--- arch/x86/x86/pmap.c 18 Apr 2010 23:47:51 -0000      1.107
+++ arch/x86/x86/pmap.c 4 May 2010 01:19:01 -0000
@@ -2566,13 +2566,6 @@ pmap_activate(struct lwp *l)
 
                pcb = lwp_getpcb(l);
                ci->ci_want_pmapload = 1;
-
-#if defined(__x86_64__)
-               if (pcb->pcb_flags & PCB_GS64)
-                       wrmsr(MSR_KERNELGSBASE, pcb->pcb_gs);
-               if (pcb->pcb_flags & PCB_FS64)
-                       wrmsr(MSR_FSBASE, pcb->pcb_fs);
-#endif /* defined(__x86_64__) */
        }
 }
 
Index: arch/x86/x86/sys_machdep.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/x86/x86/sys_machdep.c,v
retrieving revision 1.23
diff -u -p -r1.23 sys_machdep.c
--- arch/x86/x86/sys_machdep.c  23 Apr 2010 16:07:33 -0000      1.23
+++ arch/x86/x86/sys_machdep.c  4 May 2010 01:51:11 -0000
@@ -628,7 +628,40 @@ x86_set_sdbase(void *arg, char which, lw
 
        return 0;
 #else
-       return EINVAL;
+       struct pcb *pcb;
+       vaddr_t base;
+       int error;
+
+       if (direct) {
+               base = (vaddr_t)arg;
+       } else {
+               error = copyin(arg, &base, sizeof(base));
+               if (error != 0)
+                       return error;
+       }
+
+       if (base >= VM_MAXUSER_ADDRESS)
+               return EINVAL;
+
+       pcb = lwp_getpcb(l);
+
+       switch(which) {
+       case 'f':
+               pcb->pcb_fs = base;
+               if (l == curlwp)
+                       wrmsr(MSR_FSBASE, pcb->pcb_fs);
+               break;
+       case 'g':
+               pcb->pcb_gs = base;
+               if (l == curlwp)
+                       wrmsr(MSR_KERNELGSBASE, pcb->pcb_gs);
+               break;
+       default:
+               panic("x86_get_sdbase");
+       }
+
+
+       return 0;
 #endif
 }
 
@@ -653,7 +686,23 @@ x86_get_sdbase(void *arg, char which)
        base = sd->sd_hibase << 24 | sd->sd_lobase;
        return copyout(&base, arg, sizeof(base));
 #else
-       return EINVAL;
+       vaddr_t base;
+       struct pcb *pcb;
+
+       pcb = lwp_getpcb(curlwp);
+
+       switch(which) {
+       case 'f':
+               base = pcb->pcb_fs;
+               break;
+       case 'g':
+               base = pcb->pcb_gs;
+               break;
+       default:
+               panic("x86_get_sdbase");
+       }
+
+       return copyout(&base, arg, sizeof(base));
 #endif
 }
 
Index: arch/x86/x86/vm_machdep.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/arch/x86/x86/vm_machdep.c,v
retrieving revision 1.9
diff -u -p -r1.9 vm_machdep.c
--- arch/x86/x86/vm_machdep.c   23 Apr 2010 16:07:33 -0000      1.9
+++ arch/x86/x86/vm_machdep.c   4 May 2010 14:48:46 -0000
@@ -179,23 +179,16 @@ cpu_lwp_fork(struct lwp *l1, struct lwp 
         * newly-created child process to go directly to user level with a
         * parent return value of 0 from fork(), while the parent process
         * returns normally.
-        * 
-        * Also, copy PCB %fs/%gs base from parent.
         */
        uv = uvm_lwp_getuarea(l2);
 
 #ifdef __x86_64__
        pcb2->pcb_rsp0 = (uv + KSTACK_SIZE - 16) & ~0xf;
        tf = (struct trapframe *)pcb2->pcb_rsp0 - 1;
-
-       pcb2->pcb_fs = pcb1->pcb_fs;
-       pcb2->pcb_gs = pcb1->pcb_gs;
 #else
        pcb2->pcb_esp0 = (uv + KSTACK_SIZE - 16);
        tf = (struct trapframe *)pcb2->pcb_esp0 - 1;
 
-       memcpy(&pcb2->pcb_fsd, &pcb1->pcb_fsd, sizeof(pcb2->pcb_fsd));
-       memcpy(&pcb2->pcb_gsd, &pcb1->pcb_gsd, sizeof(pcb2->pcb_gsd));
        pcb2->pcb_iomap = NULL;
 #endif
        l2->l_md.md_regs = tf;
Index: compat/linux/arch/amd64/linux_machdep.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/compat/linux/arch/amd64/linux_machdep.c,v
retrieving revision 1.37
diff -u -p -r1.37 linux_machdep.c
--- compat/linux/arch/amd64/linux_machdep.c     23 Nov 2009 00:46:06 -0000      1.37
+++ compat/linux/arch/amd64/linux_machdep.c     4 May 2010 01:51:44 -0000
@@ -490,7 +490,6 @@ linux_sys_arch_prctl(struct lwp *l,
                syscallarg(unsigned long) addr;
        } */
        struct pcb *pcb = lwp_getpcb(l);
-       struct trapframe *tf = l->l_md.md_regs;
        int error;
        uint64_t taddr;
 
@@ -500,19 +499,12 @@ linux_sys_arch_prctl(struct lwp *l,
                if (taddr >= VM_MAXUSER_ADDRESS)
                        return EINVAL;
                pcb->pcb_gs = taddr;
-               pcb->pcb_flags |= PCB_GS64;
                if (l == curlwp)
                        wrmsr(MSR_KERNELGSBASE, taddr);
                break;
 
        case LINUX_ARCH_GET_GS:
-               if (pcb->pcb_flags & PCB_GS64)
-                       taddr = pcb->pcb_gs;
-               else {
-                       error = memseg_baseaddr(l, tf->tf_fs, NULL, 0, &taddr);
-                       if (error != 0)
-                               return error;
-               }
+               taddr = pcb->pcb_gs;
                error = copyout(&taddr, (char *)SCARG(uap, addr), 8);
                if (error != 0)
                        return error;
@@ -523,19 +515,12 @@ linux_sys_arch_prctl(struct lwp *l,
                if (taddr >= VM_MAXUSER_ADDRESS)
                        return EINVAL;
                pcb->pcb_fs = taddr;
-               pcb->pcb_flags |= PCB_FS64;
                if (l == curlwp)
                        wrmsr(MSR_FSBASE, taddr);
                break;
 
        case LINUX_ARCH_GET_FS:
-               if (pcb->pcb_flags & PCB_FS64)
-                       taddr = pcb->pcb_fs;
-               else {
-                       error = memseg_baseaddr(l, tf->tf_fs, NULL, 0, &taddr);
-                       if (error != 0)
-                               return error;
-               }
+               taddr = pcb->pcb_fs;
                error = copyout(&taddr, (char *)SCARG(uap, addr), 8);
                if (error != 0)
                        return error;
Index: compat/linux32/arch/amd64/linux32_machdep.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/sys/compat/linux32/arch/amd64/linux32_machdep.c,v
retrieving revision 1.23
diff -u -p -r1.23 linux32_machdep.c
--- compat/linux32/arch/amd64/linux32_machdep.c 23 Nov 2009 00:46:07 -0000      1.23
+++ compat/linux32/arch/amd64/linux32_machdep.c 4 May 2010 01:48:40 -0000
@@ -296,7 +296,7 @@ linux32_setregs(struct lwp *l, struct ex
        netbsd32_adjust_limits(p);
 
        l->l_md.md_flags &= ~MDP_USEDFPU;
-       pcb->pcb_flags = 0;
+       pcb->pcb_flags = PCB_COMPAT32;
        pcb->pcb_savefpu.fp_fxsave.fx_fcw = __Linux_NPXCW__;
        pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
        pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;


Home | Main Index | Thread Index | Old Index