Subject: Re: port-xen/30977: FPU troubles
To: None <port-xen-maintainer@netbsd.org, gnats-admin@netbsd.org,>
From: Paul Ripke <stix@stix.id.au>
List: netbsd-bugs
Date: 12/18/2005 22:35:02
The following reply was made to PR port-xen/30977; it has been noted by GNATS.

From: Paul Ripke <stix@stix.id.au>
To: Manuel Bouyer <bouyer@antioche.eu.org>,
	Quentin Garnier <cube@cubidou.net>
Cc: gnats-bugs@NetBSD.org, port-xen@NetBSD.org
Subject: Re: port-xen/30977: FPU troubles
Date: Mon, 19 Dec 2005 09:34:01 +1100

 OK, I've had a closer look at this, and come up with the included patch,
 which appears to work fine for me, with threaded and non-threaded 
 programs.
 I can't repeat the behaviour of the FPU state being not being restored,
 restored to the wrong thread, or wiped.
 
 Thoughts behind the patch:
 - since clts() is a no-op under xen 2 (in xen 3, 
 HYPERVISOR_fpu_taskswitch
    takes an int parameter allowing set/clear), it makes no sense to 
 stts()
    inside the DNA handler. We can't clear it again.
 - I don't get the npxsave_lwp() calls in the DNA handler. On SMP, it's
    used to dump an LWP's FPU state from a different CPU. On a 
 uniprocessor,
    the FPU state has already been dumped by npxsave_cpu(). SMP support 
 isn't
    relevant for xen 2.
 
 Anyway, please test the patch and see how it goes. I note that my sound
 (SBlive) is still crook, looks like another bug.
 
 Index: sys/arch/xen/i386/machdep.c
 ===================================================================
 RCS file: /export/netbsd/cvsroot/src/sys/arch/xen/i386/machdep.c,v
 retrieving revision 1.13.2.3
 diff -u -d -b -w -r1.13.2.3 machdep.c
 --- sys/arch/xen/i386/machdep.c	25 Aug 2005 20:16:21 -0000	1.13.2.3
 +++ sys/arch/xen/i386/machdep.c	18 Dec 2005 08:00:53 -0000
 @@ -1,4 +1,4 @@
 -/*	$NetBSD$	*/
 +/*	$NetBSD: machdep.c,v 1.13.2.3 2005/08/25 20:16:21 tron Exp $	*/
   /*	NetBSD: machdep.c,v 1.559 2004/07/22 15:12:46 mycroft Exp 	*/
 
   /*-
 @@ -73,7 +73,7 @@
    */
 
   #include <sys/cdefs.h>
 -__KERNEL_RCSID(0, "$NetBSD$");
 +__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.13.2.3 2005/08/25 20:16:21 
 tron Exp $");
 
   #include "opt_beep.h"
   #include "opt_compat_ibcs2.h"
 @@ -1174,8 +1174,10 @@
 
   #if NNPX > 0
   	/* If we were using the FPU, forget about it. */
 -	if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
 +	if (l->l_addr->u_pcb.pcb_fpcpu != NULL) {
   		npxsave_lwp(l, 0);
 +		HYPERVISOR_fpu_taskswitch();
 +	}
   #endif
 
   #ifdef USER_LDT
 @@ -2335,6 +2337,7 @@
   		 */
   		if (l->l_addr->u_pcb.pcb_fpcpu) {
   			npxsave_lwp(l, 1);
 +			HYPERVISOR_fpu_taskswitch();
   		}
   #endif
   		if (i386_use_fxsave) {
 @@ -2418,8 +2421,10 @@
   		/*
   		 * If we were using the FPU, forget that we were.
   		 */
 -		if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
 +		if (l->l_addr->u_pcb.pcb_fpcpu != NULL) {
   			npxsave_lwp(l, 0);
 +			HYPERVISOR_fpu_taskswitch();
 +		}
   #endif
   		if (flags & _UC_FXSAVE) {
   			if (i386_use_fxsave) {
 Index: sys/arch/xen/i386/npx.c
 ===================================================================
 RCS file: /export/netbsd/cvsroot/src/sys/arch/xen/i386/npx.c,v
 retrieving revision 1.3.14.1
 diff -u -d -b -w -r1.3.14.1 npx.c
 --- sys/arch/xen/i386/npx.c	20 Mar 2005 14:38:21 -0000	1.3.14.1
 +++ sys/arch/xen/i386/npx.c	18 Dec 2005 21:40:44 -0000
 @@ -1,4 +1,4 @@
 -/*	$NetBSD$	*/
 +/*	$NetBSD: npx.c,v 1.3.14.1 2005/03/20 14:38:21 tron Exp $	*/
   /*	NetBSD: npx.c,v 1.103 2004/03/21 10:56:24 simonb Exp 	*/
 
   /*-
 @@ -68,10 +68,11 @@
    */
 
   #include <sys/cdefs.h>
 -__KERNEL_RCSID(0, "$NetBSD$");
 +__KERNEL_RCSID(0, "$NetBSD: npx.c,v 1.3.14.1 2005/03/20 14:38:21 tron 
 Exp $");
 
   #if 0
   #define IPRINTF(x)	printf x
 +#define	XXXXENDEBUG_LOW
   #else
   #define	IPRINTF(x)
   #endif
 @@ -575,12 +576,8 @@
   	KDASSERT(ci->ci_fpcurlwp == NULL);
   #ifndef MULTIPROCESSOR
   	KDASSERT(l->l_addr->u_pcb.pcb_fpcpu == NULL);
 -#else
 -	if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
 -		npxsave_lwp(l, 1);
   #endif
   	l->l_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
 -	clts();
   	s = splipi();
   	ci->ci_fpcurlwp = l;
   	l->l_addr->u_pcb.pcb_fpcpu = ci;
 @@ -629,11 +626,9 @@
   	if (ci->ci_fpcurlwp != NULL)
   		npxsave_cpu(ci, 1);
   	else {
 -		clts();
   		IPRINTF(("%s: fp init\n", ci->ci_dev->dv_xname));
   		fninit();
   		fwait();
 -		stts();
   	}
   	splx(s);
 
 @@ -641,12 +636,8 @@
   	KDASSERT(ci->ci_fpcurlwp == NULL);
   #ifndef MULTIPROCESSOR
   	KDASSERT(l->l_addr->u_pcb.pcb_fpcpu == NULL);
 -#else
 -	if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
 -		npxsave_lwp(l, 1);
   #endif
   	l->l_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
 -	clts();
   	s = splipi();
   	ci->ci_fpcurlwp = l;
   	l->l_addr->u_pcb.pcb_fpcpu = ci;
 @@ -710,7 +701,6 @@
   		  * which report FP failures via traps rather than irq13).
   		  * XXX punting for now..
   		  */
 -		clts();
   		ci->ci_fpsaving = 1;
   		fpu_save(&l->l_addr->u_pcb.pcb_savefpu);
   		ci->ci_fpsaving = 0;
 
 --
 Paul Ripke