Subject: port-m68k/7220: m68k FPE precision fixes
To: None <gnats-bugs@gnats.netbsd.org>
From: None <kenn@synap.ne.jp>
List: netbsd-bugs
Date: 03/24/1999 09:34:52
>Number:         7220
>Category:       port-m68k
>Synopsis:       m68k FPE precision bugs fixed
>Confidential:   no
>Severity:       serious
>Priority:       high
>Responsible:    port-m68k-maintainer (NetBSD/m68k Portmasters)
>State:          open
>Class:          change-request
>Submitter-Id:   net
>Arrival-Date:   Tue Mar 23 16:50:00 1999
>Last-Modified:
>Originator:     Ken Nakata
>Organization:
>Release:        current as of 1999-03-14
>Environment:
	All m68k machines
System: NetBSD quark.nakata.com 1.3F NetBSD 1.3F (QUARK) #3: Wed Jul 22 11:30:13 JST 1998     hacker@quark.nakata.com:/s/src/sys/arch/mac68k/compile/QUARK mac68k

>Description:
	I've fixed most (but not all) of the m68k FPE bugs that give bogus
calculation results, especially in the fsqrt instruction.  Also, the
internal FP representation has been reduced from a 115-bit mantissa to
a 67-bit mantissa, which reduces the required mantissa operations by
roughly one fourth.  I've done extensive (though not exhaustive - that's
impossible!) testing of the internal routines by feeding them randomly
generated FP numbers, and found that the new code is more precise than
the MC68040 FPU (which seems to have a rounding bug).

NOTE: code to deal with the MC68LC040 stack frame has been added, but it
has proven ineffective in dealing with the so-called MC68LC040
``random segfaults'' problem.  Further investigation and fixes are
required.

>How-To-Repeat:
	N/A
>Fix:
        Diffs follow:

--- ../../src/sys/arch/m68k/fpe/files.fpe	Tue Dec 12 05:35:46 1995
+++ files.fpe	Mon Aug 10 18:56:16 1998
@@ -4,8 +4,8 @@
 # Included by ports that need it.
 
 file	arch/m68k/fpe/fpu_add.c			fpu_emulate
-file	arch/m68k/fpe/fpu_calcea.c		fpu_emulate
 file	arch/m68k/fpe/fpu_div.c			fpu_emulate
+file	arch/m68k/fpe/fpu_ea.c			fpu_emulate
 file	arch/m68k/fpe/fpu_emulate.c		fpu_emulate
 file	arch/m68k/fpe/fpu_exp.c			fpu_emulate
 file	arch/m68k/fpe/fpu_explode.c		fpu_emulate
--- ../../src/sys/arch/m68k/fpe/fpu_add.c	Thu May  2 20:39:38 1996
+++ fpu_add.c	Tue May 19 15:36:36 1998
@@ -63,7 +63,7 @@
 	register struct fpemu *fe;
 {
 	register struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2, *r;
-	register u_int r0, r1, r2, r3;
+	register u_int r0, r1, r2;
 	register int rd;
 
 	/*
@@ -139,8 +139,7 @@
 		 * (but remember to adjust the exponent).
 		 */
 		/* r->fp_mant = x->fp_mant + y->fp_mant */
-		FPU_ADDS(r->fp_mant[3], x->fp_mant[3], y->fp_mant[3]);
-		FPU_ADDCS(r->fp_mant[2], x->fp_mant[2], y->fp_mant[2]);
+		FPU_ADDS(r->fp_mant[2], x->fp_mant[2], y->fp_mant[2]);
 		FPU_ADDCS(r->fp_mant[1], x->fp_mant[1], y->fp_mant[1]);
 		FPU_ADDC(r0, x->fp_mant[0], y->fp_mant[0]);
 		if ((r->fp_mant[0] = r0) >= FP_2) {
@@ -172,13 +171,12 @@
 		 */
 		/* r->fp_mant = x->fp_mant - y->fp_mant */
 		FPU_SET_CARRY(y->fp_sticky);
-		FPU_SUBCS(r3, x->fp_mant[3], y->fp_mant[3]);
 		FPU_SUBCS(r2, x->fp_mant[2], y->fp_mant[2]);
 		FPU_SUBCS(r1, x->fp_mant[1], y->fp_mant[1]);
 		FPU_SUBC(r0, x->fp_mant[0], y->fp_mant[0]);
 		if (r0 < FP_2) {
 			/* cases i and ii */
-			if ((r0 | r1 | r2 | r3) == 0) {
+			if ((r0 | r1 | r2) == 0) {
 				/* case ii */
 				r->fp_class = FPC_ZERO;
 				r->fp_sign = (rd == FPCR_MINF);
@@ -196,12 +194,10 @@
 				panic("fpu_add");
 #endif
 			r->fp_sign = y->fp_sign;
-			FPU_SUBS(r3, 0, r3);
-			FPU_SUBCS(r2, 0, r2);
+			FPU_SUBS(r2, 0, r2);
 			FPU_SUBCS(r1, 0, r1);
 			FPU_SUBC(r0, 0, r0);
 		}
-		r->fp_mant[3] = r3;
 		r->fp_mant[2] = r2;
 		r->fp_mant[1] = r1;
 		r->fp_mant[0] = r0;
--- ../../src/sys/arch/m68k/fpe/fpu_arith.h	Tue Dec 12 05:35:46 1995
+++ fpu_arith.h	Mon Jun  1 13:05:10 1998
@@ -58,46 +58,7 @@
  * for example.
  */
 
-#ifdef sparc
-
-/* set up for extended-precision arithemtic */
-#define	FPU_DECL_CARRY
-
-/*
- * We have three kinds of add:
- *	add with carry:					  r = x + y + c
- *	add (ignoring current carry) and set carry:	c'r = x + y + 0
- *	add with carry and set carry:			c'r = x + y + c
- * The macros use `C' for `use carry' and `S' for `set carry'.
- * Note that the state of the carry is undefined after ADDC and SUBC,
- * so if all you have for these is `add with carry and set carry',
- * that is OK.
- *
- * The same goes for subtract, except that we compute x - y - c.
- *
- * Finally, we have a way to get the carry into a `regular' variable,
- * or set it from a value.  SET_CARRY turns 0 into no-carry, nonzero
- * into carry; GET_CARRY sets its argument to 0 or 1.
- */
-#define	FPU_ADDC(r, x, y) \
-	asm volatile("addx %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
-#define	FPU_ADDS(r, x, y) \
-	asm volatile("addcc %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
-#define	FPU_ADDCS(r, x, y) \
-	asm volatile("addxcc %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
-#define	FPU_SUBC(r, x, y) \
-	asm volatile("subx %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
-#define	FPU_SUBS(r, x, y) \
-	asm volatile("subcc %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
-#define	FPU_SUBCS(r, x, y) \
-	asm volatile("subxcc %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
-
-#define	FPU_GET_CARRY(r) asm volatile("addx %%g0,%%g0,%0" : "=r"(r))
-#define	FPU_SET_CARRY(v) asm volatile("addcc %0,-1,%%g0" : : "r"(v))
-
-#define	FPU_SHL1_BY_ADD	/* shift left 1 faster by ADDC than (a<<1)|(b>>31) */
-
-#else /* non sparc */
+#ifndef FPE_USE_ASM
 
 /* set up for extended-precision arithemtic */
 #define	FPU_DECL_CARRY quad_t fpu_carry, fpu_tmp;
@@ -150,4 +111,64 @@
 #define	FPU_GET_CARRY(r) (r) = (!!fpu_carry)
 #define	FPU_SET_CARRY(v) fpu_carry = ((v) != 0)
 
-#endif
+#else
+
+/* set up for extended-precision arithmetic */
+#define	FPU_DECL_CARRY register int fpu_tmp;
+
+/*
+ * We have three kinds of add:
+ *	add with carry:					  r = x + y + c
+ *	add (ignoring current carry) and set carry:	c'r = x + y + 0
+ *	add with carry and set carry:			c'r = x + y + c
+ * The macros use `C' for `use carry' and `S' for `set carry'.
+ * Note that the state of the carry is undefined after ADDC and SUBC,
+ * so if all you have for these is `add with carry and set carry',
+ * that is OK.
+ *
+ * The same goes for subtract, except that we compute x - y - c.
+ *
+ * Finally, we have a way to get the carry into a `regular' variable,
+ * or set it from a value.  SET_CARRY turns 0 into no-carry, nonzero
+ * into carry; GET_CARRY sets its argument to 0 or 1.
+ */
+#define	FPU_ADDC(r, x, y)						\
+	{								\
+		asm volatile("movel %1,%0" : "=d"(fpu_tmp) : "g"(x));	\
+		asm volatile("addxl %1,%0" : "=d"(fpu_tmp) : "d"(y));	\
+		asm volatile("movel %1,%0" : "=g"(r) : "r"(fpu_tmp));	\
+	}
+#define	FPU_ADDS(r, x, y)						\
+	{								\
+		asm volatile("movel %1,%0" : "=d"(fpu_tmp) : "g"(x));	\
+		asm volatile("addl %1,%0" : "=d"(fpu_tmp) : "g"(y));	\
+		asm volatile("movel %1,%0" : "=g"(r) : "r"(fpu_tmp));	\
+	}
+#define	FPU_ADDCS(r, x, y) FPU_ADDC(r, x, y)
+
+#define	FPU_SUBC(r, x, y)						\
+	{								\
+		asm volatile("movel %1,%0" : "=d"(fpu_tmp) : "g"(x));	\
+		asm volatile("subxl %1,%0" : "=d"(fpu_tmp) : "d"(y));	\
+		asm volatile("movel %1,%0" : "=g"(r) : "r"(fpu_tmp));	\
+	}
+#define	FPU_SUBS(r, x, y)						\
+	{								\
+		asm volatile("movel %1,%0" : "=d"(fpu_tmp) : "g"(x));	\
+		asm volatile("subl %1,%0" : "=d"(fpu_tmp) : "g"(y));	\
+		asm volatile("movel %1,%0" : "=g"(r) : "r"(fpu_tmp));	\
+	}
+#define	FPU_SUBCS(r, x, y) FPU_SUBC(r, x, y)
+
+#define	FPU_GET_CARRY(r)				\
+	{						\
+		asm volatile("moveq #0,%0" : "=d"(r));	\
+		asm volatile("addxl %0,%0" : "+d"(r));	\
+	}
+#define	FPU_SET_CARRY(v)						\
+	{								\
+		asm volatile("moveq #0,%0" : "=d"(fpu_tmp));		\
+		asm volatile("subl %1,%0" : "=d"(fpu_tmp) : "g"(v));	\
+	}
+
+#endif /* FPE_USE_ASM */
--- ../../src/sys/arch/m68k/fpe/fpu_div.c	Tue Dec 12 05:35:49 1995
+++ fpu_div.c	Tue May 19 15:38:11 1998
@@ -156,7 +156,7 @@
 {
 	register struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2;
 	register u_int q, bit;
-	register u_int r0, r1, r2, r3, d0, d1, d2, d3, y0, y1, y2, y3;
+	register u_int r0, r1, r2, d0, d1, d2, y0, y1, y2;
 	FPU_DECL_CARRY
 
 	fe->fe_fpsr &= ~FPSR_EXCP; /* clear all exceptions */
@@ -205,7 +205,7 @@
 	 */
 
 #define	SUBTRACT		/* D = R - Y */ \
-	FPU_SUBS(d3, r3, y3); FPU_SUBCS(d2, r2, y2); \
+	FPU_SUBS(d2, r2, y2); \
 	FPU_SUBCS(d1, r1, y1); FPU_SUBC(d0, r0, y0)
 
 #define	NONNEGATIVE		/* D >= 0 */ \
@@ -213,12 +213,12 @@
 
 #ifdef FPU_SHL1_BY_ADD
 #define	SHL1			/* R <<= 1 */ \
-	FPU_ADDS(r3, r3, r3); FPU_ADDCS(r2, r2, r2); \
+	FPU_ADDS(r2, r2, r2); \
 	FPU_ADDCS(r1, r1, r1); FPU_ADDC(r0, r0, r0)
 #else
 #define	SHL1 \
 	r0 = (r0 << 1) | (r1 >> 31), r1 = (r1 << 1) | (r2 >> 31), \
-	r2 = (r2 << 1) | (r3 >> 31), r3 <<= 1
+	r2 <<= 1
 #endif
 
 #define	LOOP			/* do ... while (bit >>= 1) */ \
@@ -227,7 +227,7 @@
 		SUBTRACT; \
 		if (NONNEGATIVE) { \
 			q |= bit; \
-			r0 = d0, r1 = d1, r2 = d2, r3 = d3; \
+			r0 = d0, r1 = d1, r2 = d2; \
 		} \
 	} while ((bit >>= 1) != 0)
 
@@ -241,17 +241,15 @@
 	r0 = x->fp_mant[0];
 	r1 = x->fp_mant[1];
 	r2 = x->fp_mant[2];
-	r3 = x->fp_mant[3];
 	y0 = y->fp_mant[0];
 	y1 = y->fp_mant[1];
 	y2 = y->fp_mant[2];
-	y3 = y->fp_mant[3];
 
 	bit = FP_1;
 	SUBTRACT;
 	if (NONNEGATIVE) {
 		x->fp_exp -= y->fp_exp;
-		r0 = d0, r1 = d1, r2 = d2, r3 = d3;
+		r0 = d0, r1 = d1, r2 = d2;
 		q = bit;
 		bit >>= 1;
 	} else {
@@ -262,8 +260,7 @@
 	x->fp_mant[0] = q;
 	WORD(x, 1);
 	WORD(x, 2);
-	WORD(x, 3);
-	x->fp_sticky = r0 | r1 | r2 | r3;
+	x->fp_sticky = r0 | r1 | r2;
 
 	return (x);
 }
--- ../../src/sys/arch/m68k/fpe/fpu_emulate.c	Sun Jul  5 20:10:01 1998
+++ fpu_emulate.c	Wed Jun  3 16:17:08 1998
@@ -1,4 +1,4 @@
-/*	$NetBSD: fpu_emulate.c,v 1.20 1998/07/04 22:18:27 jonathan Exp $	*/
+/*	$NetBSD: fpu_emulate.c,v 1.19 1997/11/03 11:10:48 is Exp $	*/
 
 /*
  * Copyright (c) 1995 Gordon W. Ross
@@ -36,14 +36,12 @@
  * XXX - Just a start at it for now...
  */
 
-#include "opt_ddb.h"
-
 #include <sys/types.h>
 #include <sys/signal.h>
 #include <sys/systm.h>
 #include <machine/frame.h>
 
-#if defined(DDB) && defined(DEBUG)
+#if defined(DDB) && defined(DEBUG_FPE)
 # include <m68k/db_machdep.h>
 #endif
 
@@ -57,29 +55,13 @@
 static int test_cc __P((struct fpemu *fe, int pred));
 static struct fpn *fpu_cmp __P((struct fpemu *fe));
 
-#if !defined(DL_DEFAULT)
-#  if defined(DEBUG_WITH_FPU)
-#    define DL_DEFAULT DL_ALL
-#  else
-#    define DL_DEFAULT 0
-#  endif
-#endif
-
-int fpu_debug_level;
-#if DEBUG
-static int global_debug_level = DL_DEFAULT;
-#endif
-
-#define DUMP_INSN(insn)							\
-if (fpu_debug_level & DL_DUMPINSN) {					\
-    printf("  fpu_emulate: insn={adv=%d,siz=%d,op=%04x,w1=%04x}\n",	\
+#if DEBUG_FPE
+#  define DUMP_INSN(insn)						\
+    printf("fpu_emulate: insn={adv=%d,siz=%d,op=%04x,w1=%04x}\n",	\
 	   (insn)->is_advance, (insn)->is_datasize,			\
-	   (insn)->is_opcode, (insn)->is_word1);			\
-}
-
-#ifdef DEBUG_WITH_FPU
-/* mock fpframe for FPE - it's never overwritten by the real fpframe */
-struct fpframe mockfpf;
+	   (insn)->is_opcode, (insn)->is_word1)
+#else
+#  define DUMP_INSN(insn)
 #endif
 
 /*
@@ -94,37 +76,28 @@
 {
     static struct instruction insn;
     static struct fpemu fe;
+#if 0
     u_int savedpc = 0;	/* XXX work around gcc -O lossage */
+#endif
     int word, optype, sig;
 
-#ifdef DEBUG
+
     /* initialize insn.is_datasize to tell it is *not* initialized */
     insn.is_datasize = -1;
-#endif
+
     fe.fe_frame = frame;
-#ifdef DEBUG_WITH_FPU
-    fe.fe_fpframe = &mockfpf;
-    fe.fe_fpsr = mockfpf.fpf_fpsr;
-    fe.fe_fpcr = mockfpf.fpf_fpcr;
-#else
     fe.fe_fpframe = fpf;
     fe.fe_fpsr = fpf->fpf_fpsr;
     fe.fe_fpcr = fpf->fpf_fpcr;
-#endif
 
-#ifdef DEBUG
-    if ((fpu_debug_level = (fe.fe_fpcr >> 16) & 0x0000ffff) == 0) {
-	/* set the default */
-	fpu_debug_level = global_debug_level;
-    }
+#if DEBUG_FPE
+    printf("ENTERING fpu_emulate: FPSR=%08x, FPCR=%08x\n",
+	   fe.fe_fpsr, fe.fe_fpcr);
 #endif
 
-    if (fpu_debug_level & DL_VERBOSE) {
-	printf("ENTERING fpu_emulate: FPSR=%08x, FPCR=%08x\n",
-	       fe.fe_fpsr, fe.fe_fpcr);
-    }
     /* always set this (to avoid a warning) */
-    savedpc = frame->f_pc;
+    insn.is_pc = frame->f_pc;
+    insn.is_nextpc = 0;
     if (frame->f_format == 4) {
 	/*
 	 * A format 4 is generated by the 68{EC,LC}040.  The PC is
@@ -137,33 +110,28 @@
 	 * sizes of all instructions we run across.  This may not
 	 * be true, so we save the PC in order to restore it later.
 	 */
-	frame->f_pc = frame->f_fmt4.f_fslw;
+	insn.is_pc = frame->f_fmt4.f_fslw;
+	insn.is_nextpc = frame->f_pc;
     }
 
-    word = fusword((void *) (frame->f_pc));
+    word = fusword((void *) (insn.is_pc));
     if (word < 0) {
 #ifdef DEBUG
-	printf("  fpu_emulate: fault reading opcode\n");
+	printf("fpu_emulate: fault reading opcode\n");
 #endif
 	return SIGSEGV;
     }
 
     if ((word & 0xf000) != 0xf000) {
 #ifdef DEBUG
-	printf("  fpu_emulate: not coproc. insn.: opcode=0x%x\n", word);
+	printf("fpu_emulate: not coproc. insn.: opcode=0x%x\n", word);
 #endif
 	return SIGILL;
     }
 
-    if (
-#ifdef  DEBUG_WITH_FPU
-	(word & 0x0E00) != 0x0c00 /* accept fake ID == 6 */
-#else
-	(word & 0x0E00) != 0x0200
-#endif
-	) {
+    if ((word & 0x0E00) != 0x0200) {
 #ifdef DEBUG
-	printf("  fpu_emulate: bad coproc. id: opcode=0x%x\n", word);
+	printf("fpu_emulate: bad coproc. id: opcode=0x%x\n", word);
 #endif
 	return SIGILL;
     }
@@ -171,10 +139,10 @@
     insn.is_opcode = word;
     optype = (word & 0x01C0);
 
-    word = fusword((void *) (frame->f_pc + 2));
+    word = fusword((void *) (insn.is_pc + 2));
     if (word < 0) {
 #ifdef DEBUG
-	printf("  fpu_emulate: fault reading word1\n");
+	printf("fpu_emulate: fault reading word1\n");
 #endif
 	return SIGSEGV;
     }
@@ -192,49 +160,58 @@
     if (optype == 0x0000) {
 	/* type=0: generic */
 	if ((word & 0xc000) == 0xc000) {
-	    if (fpu_debug_level & DL_INSN)
-		printf("  fpu_emulate: fmovm FPr\n");
+#if DEBUG_FPE
+	    printf("fpu_emulate: fmovm FPr\n");
+#endif
 	    sig = fpu_emul_fmovm(&fe, &insn);
 	} else if ((word & 0xc000) == 0x8000) {
-	    if (fpu_debug_level & DL_INSN)
-		printf("  fpu_emulate: fmovm FPcr\n");
+#if DEBUG_FPE
+	    printf("fpu_emulate: fmovm FPcr\n");
+#endif
 	    sig = fpu_emul_fmovmcr(&fe, &insn);
 	} else if ((word & 0xe000) == 0x6000) {
 	    /* fstore = fmove FPn,mem */
-	    if (fpu_debug_level & DL_INSN)
-		printf("  fpu_emulate: fmove to mem\n");
+#if DEBUG_FPE
+	    printf("fpu_emulate: fmove to mem\n");
+#endif
 	    sig = fpu_emul_fstore(&fe, &insn);
 	} else if ((word & 0xfc00) == 0x5c00) {
 	    /* fmovecr */
-	    if (fpu_debug_level & DL_INSN)
-		printf("  fpu_emulate: fmovecr\n");
+#if DEBUG_FPE
+	    printf("fpu_emulate: fmovecr\n");
+#endif
 	    sig = fpu_emul_fmovecr(&fe, &insn);
 	} else if ((word & 0xa07f) == 0x26) {
 	    /* fscale */
-	    if (fpu_debug_level & DL_INSN)
-		printf("  fpu_emulate: fscale\n");
+#if DEBUG_FPE
+	    printf("fpu_emulate: fscale\n");
+#endif
 	    sig = fpu_emul_fscale(&fe, &insn);
 	} else {
-	    if (fpu_debug_level & DL_INSN)
-		printf("  fpu_emulte: other type0\n");
+#if DEBUG_FPE
+	    printf("fpu_emulate: other type0\n");
+#endif
 	    /* all other type0 insns are arithmetic */
 	    sig = fpu_emul_arith(&fe, &insn);
 	}
 	if (sig == 0) {
-	    if (fpu_debug_level & DL_VERBOSE)
-		printf("  fpu_emulate: type 0 returned 0\n");
+#if DEBUG_FPE
+	    printf("fpu_emulate: type 0 returned 0\n");
+#endif
 	    sig = fpu_upd_excp(&fe);
 	}
     } else if (optype == 0x0080 || optype == 0x00C0) {
 	/* type=2 or 3: fbcc, short or long disp. */
-	if (fpu_debug_level & DL_INSN)
-	    printf("  fpu_emulate: fbcc %s\n",
-		   (optype & 0x40) ? "long" : "short");
+#if DEBUG_FPE
+	printf("fpu_emulate: fbcc %s\n",
+	       (optype & 0x40) ? "long" : "short");
+#endif
 	sig = fpu_emul_brcc(&fe, &insn);
     } else if (optype == 0x0040) {
 	/* type=1: fdbcc, fscc, ftrapcc */
-	if (fpu_debug_level & DL_INSN)
-	    printf("  fpu_emulate: type1\n");
+#if DEBUG_FPE
+	printf("fpu_emulate: type1\n");
+#endif
 	sig = fpu_emul_type1(&fe, &insn);
     } else {
 	/* type=4: fsave    (privileged) */
@@ -242,7 +219,7 @@
 	/* type=6: reserved */
 	/* type=7: reserved */
 #ifdef DEBUG
-	printf(" fpu_emulate: bad opcode type: opcode=0x%x\n", insn.is_opcode);
+	printf("fpu_emulate: bad opcode type: opcode=0x%x\n", insn.is_opcode);
 #endif
 	sig = SIGILL;
     }
@@ -258,17 +235,19 @@
 	frame->f_pc += insn.is_advance;
 #if defined(DDB) && defined(DEBUG)
     else {
-	printf(" fpu_emulate: sig=%d, opcode=%x, word1=%x\n",
+	printf("fpu_emulate: sig=%d, opcode=%x, word1=%x\n",
 	       sig, insn.is_opcode, insn.is_word1);
 	kdb_trap(-1, (db_regs_t *)&frame);
     }
 #endif
     if (frame->f_format == 4)
-	frame->f_pc = savedpc;	/* XXX Restore PC -- 68{EC,LC}040 only */
+	/* XXX Restore PC -- 68{EC,LC}040 only */
+	frame->f_pc = insn.is_nextpc;
 
-    if (fpu_debug_level & DL_VERBOSE)
-	printf("EXITING fpu_emulate: w/FPSR=%08x, FPCR=%08x\n",
-	       fe.fe_fpsr, fe.fe_fpcr);
+#if DEBUG_FPE
+    printf("EXITING fpu_emulate: w/FPSR=%08x, FPCR=%08x\n",
+	   fe.fe_fpsr, fe.fe_fpcr);
+#endif
 
     return (sig);
 }
@@ -314,56 +293,64 @@
 {
     u_int fpsr;
 
-    if (fpu_debug_level & DL_RESULT)
-	printf("  fpu_upd_fpsr: previous fpsr=%08x\n", fe->fe_fpsr);
-
+#if DEBUG_FPE
+    printf("fpu_upd_fpsr: previous fpsr=%08x\n", fe->fe_fpsr);
+#endif
     /* clear all condition code */
     fpsr = fe->fe_fpsr & ~FPSR_CCB;
 
-    if (fpu_debug_level & DL_RESULT)
-	printf("  fpu_upd_fpsr: result is a ");
-
+#if DEBUG_FPE
+    printf("fpu_upd_fpsr: result is a ");
+#endif
     if (fp->fp_sign) {
-	if (fpu_debug_level & DL_RESULT)
-	    printf("negative ");
+#if DEBUG_FPE
+	printf("negative ");
+#endif
 	fpsr |= FPSR_NEG;
+#if DEBUG_FPE
     } else {
-	if (fpu_debug_level & DL_RESULT)
-	    printf("positive ");
+	printf("positive ");
+#endif
     }
 
     switch (fp->fp_class) {
     case FPC_SNAN:
-	if (fpu_debug_level & DL_RESULT)
-	    printf("signaling NAN\n");
+#if DEBUG_FPE
+	printf("signaling NAN\n");
+#endif
 	fpsr |= (FPSR_NAN | FPSR_SNAN);
 	break;
     case FPC_QNAN:
-	if (fpu_debug_level & DL_RESULT)
-	    printf("quiet NAN\n");
+#if DEBUG_FPE
+	printf("quiet NAN\n");
+#endif
 	fpsr |= FPSR_NAN;
 	break;
     case FPC_ZERO:
-	if (fpu_debug_level & DL_RESULT)
-	    printf("Zero\n");
+#if DEBUG_FPE
+	printf("Zero\n");
+#endif
 	fpsr |= FPSR_ZERO;
 	break;
     case FPC_INF:
-	if (fpu_debug_level & DL_RESULT)
-	    printf("Inf\n");
+#if DEBUG_FPE
+	printf("Inf\n");
+#endif
 	fpsr |= FPSR_INF;
 	break;
     default:
-	if (fpu_debug_level & DL_RESULT)
-	    printf("Number\n");
+#if DEBUG_FPE
+	printf("Number\n");
+#endif
 	/* anything else is treated as if it is a number */
 	break;
     }
 
     fe->fe_fpsr = fe->fe_fpframe->fpf_fpsr = fpsr;
 
-    if (fpu_debug_level & DL_RESULT)
-	printf("  fpu_upd_fpsr: new fpsr=%08x\n", fe->fe_fpframe->fpf_fpsr);
+#if DEBUG_FPE
+    printf("fpu_upd_fpsr: new fpsr=%08x\n", fe->fe_fpframe->fpf_fpsr);
+#endif
 
     return fpsr;
 }
@@ -386,34 +373,34 @@
 
     insn->is_datasize = 4;
     insn->is_advance = 4;
-    sig = fpu_decode_ea(frame, insn, &insn->is_ea0, insn->is_opcode);
+    sig = fpu_decode_ea(frame, insn, &insn->is_ea, insn->is_opcode);
     if (sig) { return sig; }
 
     if (reglist != 1 && reglist != 2 && reglist != 4 &&
-	(insn->is_ea0.ea_flags & EA_DIRECT)) {
+	(insn->is_ea.ea_flags & EA_DIRECT)) {
 	/* attempted to copy more than one FPcr to CPU regs */
 #ifdef DEBUG
-	printf("  fpu_emul_fmovmcr: tried to copy too many FPcr\n");
+	printf("fpu_emul_fmovmcr: tried to copy too many FPcr\n");
 #endif
 	return SIGILL;
     }
 
     if (reglist & 4) {
 	/* fpcr */
-	if ((insn->is_ea0.ea_flags & EA_DIRECT) &&
-	    insn->is_ea0.ea_regnum >= 8 /* address reg */) {
+	if ((insn->is_ea.ea_flags & EA_DIRECT) &&
+	    insn->is_ea.ea_regnum >= 8 /* address reg */) {
 	    /* attempted to copy FPCR to An */
 #ifdef DEBUG
-	    printf("  fpu_emul_fmovmcr: tried to copy FPCR from/to A%d\n",
-		   insn->is_ea0.ea_regnum & 7);
+	    printf("fpu_emul_fmovmcr: tried to copy FPCR from/to A%d\n",
+		   insn->is_ea.ea_regnum & 7);
 #endif
 	    return SIGILL;
 	}
 	if (fpu_to_mem) {
-	    sig = fpu_store_ea(frame, insn, &insn->is_ea0,
+	    sig = fpu_store_ea(frame, insn, &insn->is_ea,
 			       (char *)&fpf->fpf_fpcr);
 	} else {
-	    sig = fpu_load_ea(frame, insn, &insn->is_ea0,
+	    sig = fpu_load_ea(frame, insn, &insn->is_ea,
 			      (char *)&fpf->fpf_fpcr);
 	}
     }
@@ -421,20 +408,20 @@
 
     if (reglist & 2) {
 	/* fpsr */
-	if ((insn->is_ea0.ea_flags & EA_DIRECT) &&
-	    insn->is_ea0.ea_regnum >= 8 /* address reg */) {
+	if ((insn->is_ea.ea_flags & EA_DIRECT) &&
+	    insn->is_ea.ea_regnum >= 8 /* address reg */) {
 	    /* attempted to copy FPSR to An */
 #ifdef DEBUG
-	    printf("  fpu_emul_fmovmcr: tried to copy FPSR from/to A%d\n",
-		   insn->is_ea0.ea_regnum & 7);
+	    printf("fpu_emul_fmovmcr: tried to copy FPSR from/to A%d\n",
+		   insn->is_ea.ea_regnum & 7);
 #endif
 	    return SIGILL;
 	}
 	if (fpu_to_mem) {
-	    sig = fpu_store_ea(frame, insn, &insn->is_ea0,
+	    sig = fpu_store_ea(frame, insn, &insn->is_ea,
 			       (char *)&fpf->fpf_fpsr);
 	} else {
-	    sig = fpu_load_ea(frame, insn, &insn->is_ea0,
+	    sig = fpu_load_ea(frame, insn, &insn->is_ea,
 			      (char *)&fpf->fpf_fpsr);
 	}
     }
@@ -443,10 +430,10 @@
     if (reglist & 1) {
 	/* fpiar - can be moved to/from An */
 	if (fpu_to_mem) {
-	    sig = fpu_store_ea(frame, insn, &insn->is_ea0,
+	    sig = fpu_store_ea(frame, insn, &insn->is_ea,
 			       (char *)&fpf->fpf_fpiar);
 	} else {
-	    sig = fpu_load_ea(frame, insn, &insn->is_ea0,
+	    sig = fpu_load_ea(frame, insn, &insn->is_ea,
 			      (char *)&fpf->fpf_fpiar);
 	}
     }
@@ -500,13 +487,13 @@
     reglist &= 0xFF;
 
     /* Get effective address. (modreg=opcode&077) */
-    sig = fpu_decode_ea(frame, insn, &insn->is_ea0, insn->is_opcode);
+    sig = fpu_decode_ea(frame, insn, &insn->is_ea, insn->is_opcode);
     if (sig) { return sig; }
 
     /* Get address of soft coprocessor regs. */
     fpregs = &fpf->fpf_regs[0];
 
-    if (insn->is_ea0.ea_flags & EA_PREDECR) {
+    if (insn->is_ea.ea_flags & EA_PREDECR) {
 	regnum = 7;
 	order = -1;
     } else {
@@ -514,30 +501,30 @@
 	order = 1;
     }
 
+    regmask = 0x80;
     while ((0 <= regnum) && (regnum < 8)) {
-	if (w1_post_incr)
-	    regmask = 0x80 >> regnum;
-	else
-	    regmask = 1 << regnum;
 	if (regmask & reglist) {
 	    if (fpu_to_mem) {
-		sig = fpu_store_ea(frame, insn, &insn->is_ea0,
+		sig = fpu_store_ea(frame, insn, &insn->is_ea,
 				   (char*)&fpregs[regnum * 3]);
-		if (fpu_debug_level & DL_RESULT)
-		    printf("  fpu_emul_fmovm: FP%d (%08x,%08x,%08x) saved\n",
-			   regnum, fpregs[regnum * 3], fpregs[regnum * 3 + 1],
-			   fpregs[regnum * 3 + 2]);
+#if DEBUG_FPE
+		printf("fpu_emul_fmovm: FP%d (%08x,%08x,%08x) saved\n",
+		       regnum, fpregs[regnum * 3], fpregs[regnum * 3 + 1],
+		       fpregs[regnum * 3 + 2]);
+#endif
 	    } else {		/* mem to fpu */
-		sig = fpu_load_ea(frame, insn, &insn->is_ea0,
+		sig = fpu_load_ea(frame, insn, &insn->is_ea,
 				  (char*)&fpregs[regnum * 3]);
-		if (fpu_debug_level & DL_RESULT)
-		    printf("  fpu_emul_fmovm: FP%d (%08x,%08x,%08x) loaded\n",
-			   regnum, fpregs[regnum * 3], fpregs[regnum * 3 + 1],
-			   fpregs[regnum * 3 + 2]);
+#if DEBUG_FPE
+		printf("fpu_emul_fmovm: FP%d (%08x,%08x,%08x) loaded\n",
+		       regnum, fpregs[regnum * 3], fpregs[regnum * 3 + 1],
+		       fpregs[regnum * 3 + 2]);
+#endif
 	    }
 	    if (sig) { break; }
 	}
 	regnum += order;
+	regmask >>= 1;
     }
 
     return sig;
@@ -608,41 +595,44 @@
     int regnum, format;
     int discard_result = 0;
     u_int buf[3];
+#if DEBUG_FPE
     int flags;
     char regname;
+#endif
 
     fe->fe_fpsr &= ~FPSR_EXCP;
 
     DUMP_INSN(insn);
 
-    if (fpu_debug_level & DL_ARITH) {
-	printf("  fpu_emul_arith: FPSR = %08x, FPCR = %08x\n",
-	       fe->fe_fpsr, fe->fe_fpcr);
-    }
+#if DEBUG_FPE
+    printf("fpu_emul_arith: FPSR = %08x, FPCR = %08x\n",
+	   fe->fe_fpsr, fe->fe_fpcr);
+#endif
 
     word1 = insn->is_word1;
     format = (word1 >> 10) & 7;
     regnum = (word1 >> 7) & 7;
 
     /* fetch a source operand : may not be used */
-    if (fpu_debug_level & DL_ARITH) {
-	printf("  fpu_emul_arith: dst/src FP%d=%08x,%08x,%08x\n",
-	       regnum, fpregs[regnum*3], fpregs[regnum*3+1],
-	       fpregs[regnum*3+2]);
-    }
+#if DEBUG_FPE
+    printf("fpu_emul_arith: dst/src FP%d=%08x,%08x,%08x\n",
+	   regnum, fpregs[regnum*3], fpregs[regnum*3+1],
+	   fpregs[regnum*3+2]);
+#endif
+
     fpu_explode(fe, &fe->fe_f1, FTYPE_EXT, &fpregs[regnum * 3]);
 
     DUMP_INSN(insn);
 
     /* get the other operand which is always the source */
     if ((word1 & 0x4000) == 0) {
-	if (fpu_debug_level & DL_ARITH) {
-	    printf("  fpu_emul_arith: FP%d op FP%d => FP%d\n",
-		   format, regnum, regnum);
-	    printf("  fpu_emul_arith: src opr FP%d=%08x,%08x,%08x\n",
-		   format, fpregs[format*3], fpregs[format*3+1],
-		   fpregs[format*3+2]);
-	}
+#if DEBUG_FPE
+	printf("fpu_emul_arith: FP%d op FP%d => FP%d\n",
+	       format, regnum, regnum);
+	printf("fpu_emul_arith: src opr FP%d=%08x,%08x,%08x\n",
+	       format, fpregs[format*3], fpregs[format*3+1],
+	       fpregs[format*3+2]);
+#endif
 	fpu_explode(fe, &fe->fe_f2, FTYPE_EXT, &fpregs[format * 3]);
     } else {
 	/* the operand is in memory */
@@ -663,52 +653,52 @@
 	}
 
 	/* Get effective address. (modreg=opcode&077) */
-	sig = fpu_decode_ea(frame, insn, &insn->is_ea0, insn->is_opcode);
+	sig = fpu_decode_ea(frame, insn, &insn->is_ea, insn->is_opcode);
 	if (sig) {
-	    if (fpu_debug_level & DL_ARITH) {
-		printf("  fpu_emul_arith: error in fpu_decode_ea\n");
-	    }
+#if DEBUG_FPE
+	    printf("fpu_emul_arith: error in fpu_decode_ea\n");
+#endif
 	    return sig;
 	}
 
 	DUMP_INSN(insn);
 
-	if (fpu_debug_level & DL_ARITH) {
-	    printf("  fpu_emul_arith: addr mode = ");
-	    flags = insn->is_ea0.ea_flags;
-	    regname = (insn->is_ea0.ea_regnum & 8) ? 'a' : 'd';
-
-	    if (flags & EA_DIRECT) {
-		printf("%c%d\n",
-		       regname, insn->is_ea0.ea_regnum & 7);
-	    } else if (flags & EA_PC_REL) {
-		if (flags & EA_OFFSET) {
-		    printf("pc@(%d)\n", insn->is_ea0.ea_offset);
-		} else if (flags & EA_INDEXED) {
-		    printf("pc@(...)\n");
-		}
-	    } else if (flags & EA_PREDECR) {
-		printf("%c%d@-\n",
-		       regname, insn->is_ea0.ea_regnum & 7);
-	    } else if (flags & EA_POSTINCR) {
-		printf("%c%d@+\n", regname, insn->is_ea0.ea_regnum & 7);
-	    } else if (flags & EA_OFFSET) {
-		printf("%c%d@(%d)\n", regname, insn->is_ea0.ea_regnum & 7,
-		       insn->is_ea0.ea_offset);
+#if DEBUG_FPE
+	printf("fpu_emul_arith: addr mode = ");
+	flags = insn->is_ea.ea_flags;
+	regname = (insn->is_ea.ea_regnum & 8) ? 'a' : 'd';
+
+	if (flags & EA_DIRECT) {
+	    printf("%c%d\n",
+		   regname, insn->is_ea.ea_regnum & 7);
+	} else if (flags & EA_PC_REL) {
+	    if (flags & EA_OFFSET) {
+		printf("pc@(%d)\n", insn->is_ea.ea_offset);
 	    } else if (flags & EA_INDEXED) {
-		printf("%c%d@(...)\n", regname, insn->is_ea0.ea_regnum & 7);
-	    } else if (flags & EA_ABS) {
-		printf("0x%08x\n", insn->is_ea0.ea_absaddr);
-	    } else if (flags & EA_IMMED) {
-
-		printf("#0x%08x,%08x,%08x\n", insn->is_ea0.ea_immed[0],
-		       insn->is_ea0.ea_immed[1], insn->is_ea0.ea_immed[2]);
-	    } else {
-		printf("%c%d@\n", regname, insn->is_ea0.ea_regnum & 7);
+		printf("pc@(...)\n");
 	    }
-	} /* if (fpu_debug_level & DL_ARITH) */
+	} else if (flags & EA_PREDECR) {
+	    printf("%c%d@-\n",
+		   regname, insn->is_ea.ea_regnum & 7);
+	} else if (flags & EA_POSTINCR) {
+	    printf("%c%d@+\n", regname, insn->is_ea.ea_regnum & 7);
+	} else if (flags & EA_OFFSET) {
+	    printf("%c%d@(%d)\n", regname, insn->is_ea.ea_regnum & 7,
+		   insn->is_ea.ea_offset);
+	} else if (flags & EA_INDEXED) {
+	    printf("%c%d@(...)\n", regname, insn->is_ea.ea_regnum & 7);
+	} else if (flags & EA_ABS) {
+	    printf("0x%08x\n", insn->is_ea.ea_absaddr);
+	} else if (flags & EA_IMMED) {
+
+	    printf("#0x%08x,%08x,%08x\n", insn->is_ea.ea_immed[0],
+		   insn->is_ea.ea_immed[1], insn->is_ea.ea_immed[2]);
+	} else {
+	    printf("%c%d@\n", regname, insn->is_ea.ea_regnum & 7);
+	}
+#endif /* DEBUG_FPE */
 
-	fpu_load_ea(frame, insn, &insn->is_ea0, (char*)buf);
+	fpu_load_ea(frame, insn, &insn->is_ea, (char*)buf);
 	if (format == FTYPE_WRD) {
 	    /* sign-extend */
 	    buf[0] &= 0xffff;
@@ -724,10 +714,10 @@
 	    }
 	    format = FTYPE_LNG;
 	}
-	if (fpu_debug_level & DL_ARITH) {
-	    printf("  fpu_emul_arith: src = %08x %08x %08x, siz = %d\n",
-		   buf[0], buf[1], buf[2], insn->is_datasize);
-	}
+#if DEBUG_FPE
+	printf("fpu_emul_arith: src = %08x %08x %08x, siz = %d\n",
+	       buf[0], buf[1], buf[2], insn->is_datasize);
+#endif
 	fpu_explode(fe, &fe->fe_f2, format, buf);
     }
 
@@ -904,7 +894,7 @@
 
     default:
 #ifdef DEBUG
-	printf("  fpu_emul_arith: bad opcode=0x%x, word1=0x%x\n",
+	printf("fpu_emul_arith: bad opcode=0x%x, word1=0x%x\n",
 	       insn->is_opcode, insn->is_word1);
 #endif
 	sig = SIGILL;
@@ -912,29 +902,29 @@
 
     if (!discard_result && sig == 0) {
 	fpu_implode(fe, res, FTYPE_EXT, &fpregs[regnum * 3]);
-	if (fpu_debug_level & DL_ARITH) {
-	    printf("  fpu_emul_arith: %08x,%08x,%08x stored in FP%d\n",
-		   fpregs[regnum*3], fpregs[regnum*3+1],
-		   fpregs[regnum*3+2], regnum);
-	}
-    } else if (sig == 0 && fpu_debug_level & DL_ARITH) {
+#if DEBUG_FPE
+	printf("fpu_emul_arith: %08x,%08x,%08x stored in FP%d\n",
+	       fpregs[regnum*3], fpregs[regnum*3+1],
+	       fpregs[regnum*3+2], regnum);
+    } else if (sig == 0) {
 	static char *class_name[] = { "SNAN", "QNAN", "ZERO", "NUM", "INF" };
-	printf("  fpu_emul_arith: result(%s,%c,%d,%08x,%08x,%08x,%08x) discarded\n",
+	printf("fpu_emul_arith: result(%s,%c,%d,%08x,%08x,%08x) discarded\n",
 	       class_name[res->fp_class + 2],
 	       res->fp_sign ? '-' : '+', res->fp_exp,
 	       res->fp_mant[0], res->fp_mant[1],
-	       res->fp_mant[2], res->fp_mant[3]);
-    } else if (fpu_debug_level & DL_ARITH) {
-	printf("  fpu_emul_arith: received signal %d\n", sig);
+	       res->fp_mant[2]);
+    } else {
+	printf("fpu_emul_arith: received signal %d\n", sig);
+#endif
     }
 
     /* update fpsr according to the result of operation */
     fpu_upd_fpsr(fe, res);
 
-    if (fpu_debug_level & DL_ARITH) {
-	printf("  fpu_emul_arith: FPSR = %08x, FPCR = %08x\n",
-	       fe->fe_fpsr, fe->fe_fpcr);
-    }
+#if DEBUG_FPE
+    printf("fpu_emul_arith: FPSR = %08x, FPCR = %08x\n",
+	   fe->fe_fpsr, fe->fe_fpcr);
+#endif
 
     DUMP_INSN(insn);
 
@@ -956,86 +946,86 @@
     fpsr = fe->fe_fpsr;
     invert = 0;
     fpsr &= ~FPSR_EXCP;		/* clear all exceptions */
-    if (fpu_debug_level & DL_TESTCC) {
-	printf("  test_cc: fpsr=0x%08x\n", fpsr);
-    }
+#if DEBUG_FPE
+    printf("test_cc: fpsr=0x%08x\n", fpsr);
+#endif
     pred &= 0x3f;		/* lowest 6 bits */
 
-    if (fpu_debug_level & DL_TESTCC) {
-	printf("  test_cc: ");
-    }
+#if DEBUG_FPE
+    printf("test_cc: ");
+#endif
 
-    if (pred >= 040) {
+    if (pred >= 0x20) {
 	return SIGILL;
     } else if (pred & 0x10) {
 	/* IEEE nonaware tests */
 	sig_bsun = 1;
-	pred &= 017;		/* lower 4 bits */
+	pred &= 0x0f;		/* lower 4 bits */
     } else {
 	/* IEEE aware tests */
-	if (fpu_debug_level & DL_TESTCC) {
-	    printf("IEEE ");
-	}
+#if DEBUG_FPE
+	printf("IEEE ");
+#endif
 	sig_bsun = 0;
     }
 
-    if (pred >= 010) {
-	if (fpu_debug_level & DL_TESTCC) {
-	    printf("Not ");
-	}
+    if (pred & 0x08) {
+#if DEBUG_FPE
+	printf("Not ");
+#endif
 	/* predicate is "NOT ..." */
 	pred ^= 0xf;		/* invert */
 	invert = -1;
     }
     switch (pred) {
     case 0:			/* (Signaling) False */
-	if (fpu_debug_level & DL_TESTCC) {
-	    printf("False");
-	}
+#if DEBUG_FPE
+	printf("False");
+#endif
 	result = 0;
 	break;
     case 1:			/* (Signaling) Equal */
-	if (fpu_debug_level & DL_TESTCC) {
-	    printf("Equal");
-	}
+#if DEBUG_FPE
+	printf("Equal");
+#endif
 	result = -((fpsr & FPSR_ZERO) == FPSR_ZERO);
 	break;
     case 2:			/* Greater Than */
-	if (fpu_debug_level & DL_TESTCC) {
-	    printf("GT");
-	}
+#if DEBUG_FPE
+	printf("GT");
+#endif
 	result = -((fpsr & (FPSR_NAN|FPSR_ZERO|FPSR_NEG)) == 0);
 	break;
     case 3:			/* Greater or Equal */
-	if (fpu_debug_level & DL_TESTCC) {
-	    printf("GE");
-	}
+#if DEBUG_FPE
+	printf("GE");
+#endif
 	result = -((fpsr & FPSR_ZERO) ||
 		   (fpsr & (FPSR_NAN|FPSR_NEG)) == 0);
 	break;
     case 4:			/* Less Than */
-	if (fpu_debug_level & DL_TESTCC) {
-	    printf("LT");
-	}
+#if DEBUG_FPE
+	printf("LT");
+#endif
 	result = -((fpsr & (FPSR_NAN|FPSR_ZERO|FPSR_NEG)) == FPSR_NEG);
 	break;
     case 5:			/* Less or Equal */
-	if (fpu_debug_level & DL_TESTCC) {
-	    printf("LE");
-	}
+#if DEBUG_FPE
+	printf("LE");
+#endif
 	result = -((fpsr & FPSR_ZERO) ||
 		   ((fpsr & (FPSR_NAN|FPSR_NEG)) == FPSR_NEG));
 	break;
     case 6:			/* Greater or Less than */
-	if (fpu_debug_level & DL_TESTCC) {
-	    printf("GLT");
-	}
+#if DEBUG_FPE
+	printf("GLT");
+#endif
 	result = -((fpsr & (FPSR_NAN|FPSR_ZERO)) == 0);
 	break;
     case 7:			/* Greater, Less or Equal */
-	if (fpu_debug_level & DL_TESTCC) {
-	    printf("GLE");
-	}
+#if DEBUG_FPE
+	printf("GLE");
+#endif
 	result = -((fpsr & FPSR_NAN) == 0);
 	break;
     default:
@@ -1044,9 +1034,9 @@
     }
     result ^= invert;		/* if the predicate is "NOT ...", then
 				   invert the result */
-    if (fpu_debug_level & DL_TESTCC) {
-	printf(" => %s (%d)\n", result ? "true" : "false", result);
-    }
+#if DEBUG_FPE
+    printf("=> %s (%d)\n", result ? "true" : "false", result);
+#endif
     /* if it's an IEEE unaware test and NAN is set, BSUN is set */
     if (sig_bsun && (fpsr & FPSR_NAN)) {
 	fpsr |= FPSR_BSUN;
@@ -1087,10 +1077,10 @@
 	    u_int16_t count = frame->f_regs[insn->is_opcode & 7];
 
 	    if (count-- != 0) {
-		displ = fusword((void *) (frame->f_pc + insn->is_advance));
+		displ = fusword((void *) (insn->is_pc + insn->is_advance));
 		if (displ < 0) {
 #ifdef DEBUG
-		    printf("  fpu_emul_type1: fault reading displacement\n");
+		    printf("fpu_emul_type1: fault reading displacement\n");
 #endif
 		    return SIGSEGV;
 		}
@@ -1140,13 +1130,13 @@
     default:			/* fscc */
 	insn->is_advance = 4;
 	insn->is_datasize = 1;	/* always byte */
-	sig = fpu_decode_ea(frame, insn, &insn->is_ea0, insn->is_opcode);
+	sig = fpu_decode_ea(frame, insn, &insn->is_ea, insn->is_opcode);
 	if (sig) {
 	    break;
 	}
 	if (branch == -1 || branch == 0) {
 	    /* set result */
-	    sig = fpu_store_ea(frame, insn, &insn->is_ea0, (char *)&branch);
+	    sig = fpu_store_ea(frame, insn, &insn->is_ea, (char *)&branch);
 	} else {
 	    /* got an exception */
 	    sig = branch;
@@ -1166,7 +1156,6 @@
      struct fpemu *fe;
      struct instruction *insn;
 {
-    struct frame *frame = fe->fe_frame;
     int displ, word2;
     int sig;
 
@@ -1177,10 +1166,10 @@
     displ = insn->is_word1;
 
     if (insn->is_opcode & 0x40) {
-	word2 = fusword((void *) (frame->f_pc + insn->is_advance));
+	word2 = fusword((void *) (insn->is_pc + insn->is_advance));
 	if (word2 < 0) {
 #ifdef DEBUG
-	    printf("  fpu_emul_brcc: fault reading word2\n");
+	    printf("fpu_emul_brcc: fault reading word2\n");
 #endif
 	    return SIGSEGV;
 	}
@@ -1191,7 +1180,7 @@
         if (displ & 0x8000)
 	    displ |= 0xFFFF0000;
 
-    /* XXX: If CC, frame->f_pc += displ */
+    /* XXX: If CC, insn->is_pc += displ */
     sig = test_cc(fe, insn->is_opcode);
     fe->fe_fpframe->fpf_fpsr = fe->fe_fpsr;
 
@@ -1201,14 +1190,15 @@
     if (sig == -1) {
 	/* branch does take place; 2 is the offset to the 1st disp word */
 	insn->is_advance = displ + 2;
+	insn->is_nextpc = insn->is_pc + insn->is_advance;
     } else if (sig) {
 	return SIGILL;		/* got a signal */
     }
-    if (fpu_debug_level & DL_BRANCH) {
-	printf("  fpu_emul_brcc: %s insn @ %x (%x+%x) (disp=%x)\n",
-	       (sig == -1) ? "BRANCH to" : "NEXT",
-	       frame->f_pc + insn->is_advance, frame->f_pc, insn->is_advance,
-	       displ);
-    }
+#if DEBUG_FPE
+    printf("fpu_emul_brcc: %s insn @ %x (%x+%x) (disp=%x)\n",
+	   (sig == -1) ? "BRANCH to" : "NEXT",
+	   insn->is_pc + insn->is_advance, insn->is_pc, insn->is_advance,
+	   displ);
+#endif
     return 0;
 }
--- ../../src/sys/arch/m68k/fpe/fpu_emulate.h	Thu May  2 20:39:40 1996
+++ fpu_emulate.h	Tue Jun  2 19:34:46 1998
@@ -44,7 +44,7 @@
  * or `unpacked' form consisting of:
  *	- sign
  *	- unbiased exponent
- *	- mantissa (`1.' + 112-bit fraction + guard + round)
+ *	- mantissa (`1.' + 64-bit fraction + guard + round)
  *	- sticky bit
  * Any implied `1' bit is inserted, giving a 113-bit mantissa that is
  * always nonzero.  Additional low-order `guard' and `round' bits are
@@ -76,10 +76,10 @@
 	int	fp_sign;		/* 0 => positive, 1 => negative */
 	int	fp_exp;			/* exponent (unbiased) */
 	int	fp_sticky;		/* nonzero bits lost at right end */
-	u_int	fp_mant[4];		/* 115-bit mantissa */
+	u_int	fp_mant[3];		/* 67-bit mantissa */
 };
 
-#define	FP_NMANT	115		/* total bits in mantissa (incl g,r) */
+#define	FP_NMANT	67		/* total bits in mantissa (incl g,r) */
 #define	FP_NG		2		/* number of low-order guard bits */
 #define	FP_LG		((FP_NMANT - 1) & 31)	/* log2(1.0) for fp_mant[0] */
 #define	FP_QUIETBIT	(1 << (FP_LG - 1))	/* Quiet bit in NaNs (0.5) */
@@ -95,7 +95,6 @@
     (dst)->fp_mant[0] = (src)->fp_mant[0];				\
     (dst)->fp_mant[1] = (src)->fp_mant[1];				\
     (dst)->fp_mant[2] = (src)->fp_mant[2];				\
-    (dst)->fp_mant[3] = (src)->fp_mant[3];				\
 }
 
 /*
@@ -170,7 +169,9 @@
 #define	EA_IMMED	0x080	/* mode (7,4): #immed */
 #define EA_MEM_INDIR	0x100	/* mode 6 or (7,3): APC@(Xn:*:*,*)@(*) etc */
 #define EA_BASE_SUPPRSS	0x200	/* mode 6 or (7,3): base register suppressed */
-    int	ea_tdisp;		/* temp. displ. used to xfer many words */
+#define EA_FRAME_EA	0x400	/* MC68LC040 only: precalculated EA from
+				   format 4 stack frame */
+    int	ea_moffs;		/* offset used for fmoveMulti */
 };
 
 #define ea_offset	ea_ext[0]	/* mode 5: offset word */
@@ -179,13 +180,16 @@
 #define ea_basedisp	ea_ext[0]	/* mode 6: base displacement */
 #define ea_outerdisp	ea_ext[1]	/* mode 6: outer displacement */
 #define	ea_idxreg	ea_ext[2]	/* mode 6: index register number */
+#define ea_fea		ea_ext[0]	/* MC68LC040 only: frame EA */
 
 struct instruction {
-    int		is_advance;	/* length of instruction */
-    int		is_datasize;	/* byte, word, long, float, double, ... */
-    int		is_opcode;	/* opcode word */
-    int		is_word1;	/* second word */
-    struct	insn_ea	is_ea0;	/* decoded effective address mode */
+    u_int		is_pc;		/* insn's address */
+    u_int		is_nextpc;	/* next PC */
+    int			is_advance;	/* length of instruction */
+    int			is_datasize;	/* size of memory operand */
+    int			is_opcode;	/* opcode word */
+    int			is_word1;	/* second word */
+    struct insn_ea	is_ea;	/* decoded effective address mode */
 };
 
 /*
@@ -307,33 +311,8 @@
 /* fpu_subr.c */
 void fpu_norm __P((register struct fpn *fp));
 
-/* declarations for debugging */
-
-extern int fpu_debug_level;
-
-/* debug classes */
-#define DL_DUMPINSN 0x0001
-#define DL_DECODEEA 0x0002
-#define DL_LOADEA   0x0004
-#define DL_STOREEA  0x0008
-#define DL_OPERANDS 0x0010
-#define DL_RESULT   0x0020
-#define DL_TESTCC   0x0040
-#define DL_BRANCH   0x0080
-#define DL_FSTORE   0x0100
-#define DL_FSCALE   0x0200
-#define DL_ARITH    0x0400
-#define DL_INSN     0x0800
-#define DL_FMOVEM   0x1000
-/* not defined yet
-#define DL_2000     0x2000
-#define DL_4000     0x4000
-*/
-#define DL_VERBOSE  0x8000
-/* composit debug classes */
-#define DL_EA       (DL_DECODEEA|DL_LOADEA|DL_STOREEA)
-#define DL_VALUES   (DL_OPERANDS|DL_RESULT)
-#define DL_COND     (DL_TESTCC|DL_BRANCH)
-#define DL_ALL      0xffff
+#if !defined(FPE_DEBUG)
+#  define FPE_DEBUG 0
+#endif
 
 #endif /* _FPU_EMULATE_H_ */
--- ../../src/sys/arch/m68k/fpe/fpu_explode.c	Thu May  2 20:39:41 1996
+++ fpu_explode.c	Tue May 19 17:15:15 1998
@@ -105,7 +105,6 @@
 	fp->fp_mant[0] = (int)i < 0 ? -i : i;
 	fp->fp_mant[1] = 0;
 	fp->fp_mant[2] = 0;
-	fp->fp_mant[3] = 0;
 	fpu_norm(fp);
 	return (FPC_NUM);
 }
@@ -124,7 +123,6 @@
 		fp->fp_mant[0] = f0; \
 		fp->fp_mant[1] = f1; \
 		fp->fp_mant[2] = f2; \
-		fp->fp_mant[3] = f3; \
 		fpu_norm(fp); \
 		return (FPC_NUM); \
 	} \
@@ -134,14 +132,12 @@
 		fp->fp_mant[0] = f0; \
 		fp->fp_mant[1] = f1; \
 		fp->fp_mant[2] = f2; \
-		fp->fp_mant[3] = f3; \
 		return (FPC_QNAN); \
 	} \
 	fp->fp_exp = exp - expbias; \
 	fp->fp_mant[0] = FP_1 | f0; \
 	fp->fp_mant[1] = f1; \
 	fp->fp_mant[2] = f2; \
-	fp->fp_mant[3] = f3; \
 	return (FPC_NUM)
 
 /*
@@ -213,7 +209,6 @@
 		fp->fp_mant[0] = f0;
 		fp->fp_mant[1] = f1;
 		fp->fp_mant[2] = f2;
-		fp->fp_mant[3] = 0;
 		fpu_norm(fp);
 		return (FPC_NUM);
 	}
@@ -223,14 +218,12 @@
 		fp->fp_mant[0] = f0;
 		fp->fp_mant[1] = f1;
 		fp->fp_mant[2] = f2;
-		fp->fp_mant[3] = 0;
 		return (FPC_QNAN);
 	}
 	fp->fp_exp = exp - EXT_EXP_BIAS;
 	fp->fp_mant[0] = FP_1 | f0;
 	fp->fp_mant[1] = f1;
 	fp->fp_mant[2] = f2;
-	fp->fp_mant[3] = 0;
 	return (FPC_NUM);
 }
 
--- ../../src/sys/arch/m68k/fpe/fpu_fmovecr.c	Tue Oct 15 01:10:10 1996
+++ fpu_fmovecr.c	Tue May 19 17:08:26 1998
@@ -39,28 +39,28 @@
 
 static struct fpn constrom[] = {
     /* fp_class, fp_sign, fp_exp, fp_sticky, fp_mant[0] ... [3] */
-    { FPC_NUM, 0, 1, 0, { 0x6487e, 0xd5110b46, 0x11a80000, 0x0 } },
-    { FPC_NUM, 0, -2, 0, { 0x4d104, 0xd427de7f, 0xbcc00000, 0x0 } },
-    { FPC_NUM, 0, 1, 0, { 0x56fc2, 0xa2c515da, 0x54d00000, 0x0 } },
-    { FPC_NUM, 0, 0, 0, { 0x5c551, 0xd94ae0bf, 0x85e00000, 0x0 } },
-    { FPC_NUM, 0, -2, 0, { 0x6f2de, 0xc549b943, 0x8ca80000, 0x0 } },
-    { FPC_ZERO, 0, 0, 0, { 0x0, 0x0, 0x0, 0x0 } },
-    { FPC_NUM, 0, -1, 0, { 0x58b90, 0xbfbe8e7b, 0xcd600000, 0x0 } },
-    { FPC_NUM, 0, 1, 0, { 0x49aec, 0x6eed5545, 0x60b80000, 0x0 } },
-    { FPC_NUM, 0, 0, 0, { 0x40000, 0x0, 0x0, 0x0 } },
-    { FPC_NUM, 0, 3, 0, { 0x50000, 0x0, 0x0, 0x0 } },
-    { FPC_NUM, 0, 6, 0, { 0x64000, 0x0, 0x0, 0x0 } },
-    { FPC_NUM, 0, 13, 0, { 0x4e200, 0x0, 0x0, 0x0 } },
-    { FPC_NUM, 0, 26, 0, { 0x5f5e1, 0x0, 0x0, 0x0 } },
-    { FPC_NUM, 0, 53, 0, { 0x470de, 0x4df82000, 0x0, 0x0 } },
-    { FPC_NUM, 0, 106, 0, { 0x4ee2d, 0x6d415b85, 0xacf00000, 0x0 } },
-    { FPC_NUM, 0, 212, 0, { 0x613c0, 0xfa4ffe7d, 0x36a80000, 0x0 } },
-    { FPC_NUM, 0, 425, 0, { 0x49dd2, 0x3e4c074c, 0x67000000, 0x0 } },
-    { FPC_NUM, 0, 850, 0, { 0x553f7, 0x5fdcefce, 0xf4700000, 0x0 } },
-    { FPC_NUM, 0, 1700, 0, { 0x718cd, 0x5753074, 0x8e380000, 0x0 } },
-    { FPC_NUM, 0, 3401, 0, { 0x64bb3, 0xac340ba8, 0x60b80000, 0x0 } },
-    { FPC_NUM, 0, 6803, 0, { 0x4f459, 0xdaee29ea, 0xef280000, 0x0 } },
-    { FPC_NUM, 0, 13606, 0, { 0x62302, 0x90145104, 0xbcd80000, 0x0 } },
+    { FPC_NUM, 0, 1, 0, { 0x6487e, 0xd5110b46, 0x11a80000 } },
+    { FPC_NUM, 0, -2, 0, { 0x4d104, 0xd427de7f, 0xbcc00000 } },
+    { FPC_NUM, 0, 1, 0, { 0x56fc2, 0xa2c515da, 0x54d00000 } },
+    { FPC_NUM, 0, 0, 0, { 0x5c551, 0xd94ae0bf, 0x85e00000 } },
+    { FPC_NUM, 0, -2, 0, { 0x6f2de, 0xc549b943, 0x8ca80000 } },
+    { FPC_ZERO, 0, 0, 0, { 0x0, 0x0, 0x0 } },
+    { FPC_NUM, 0, -1, 0, { 0x58b90, 0xbfbe8e7b, 0xcd600000 } },
+    { FPC_NUM, 0, 1, 0, { 0x49aec, 0x6eed5545, 0x60b80000 } },
+    { FPC_NUM, 0, 0, 0, { 0x40000, 0x0, 0x0 } },
+    { FPC_NUM, 0, 3, 0, { 0x50000, 0x0, 0x0 } },
+    { FPC_NUM, 0, 6, 0, { 0x64000, 0x0, 0x0 } },
+    { FPC_NUM, 0, 13, 0, { 0x4e200, 0x0, 0x0 } },
+    { FPC_NUM, 0, 26, 0, { 0x5f5e1, 0x0, 0x0 } },
+    { FPC_NUM, 0, 53, 0, { 0x470de, 0x4df82000, 0x0 } },
+    { FPC_NUM, 0, 106, 0, { 0x4ee2d, 0x6d415b85, 0xacf00000 } },
+    { FPC_NUM, 0, 212, 0, { 0x613c0, 0xfa4ffe7d, 0x36a80000 } },
+    { FPC_NUM, 0, 425, 0, { 0x49dd2, 0x3e4c074c, 0x67000000 } },
+    { FPC_NUM, 0, 850, 0, { 0x553f7, 0x5fdcefce, 0xf4700000 } },
+    { FPC_NUM, 0, 1700, 0, { 0x718cd, 0x5753074, 0x8e380000 } },
+    { FPC_NUM, 0, 3401, 0, { 0x64bb3, 0xac340ba8, 0x60b80000 } },
+    { FPC_NUM, 0, 6803, 0, { 0x4f459, 0xdaee29ea, 0xef280000 } },
+    { FPC_NUM, 0, 13606, 0, { 0x62302, 0x90145104, 0xbcd80000 } },
 };
 
 struct fpn *
@@ -106,10 +106,10 @@
     (void)fpu_const(&fe->fe_f3, offset);
     (void)fpu_upd_fpsr(fe, &fe->fe_f3);
     fpu_implode(fe, &fe->fe_f3, FTYPE_EXT, &fpreg[dstreg * 3]);
-    if (fpu_debug_level & DL_RESULT) {
-	printf("  fpu_emul_fmovecr: result %08x,%08x,%08x to FP%d\n",
-	       fpreg[dstreg * 3], fpreg[dstreg * 3 + 1], fpreg[dstreg * 3 + 2],
-	       dstreg);
-    }
+#if DEBUG_FPE
+    printf("  fpu_emul_fmovecr: result %08x,%08x,%08x to FP%d\n",
+	   fpreg[dstreg * 3], fpreg[dstreg * 3 + 1], fpreg[dstreg * 3 + 2],
+	   dstreg);
+#endif
     return 0;
 }
--- ../../src/sys/arch/m68k/fpe/fpu_fscale.c	Tue Jan 13 22:42:24 1998
+++ fpu_fscale.c	Tue May 19 17:08:06 1998
@@ -56,17 +56,19 @@
     int scale, sign, exp;
     u_int m0, m1;
     u_int buf[3], fpsr;
+#if DEBUG_FPE
     int flags;
     char regname;
+#endif
 
     scale = sig = 0;
     frame = fe->fe_frame;
     fpregs = &(fe->fe_fpframe->fpf_regs[0]);
     /* clear all exceptions and conditions */
     fpsr = fe->fe_fpsr & ~FPSR_EXCP & ~FPSR_CCB;
-    if (fpu_debug_level & DL_FSCALE) {
-	printf("  fpu_emul_fscale: FPSR = %08x, FPCR = %08x\n", fpsr, fe->fe_fpcr);
-    }
+#if DEBUG_FPE
+    printf("fpu_emul_fscale: FPSR = %08x, FPCR = %08x\n", fpsr, fe->fe_fpcr);
+#endif
 
     word1 = insn->is_word1;
     format = (word1 >> 10) & 7;
@@ -77,16 +79,14 @@
 
     /* get the source operand */
     if ((word1 & 0x4000) == 0) {
-	if (fpu_debug_level & DL_FSCALE) {
-	    printf("  fpu_emul_fscale: FP%d op FP%d => FP%d\n",
-		   format, regnum, regnum);
-	}
+#if DEBUG_FPE
+	printf("fpu_emul_fscale: FP%d op FP%d => FP%d\n",
+	       format, regnum, regnum);
 	/* the operand is an FP reg */
-	if (fpu_debug_level & DL_FSCALE) {
-	    printf("  fpu_emul_scale: src opr FP%d=%08x%08x%08x\n",
-		   format, fpregs[format*3], fpregs[format*3+1],
-		   fpregs[format*3+2]);
-	}
+	printf("fpu_emul_scale: src opr FP%d=%08x%08x%08x\n",
+	       format, fpregs[format*3], fpregs[format*3+1],
+	       fpregs[format*3+2]);
+#endif
 	fpu_explode(fe, &fe->fe_f2, FTYPE_EXT, &fpregs[format * 3]);
 	fpu_implode(fe, &fe->fe_f2, FTYPE_LNG, buf);
       scale = buf[0];
@@ -109,48 +109,48 @@
 	}
 
 	/* Get effective address. (modreg=opcode&077) */
-	sig = fpu_decode_ea(frame, insn, &insn->is_ea0, insn->is_opcode);
+	sig = fpu_decode_ea(frame, insn, &insn->is_ea, insn->is_opcode);
 	if (sig) {
-	    if (fpu_debug_level & DL_FSCALE) {
-		printf("  fpu_emul_fscale: error in decode_ea\n");
-	    }
+#if DEBUG_FPE
+	    printf("fpu_emul_fscale: error in decode_ea\n");
+#endif
 	    return sig;
 	}
 
-	if (fpu_debug_level & DL_FSCALE) {
-	    printf("  fpu_emul_fscale: addr mode = ");
-	    flags = insn->is_ea0.ea_flags;
-	    regname = (insn->is_ea0.ea_regnum & 8) ? 'a' : 'd';
-
-	    if (flags & EA_DIRECT) {
-		printf("%c%d\n", regname, insn->is_ea0.ea_regnum & 7);
-	    } else if (insn->is_ea0.ea_flags & EA_PREDECR) {
-		printf("%c%d@-\n", regname, insn->is_ea0.ea_regnum & 7);
-	    } else if (insn->is_ea0.ea_flags & EA_POSTINCR) {
-		printf("%c%d@+\n", regname, insn->is_ea0.ea_regnum & 7);
-	    } else if (insn->is_ea0.ea_flags & EA_OFFSET) {
-		printf("%c%d@(%d)\n", regname, insn->is_ea0.ea_regnum & 7,
-		       insn->is_ea0.ea_offset);
-	    } else if (insn->is_ea0.ea_flags & EA_INDEXED) {
-		printf("%c%d@(...)\n", regname, insn->is_ea0.ea_regnum & 7);
-	    } else if (insn->is_ea0.ea_flags & EA_ABS) {
-		printf("0x%08x\n", insn->is_ea0.ea_absaddr);
-	    } else if (insn->is_ea0.ea_flags & EA_PC_REL) {
-		printf("pc@(%d)\n", insn->is_ea0.ea_offset);
-	    } else if (flags & EA_IMMED) {
-		printf("#0x%08x%08x%08x\n",
-		       insn->is_ea0.ea_immed[0], insn->is_ea0.ea_immed[1],
-		       insn->is_ea0.ea_immed[2]);
-	    } else {
-		printf("%c%d@\n", regname, insn->is_ea0.ea_regnum & 7);
-	    }
+#if DEBUG_FPE
+	printf("fpu_emul_fscale: addr mode = ");
+	flags = insn->is_ea.ea_flags;
+	regname = (insn->is_ea.ea_regnum & 8) ? 'a' : 'd';
+
+	if (flags & EA_DIRECT) {
+	    printf("%c%d\n", regname, insn->is_ea.ea_regnum & 7);
+	} else if (flags & EA_PREDECR) {
+	    printf("%c%d@-\n", regname, insn->is_ea.ea_regnum & 7);
+	} else if (flags & EA_POSTINCR) {
+		printf("%c%d@+\n", regname, insn->is_ea.ea_regnum & 7);
+	} else if (flags & EA_OFFSET) {
+	    printf("%c%d@(%d)\n", regname, insn->is_ea.ea_regnum & 7,
+		   insn->is_ea.ea_offset);
+	} else if (flags & EA_INDEXED) {
+	    printf("%c%d@(...)\n", regname, insn->is_ea.ea_regnum & 7);
+	} else if (flags & EA_ABS) {
+		printf("0x%08x\n", insn->is_ea.ea_absaddr);
+	} else if (flags & EA_PC_REL) {
+	    printf("pc@(%d)\n", insn->is_ea.ea_offset);
+	} else if (flags & EA_IMMED) {
+	    printf("#0x%08x%08x%08x\n",
+		       insn->is_ea.ea_immed[0], insn->is_ea.ea_immed[1],
+		   insn->is_ea.ea_immed[2]);
+	} else {
+	    printf("%c%d@\n", regname, insn->is_ea.ea_regnum & 7);
 	}
-	fpu_load_ea(frame, insn, &insn->is_ea0, (char*)buf);
+#endif
+	fpu_load_ea(frame, insn, &insn->is_ea, (char*)buf);
 
-	if (fpu_debug_level & DL_FSCALE) {
-	    printf(" fpu_emul_fscale: src = %08x%08x%08x, siz = %d\n",
-		   buf[0], buf[1], buf[2], insn->is_datasize);
-	}
+#if DEBUG_FPE
+	printf("fpu_emul_fscale: src = %08x%08x%08x, siz = %d\n",
+	       buf[0], buf[1], buf[2], insn->is_datasize);
+#endif
 	if (format == FTYPE_LNG) {
 	    /* nothing */
           scale = buf[0];
@@ -307,7 +307,7 @@
 	break;
     default:
 #ifdef DEBUG
-	panic("  fpu_emul_fscale: invalid fp class");
+	panic("fpu_emul_fscale: invalid fp class");
 #endif
 	break;
     }
@@ -324,10 +324,10 @@
     /* update fpsr according to the result of operation */
     fe->fe_fpframe->fpf_fpsr = fe->fe_fpsr = fpsr;
 
-    if (fpu_debug_level & DL_FSCALE) {
-	printf("  fpu_emul_fscale: FPSR = %08x, FPCR = %08x\n",
-	       fe->fe_fpsr, fe->fe_fpcr);
-    }
+#if DEBUG_FPE
+    printf("fpu_emul_fscale: FPSR = %08x, FPCR = %08x\n",
+	   fe->fe_fpsr, fe->fe_fpcr);
+#endif
 
     return (fpsr & fe->fe_fpcr & FPSR_EXCP) ? SIGFPE : sig;
 }
--- ../../src/sys/arch/m68k/fpe/fpu_fstore.c	Sun Jul 20 09:50:34 1997
+++ fpu_fstore.c	Tue May 19 17:15:48 1998
@@ -50,10 +50,10 @@
     int format;
     u_int buf[3];
 
-    if (fpu_debug_level & DL_FSTORE) {
-	printf("  fpu_emul_fstore: frame at %p fpframe at %p\n",
-	       frame, fe->fe_fpframe);
-    }
+#if DEBUG_FPE
+    printf("  fpu_emul_fstore: frame at %p fpframe at %p\n",
+	   frame, fe->fe_fpframe);
+#endif
 
     word1 = insn->is_word1;
     format = (word1 >> 10) & 7;
@@ -75,28 +75,28 @@
 	insn->is_datasize = 12;
     } else {
 	/* invalid or unsupported operand format */
-	if (fpu_debug_level & DL_FSTORE) {
-	    printf("  fpu_emul_fstore: invalid format %d\n", format);
-	}
+#if DEBUG_FPE
+	printf("  fpu_emul_fstore: invalid format %d\n", format);
+#endif
 	sig = SIGFPE;
     }
-    if (fpu_debug_level & DL_FSTORE) {
-	printf("  fpu_emul_fstore: format %d, size %d\n",
-	       format, insn->is_datasize);
-    }
+#if DEBUG_FPE
+    printf("  fpu_emul_fstore: format %d, size %d\n",
+	   format, insn->is_datasize);
+#endif
 
     fe->fe_fpsr &= ~FPSR_EXCP;
 
     /* Get effective address. (modreg=opcode&077) */
-    sig = fpu_decode_ea(frame, insn, &insn->is_ea0, insn->is_opcode);
+    sig = fpu_decode_ea(frame, insn, &insn->is_ea, insn->is_opcode);
     if (sig) {
-	if (fpu_debug_level & DL_FSTORE) {
-	    printf("  fpu_emul_fstore: failed in decode_ea sig=%d\n", sig);
-	}
+#if DEBUG_FPE
+	printf("  fpu_emul_fstore: failed in decode_ea sig=%d\n", sig);
+#endif
 	return sig;
     }
 
-    if (insn->is_datasize > 4 && insn->is_ea0.ea_flags == EA_DIRECT) {
+    if (insn->is_datasize > 4 && insn->is_ea.ea_flags == EA_DIRECT) {
 	/* trying to store dbl or ext into a data register */
 #ifdef DEBUG
 	printf("  fpu_fstore: attempted to store dbl/ext to reg\n");
@@ -104,25 +104,29 @@
 	return SIGILL;
     }
 
-    if (fpu_debug_level & DL_OPERANDS)
-	printf("  fpu_emul_fstore: saving FP%d (%08x,%08x,%08x)\n",
-	       regnum, fpregs[regnum * 3], fpregs[regnum * 3 + 1],
-	       fpregs[regnum * 3 + 2]);
+#if DEBUG_FPE
+    printf("  fpu_emul_fstore: saving FP%d (%08x,%08x,%08x)\n",
+	   regnum, fpregs[regnum * 3], fpregs[regnum * 3 + 1],
+	   fpregs[regnum * 3 + 2]);
+#endif
     fpu_explode(fe, &fe->fe_f3, FTYPE_EXT, &fpregs[regnum * 3]);
-    if (fpu_debug_level & DL_VALUES) {
+#if DEBUG_FPE
+    {
 	static char *class_name[] = { "SNAN", "QNAN", "ZERO", "NUM", "INF" };
-	printf("  fpu_emul_fstore: fpn (%s,%c,%d,%08x,%08x,%08x,%08x)\n",
+	printf("  fpu_emul_fstore: fpn (%s,%c,%d,%08x,%08x,%08x)\n",
 	       class_name[fe->fe_f3.fp_class + 2],
 	       fe->fe_f3.fp_sign ? '-' : '+', fe->fe_f3.fp_exp,
 	       fe->fe_f3.fp_mant[0], fe->fe_f3.fp_mant[1],
-	       fe->fe_f3.fp_mant[2], fe->fe_f3.fp_mant[3]);
+	       fe->fe_f3.fp_mant[2]);
     }
+#endif
     fpu_implode(fe, &fe->fe_f3, format, buf);
 
-    fpu_store_ea(frame, insn, &insn->is_ea0, (char *)buf);
-    if (fpu_debug_level & DL_RESULT)
-	printf("  fpu_emul_fstore: %08x,%08x,%08x size %d\n",
-	       buf[0], buf[1], buf[2], insn->is_datasize);
+    fpu_store_ea(frame, insn, &insn->is_ea, (char *)buf);
+#if DEBUG_FPE
+    printf("  fpu_emul_fstore: %08x,%08x,%08x size %d\n",
+	   buf[0], buf[1], buf[2], insn->is_datasize);
+#endif
 
     return 0;
 }
--- ../../src/sys/arch/m68k/fpe/fpu_implode.c	Sun Jul 20 09:50:35 1997
+++ fpu_implode.c	Sun May 31 17:11:10 1998
@@ -79,18 +79,16 @@
 int
 round(register struct fpemu *fe, register struct fpn *fp)
 {
-	register u_int m0, m1, m2, m3;
+	register u_int m0, m1, m2;
 	register int gr, s;
 
 	m0 = fp->fp_mant[0];
 	m1 = fp->fp_mant[1];
 	m2 = fp->fp_mant[2];
-	m3 = fp->fp_mant[3];
-	gr = m3 & 3;
+	gr = m2 & 3;
 	s = fp->fp_sticky;
 
 	/* mant >>= FP_NG */
-	m3 = (m3 >> FP_NG) | (m2 << (32 - FP_NG));
 	m2 = (m2 >> FP_NG) | (m1 << (32 - FP_NG));
 	m1 = (m1 >> FP_NG) | (m0 << (32 - FP_NG));
 	m0 >>= FP_NG;
@@ -112,7 +110,7 @@
 		 */
 		if ((gr & 2) == 0)
 			goto rounddown;
-		if ((gr & 1) || fp->fp_sticky || (m3 & 1))
+		if ((gr & 1) || fp->fp_sticky || (m2 & 1))
 			break;
 		goto rounddown;
 
@@ -134,26 +132,19 @@
 	}
 
 	/* Bump low bit of mantissa, with carry. */
-#ifdef sparc /* ``cheating'' (left out FPU_DECL_CARRY; know this is faster) */
-	FPU_ADDS(m3, m3, 1);
-	FPU_ADDCS(m2, m2, 0);
-	FPU_ADDCS(m1, m1, 0);
-	FPU_ADDC(m0, m0, 0);
-#else
-	if (++m3 == 0 && ++m2 == 0 && ++m1 == 0)
+	if (++m2 == 0 && ++m1 == 0)
 		m0++;
-#endif
+	fp->fp_sticky = 0;
 	fp->fp_mant[0] = m0;
 	fp->fp_mant[1] = m1;
 	fp->fp_mant[2] = m2;
-	fp->fp_mant[3] = m3;
 	return (1);
 
 rounddown:
+	fp->fp_sticky = 0;
 	fp->fp_mant[0] = m0;
 	fp->fp_mant[1] = m1;
 	fp->fp_mant[2] = m2;
-	fp->fp_mant[3] = m3;
 	return (0);
 }
 
@@ -228,7 +219,7 @@
 			/* m68881/2 do not underflow when
 			   converting to integer */;
 		round(fe, fp);
-		i = fp->fp_mant[3];
+		i = fp->fp_mant[2];
 		if (i >= ((u_int)0x80000000 + sign))
 			break;
 		return (sign ? -i : i);
@@ -297,20 +288,20 @@
 		fe->fe_fpsr |= FPSR_UNFL;
 		/* -NG for g,r; -SNG_FRACBITS-exp for fraction */
 		(void) fpu_shr(fp, FP_NMANT - FP_NG - SNG_FRACBITS - exp);
-		if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(1))
+		if (round(fe, fp) && fp->fp_mant[2] == SNG_EXP(1))
 			return (sign | SNG_EXP(1) | 0);
 		if (fe->fe_fpsr & FPSR_INEX2)
 			fe->fe_fpsr |= FPSR_UNFL
 			/* mc68881/2 don't underflow when converting */;
-		return (sign | SNG_EXP(0) | fp->fp_mant[3]);
+		return (sign | SNG_EXP(0) | fp->fp_mant[2]);
 	}
 	/* -FP_NG for g,r; -1 for implied 1; -SNG_FRACBITS for fraction */
 	(void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - SNG_FRACBITS);
 #ifdef DIAGNOSTIC
-	if ((fp->fp_mant[3] & SNG_EXP(1 << FP_NG)) == 0)
+	if ((fp->fp_mant[2] & SNG_EXP(1 << FP_NG)) == 0)
 		panic("fpu_ftos");
 #endif
-	if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(2))
+	if (round(fe, fp) && fp->fp_mant[2] == SNG_EXP(2))
 		exp++;
 	if (exp >= SNG_EXP_INFNAN) {
 		/* overflow to inf or to max single */
@@ -321,7 +312,7 @@
 	}
 done:
 	/* phew, made it */
-	return (sign | SNG_EXP(exp) | (fp->fp_mant[3] & SNG_MASK));
+	return (sign | SNG_EXP(exp) | (fp->fp_mant[2] & SNG_MASK));
 }
 
 /*
@@ -360,7 +351,7 @@
 	if ((exp = fp->fp_exp + DBL_EXP_BIAS) <= 0) {
 		fe->fe_fpsr |= FPSR_UNFL;
 		(void) fpu_shr(fp, FP_NMANT - FP_NG - DBL_FRACBITS - exp);
-		if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(1)) {
+		if (round(fe, fp) && fp->fp_mant[1] == DBL_EXP(1)) {
 			res[1] = 0;
 			return (sign | DBL_EXP(1) | 0);
 		}
@@ -371,7 +362,7 @@
 		goto done;
 	}
 	(void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - DBL_FRACBITS);
-	if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(2))
+	if (round(fe, fp) && fp->fp_mant[1] == DBL_EXP(2))
 		exp++;
 	if (exp >= DBL_EXP_INFNAN) {
 		fe->fe_fpsr |= FPSR_OPERR | FPSR_INEX2 | FPSR_OVFL;
@@ -383,8 +374,8 @@
 		return (sign | DBL_EXP(DBL_EXP_INFNAN) | DBL_MASK);
 	}
 done:
-	res[1] = fp->fp_mant[3];
-	return (sign | DBL_EXP(exp) | (fp->fp_mant[2] & DBL_MASK));
+	res[1] = fp->fp_mant[2];
+	return (sign | DBL_EXP(exp) | (fp->fp_mant[1] & DBL_MASK));
 }
 
 /*
@@ -404,10 +395,16 @@
 	register int exp;
 
 #define	EXT_EXP(e)	((e) << 16)
-#define	EXT_MASK	(EXT_EXP(1) - 1)
+/*
+ * In the m68k extended-precision format the significand does not
+ * share a longword with the exponent, so the mask is empty.
+ */
+#define	EXT_MASK	0
+#define EXT_EXPLICIT1	(1UL << (63 & 31))
+#define EXT_EXPLICIT2	(1UL << (64 & 31))
 
 	if (ISNAN(fp)) {
-		(void) fpu_shr(fp, FP_NMANT - 1 - EXT_FRACBITS);
+		(void) fpu_shr(fp, FP_NMANT - EXT_FRACBITS);
 		exp = EXT_EXP_INFNAN;
 		goto done;
 	}
@@ -421,12 +418,12 @@
 		return (sign);
 	}
 
-	if ((exp = fp->fp_exp + EXT_EXP_BIAS) <= 0) {
+	if ((exp = fp->fp_exp + EXT_EXP_BIAS) < 0) {
 		fe->fe_fpsr |= FPSR_UNFL;
 		/* I'm not sure about this <=... exp==0 doesn't mean
 		   it's a denormal in extended format */
 		(void) fpu_shr(fp, FP_NMANT - FP_NG - EXT_FRACBITS - exp);
-		if (round(fe, fp) && fp->fp_mant[2] == EXT_EXP(1)) {
+		if (round(fe, fp) && fp->fp_mant[1] == EXT_EXPLICIT1) {
 			res[1] = res[2] = 0;
 			return (sign | EXT_EXP(1) | 0);
 		}
@@ -436,8 +433,10 @@
 		exp = 0;
 		goto done;
 	}
+#if (FP_NMANT - FP_NG - EXT_FRACBITS) > 0
 	(void) fpu_shr(fp, FP_NMANT - FP_NG - EXT_FRACBITS);
-	if (round(fe, fp) && fp->fp_mant[2] == EXT_EXP(2))
+#endif
+	if (round(fe, fp) && fp->fp_mant[0] == EXT_EXPLICIT2)
 		exp++;
 	if (exp >= EXT_EXP_INFNAN) {
 		fe->fe_fpsr |= FPSR_OPERR | FPSR_INEX2 | FPSR_OVFL;
@@ -449,8 +448,8 @@
 		return (sign | EXT_EXP(EXT_EXP_INFNAN) | EXT_MASK);
 	}
 done:
-	res[1] = fp->fp_mant[2];
-	res[2] = fp->fp_mant[3];
+	res[1] = fp->fp_mant[1];
+	res[2] = fp->fp_mant[2];
 	return (sign | EXT_EXP(exp));
 }
 
--- ../../src/sys/arch/m68k/fpe/fpu_int.c	Tue Dec 12 05:35:52 1995
+++ fpu_int.c	Tue May 19 17:20:20 1998
@@ -50,7 +50,7 @@
   /* when |x| < 1.0 */
   if (x->fp_exp < 0) {
     x->fp_class = FPC_ZERO;
-    x->fp_mant[0] = x->fp_mant[1] = x->fp_mant[2] = x->fp_mant[3] = 0;
+    x->fp_mant[0] = x->fp_mant[1] = x->fp_mant[2] = 0;
     return x;
   }
 
@@ -63,7 +63,7 @@
   clr = 3 - sh / 32;
   mask = (0xffffffff << (sh % 32));
 
-  for (i = 3; i > clr; i--) {
+  for (i = 2; i > clr; i--) {
     x->fp_mant[i] = 0;
   }
   x->fp_mant[i] &= mask;
@@ -87,7 +87,7 @@
      that the result >= 1.0 when mantissa ~= 1.0 and rounded up */
   if (x->fp_exp < -1) {
     x->fp_class = FPC_ZERO;
-    x->fp_mant[0] = x->fp_mant[1] = x->fp_mant[2] = x->fp_mant[3] = 0;
+    x->fp_mant[0] = x->fp_mant[1] = x->fp_mant[2] = 0;
     return x;
   }
 
@@ -100,7 +100,7 @@
   fpu_shr(x, rsh - FP_NG);	/* shift to the right */
 
   if (round(fe, x) == 1 /* rounded up */ &&
-      x->fp_mant[3 - (FP_NMANT-rsh)/32] & (1 << ((FP_NMANT-rsh)%32))
+      x->fp_mant[2 - (FP_NMANT-rsh)/32] & (1 << ((FP_NMANT-rsh)%32))
       /* x >= 2.0 */) {
     rsh--;			/* reduce shift count by 1 */
     x->fp_exp++;		/* adjust exponent */
@@ -110,11 +110,11 @@
   wsh = rsh / 32;
   lsh = rsh % 32;
   rsh = 32 - lsh;
-  for (i = 0; i + wsh < 3; i++) {
+  for (i = 0; i + wsh < 2; i++) {
     x->fp_mant[i] = (x->fp_mant[i+wsh] << lsh) | (x->fp_mant[i+wsh+1] >> rsh);
   }
   x->fp_mant[i++] = (x->fp_mant[i+wsh] << lsh);
-  for (; i < 4; i++) {
+  for (; i < 3; i++) {
     x->fp_mant[i] = 0;
   }
 
--- ../../src/sys/arch/m68k/fpe/fpu_log.c	Tue Oct 15 01:10:15 1996
+++ fpu_log.c	Tue May 19 18:14:21 1998
@@ -204,8 +204,9 @@
     if ((-1 == X.fp_exp && (0xf07d0000U >> (31 - FP_LG)) <= X.fp_mant[0]) ||
 	(0 == X.fp_exp && X.fp_mant[0] <= (0x88410000U >> (31 - FP_LG)))) {
 	/* log near 1 */
-	if (fpu_debug_level & DL_ARITH)
-	    printf("__fpu_logn: log near 1\n");
+#if FPE_DEBUG
+	printf("__fpu_logn: log near 1\n");
+#endif
 
 	fpu_const(&fe->fe_f1, 0x32);
 	/* X+1 */
@@ -286,9 +287,10 @@
 	/* U+U*V*(B1+W*(B3+W*B5)+V*(B2+W*B4)) */
 	d = fpu_add(fe);
     } else /* the usual case */ {
-	if (fpu_debug_level & DL_ARITH)
-	    printf("__fpu_logn: the usual case. X=(%d,%08x,%08x...)\n",
-		   X.fp_exp, X.fp_mant[0], X.fp_mant[1]);
+#if FPE_DEBUG
+	printf("__fpu_logn: the usual case. X=(%d,%08x,%08x...)\n",
+	       X.fp_exp, X.fp_mant[0], X.fp_mant[1]);
+#endif
 
 	k = X.fp_exp;
 	/* X <- Y */
@@ -300,22 +302,24 @@
 	F.fp_exp = X.fp_exp;
 	F.fp_mant[0] = X.fp_mant[0] & (0xfe000000U >> (31 - FP_LG));
 	F.fp_mant[0] |= (0x01000000U >> (31 - FP_LG));
-	F.fp_mant[1] = F.fp_mant[2] = F.fp_mant[3] = 0;
+	F.fp_mant[1] = F.fp_mant[2] = 0;
 	F.fp_sticky = 0;
 
-	if (fpu_debug_level & DL_ARITH) {
-	    printf("__fpu_logn: X=Y*2^k=(%d,%08x,%08x...)*2^%d\n",
-		   fe->fe_f2.fp_exp, fe->fe_f2.fp_mant[0],
-		   fe->fe_f2.fp_mant[1], k);
-	    printf("__fpu_logn: F=(%d,%08x,%08x...)\n",
-		   F.fp_exp, F.fp_mant[0], F.fp_mant[1]);
-	}
+#if FPE_DEBUG
+	printf("__fpu_logn: X=Y*2^k=(%d,%08x,%08x...)*2^%d\n",
+	       fe->fe_f2.fp_exp, fe->fe_f2.fp_mant[0],
+	       fe->fe_f2.fp_mant[1], k);
+	printf("__fpu_logn: F=(%d,%08x,%08x...)\n",
+	       F.fp_exp, F.fp_mant[0], F.fp_mant[1]);
+#endif
 
 	/* index to the table */
-	i = (F.fp_mant[0] >> (FP_LG - 7)) & 0x7e;
+	i = ((F.fp_mant[0] << (7 - FP_LG)) |
+	     (F.fp_mant[1] >> (32 - (7 - FP_LG)))) & 0x7e;
 
-	if (fpu_debug_level & DL_ARITH)
-	    printf("__fpu_logn: index to logtbl i=%d(%x)\n", i, i);
+#if FPE_DEBUG
+	printf("__fpu_logn: index to logtbl i=%d(%x)\n", i, i);
+#endif
 
 	CPYFPN(&fe->fe_f1, &F);
 	/* -F */
@@ -326,16 +330,17 @@
 
 	/* fe_f2 = 1/F */
 	fe->fe_f2.fp_class = FPC_NUM;
-	fe->fe_f2.fp_sign = fe->fe_f2.fp_sticky = fe->fe_f2.fp_mant[3] = 0;
+	fe->fe_f2.fp_sign = fe->fe_f2.fp_sticky = fe->fe_f2.fp_mant[2] = 0;
 	fe->fe_f2.fp_exp = logtbl[i].sp_exp;
 	fe->fe_f2.fp_mant[0] = (logtbl[i].sp_m0 >> (31 - FP_LG));
 	fe->fe_f2.fp_mant[1] = (logtbl[i].sp_m0 << (FP_LG + 1)) |
 	    (logtbl[i].sp_m1 >> (31 - FP_LG));
 	fe->fe_f2.fp_mant[2] = (u_int)(logtbl[i].sp_m1 << (FP_LG + 1));
 
-	if (fpu_debug_level & DL_ARITH)
-	    printf("__fpu_logn: 1/F=(%d,%08x,%08x...)\n", fe->fe_f2.fp_exp,
-		   fe->fe_f2.fp_mant[0], fe->fe_f2.fp_mant[1]);
+#if FPE_DEBUG
+	printf("__fpu_logn: 1/F=(%d,%08x,%08x...)\n", fe->fe_f2.fp_exp,
+	       fe->fe_f2.fp_mant[0], fe->fe_f2.fp_mant[1]);
+#endif
 
 	/* U = (Y-F) * (1/F) */
 	d = fpu_mul(fe);
@@ -345,12 +350,12 @@
 	/* fe_f1 == (fpn)k */
 	fpu_explode(fe, &fe->fe_f1, FTYPE_LNG, &k);
 	(void)fpu_const(&fe->fe_f2, 0x30 /* ln(2) */);
-	if (fpu_debug_level & DL_ARITH) {
-	    printf("__fpu_logn: fp(k)=(%d,%08x,%08x...)\n", fe->fe_f1.fp_exp,
-		   fe->fe_f1.fp_mant[0], fe->fe_f1.fp_mant[1]);
-	    printf("__fpu_logn: ln(2)=(%d,%08x,%08x...)\n", fe->fe_f2.fp_exp,
-		   fe->fe_f2.fp_mant[0], fe->fe_f2.fp_mant[1]);
-	}
+#if FPE_DEBUG
+	printf("__fpu_logn: fp(k)=(%d,%08x,%08x...)\n", fe->fe_f1.fp_exp,
+	       fe->fe_f1.fp_mant[0], fe->fe_f1.fp_mant[1]);
+	printf("__fpu_logn: ln(2)=(%d,%08x,%08x...)\n", fe->fe_f2.fp_exp,
+	       fe->fe_f2.fp_mant[0], fe->fe_f2.fp_mant[1]);
+#endif
 	/* K * LOGOF2 */
 	d = fpu_mul(fe);
 	CPYFPN(&KLOG2, d);
@@ -428,16 +433,17 @@
 	i++;
 	/* fe_f2 = logtbl[i+1] (== LOG(F)) */
 	fe->fe_f2.fp_class = FPC_NUM;
-	fe->fe_f2.fp_sign = fe->fe_f2.fp_sticky = fe->fe_f2.fp_mant[3] = 0;
+	fe->fe_f2.fp_sign = fe->fe_f2.fp_sticky = fe->fe_f2.fp_mant[2] = 0;
 	fe->fe_f2.fp_exp = logtbl[i].sp_exp;
 	fe->fe_f2.fp_mant[0] = (logtbl[i].sp_m0 >> (31 - FP_LG));
 	fe->fe_f2.fp_mant[1] = (logtbl[i].sp_m0 << (FP_LG + 1)) |
 	    (logtbl[i].sp_m1 >> (31 - FP_LG));
 	fe->fe_f2.fp_mant[2] = (logtbl[i].sp_m1 << (FP_LG + 1));
 
-	if (fpu_debug_level & DL_ARITH)
-	    printf("__fpu_logn: ln(F)=(%d,%08x,%08x,...)\n", fe->fe_f2.fp_exp,
-		   fe->fe_f2.fp_mant[0], fe->fe_f2.fp_mant[1]);
+#if FPE_DEBUG
+	printf("__fpu_logn: ln(F)=(%d,%08x,%08x,...)\n", fe->fe_f2.fp_exp,
+	       fe->fe_f2.fp_mant[0], fe->fe_f2.fp_mant[1]);
+#endif
 
 	/* LOG(F)+U*V*(A2+V*(A4+V*A6)) */
 	d = fpu_add(fe);
@@ -446,10 +452,11 @@
 	/* LOG(F)+U+V*(A1+V*(A3+V*A5))+U*V*(A2+V*(A4+V*A6)) */
 	d = fpu_add(fe);
 
-	if (fpu_debug_level & DL_ARITH)
-	    printf("__fpu_logn: ln(Y)=(%c,%d,%08x,%08x,%08x,%08x)\n",
-		   d->fp_sign ? '-' : '+', d->fp_exp,
-		   d->fp_mant[0], d->fp_mant[1], d->fp_mant[2], d->fp_mant[3]);
+#if FPE_DEBUG
+	printf("__fpu_logn: ln(Y)=(%c,%d,%08x,%08x,%08x)\n",
+	       d->fp_sign ? '-' : '+', d->fp_exp,
+	       d->fp_mant[0], d->fp_mant[1], d->fp_mant[2]);
+#endif
 
 	CPYFPN(&fe->fe_f1, d);
 	CPYFPN(&fe->fe_f2, &KLOG2);
@@ -514,7 +521,7 @@
 	} else if (fp->fp_class == FPC_NUM) {
 	    /* the real work here */
 	    if (fp->fp_mant[0] == FP_1 && fp->fp_mant[1] == 0 &&
-		fp->fp_mant[2] == 0 && fp->fp_mant[3] == 0) {
+		fp->fp_mant[2] == 0) {
 		/* fp == 2.0 ^ exp <--> log2(fp) == exp */
 		fpu_explode(fe, &fe->fe_f3, FTYPE_LNG, &fp->fp_exp);
 		fp = &fe->fe_f3;
--- ../../src/sys/arch/m68k/fpe/fpu_mul.c	Tue Dec 12 05:35:52 1995
+++ fpu_mul.c	Tue May 19 15:40:02 1998
@@ -104,7 +104,7 @@
 	register struct fpemu *fe;
 {
 	register struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2;
-	register u_int a3, a2, a1, a0, x3, x2, x1, x0, bit, m;
+	register u_int a2, a1, a0, x2, x1, x0, bit, m;
 	register int sticky;
 	FPU_DECL_CARRY
 
@@ -145,24 +145,22 @@
 	 * mantissa byte from y.  The variable `bit' denotes the bit
 	 * within m.  We also define some macros to deal with everything.
 	 */
-	x3 = x->fp_mant[3];
 	x2 = x->fp_mant[2];
 	x1 = x->fp_mant[1];
 	x0 = x->fp_mant[0];
-	sticky = a3 = a2 = a1 = a0 = 0;
+	sticky = a2 = a1 = a0 = 0;
 
 #define	ADD	/* A += X */ \
-	FPU_ADDS(a3, a3, x3); \
-	FPU_ADDCS(a2, a2, x2); \
+	FPU_ADDS(a2, a2, x2); \
 	FPU_ADDCS(a1, a1, x1); \
 	FPU_ADDC(a0, a0, x0)
 
 #define	SHR1	/* A >>= 1, with sticky */ \
-	sticky |= a3 & 1, a3 = (a3 >> 1) | (a2 << 31), \
+	sticky |= a2 & 1, \
 	a2 = (a2 >> 1) | (a1 << 31), a1 = (a1 >> 1) | (a0 << 31), a0 >>= 1
 
 #define	SHR32	/* A >>= 32, with sticky */ \
-	sticky |= a3, a3 = a2, a2 = a1, a1 = a0, a0 = 0
+	sticky |= a2, a2 = a1, a1 = a0, a0 = 0
 
 #define	STEP	/* each 1-bit step of the multiplication */ \
 	SHR1; if (bit & m) { ADD; }; bit <<= 1
@@ -175,7 +173,7 @@
 	 * The last word of y has its highest 1-bit in position FP_NMANT-1,
 	 * so we stop the loop when we move past that bit.
 	 */
-	if ((m = y->fp_mant[3]) == 0) {
+	if ((m = y->fp_mant[2]) == 0) {
 		/* SHR32; */			/* unneeded since A==0 */
 	} else {
 		bit = 1 << FP_NG;
@@ -183,14 +181,6 @@
 			STEP;
 		} while (bit != 0);
 	}
-	if ((m = y->fp_mant[2]) == 0) {
-		SHR32;
-	} else {
-		bit = 1;
-		do {
-			STEP;
-		} while (bit != 0);
-	}
 	if ((m = y->fp_mant[1]) == 0) {
 		SHR32;
 	} else {
@@ -218,7 +208,6 @@
 	x->fp_sign ^= y->fp_sign;
 	x->fp_exp = m;
 	x->fp_sticky = sticky;
-	x->fp_mant[3] = a3;
 	x->fp_mant[2] = a2;
 	x->fp_mant[1] = a1;
 	x->fp_mant[0] = a0;
--- ../../src/sys/arch/m68k/fpe/fpu_rem.c	Thu May  2 20:39:42 1996
+++ fpu_rem.c	Tue May 19 17:23:07 1998
@@ -125,14 +125,12 @@
 	 */
 	while (y->fp_exp != r->fp_exp || y->fp_mant[0] != r->fp_mant[0] ||
 	       y->fp_mant[1] != r->fp_mant[1] ||
-	       y->fp_mant[2] != r->fp_mant[2] ||
-	       y->fp_mant[3] != r->fp_mant[3]) {
+	       y->fp_mant[2] != r->fp_mant[2]) {
 
 	    /* Step 3.2 */
 	    if (y->fp_exp < r->fp_exp || y->fp_mant[0] < r->fp_mant[0] ||
 		y->fp_mant[1] < r->fp_mant[1] ||
-		y->fp_mant[2] < r->fp_mant[2] ||
-		y->fp_mant[3] < r->fp_mant[3]) {
+		y->fp_mant[2] < r->fp_mant[2]) {
 		CPYFPN(&fe->fe_f1, r);
 		CPYFPN(&fe->fe_f2, y);
 		fe->fe_f2.fp_sign = 1;
@@ -165,13 +163,13 @@
     if (r->fp_exp + 1 < y->fp_exp ||
 	(r->fp_exp + 1 == y->fp_exp &&
 	 (r->fp_mant[0] < y->fp_mant[0] || r->fp_mant[1] < y->fp_mant[1] ||
-	  r->fp_mant[2] < y->fp_mant[3] || r->fp_mant[4] < y->fp_mant[4])))
+	  r->fp_mant[2] < y->fp_mant[2])))
 	/* if r < y/2 */
 	goto Step6;
     /* Step 5.2 */
     if (r->fp_exp + 1 != y->fp_exp ||
 	r->fp_mant[0] != y->fp_mant[0] || r->fp_mant[1] != y->fp_mant[1] ||
-	r->fp_mant[2] != y->fp_mant[2] || r->fp_mant[3] != y->fp_mant[3]) {
+	r->fp_mant[2] != y->fp_mant[2]) {
 	/* if (!(r < y/2) && !(r == y/2)) */
 	Last_Subtract = 1;
 	q++;
--- ../../src/sys/arch/m68k/fpe/fpu_sqrt.c	Tue Dec 12 05:35:52 1995
+++ fpu_sqrt.c	Tue Jun  2 19:57:34 1998
@@ -192,9 +192,9 @@
 {
 	register struct fpn *x = &fe->fe_f2;
 	register u_int bit, q, tt;
-	register u_int x0, x1, x2, x3;
-	register u_int y0, y1, y2, y3;
-	register u_int d0, d1, d2, d3;
+	register u_int x0, x1, x2;
+	register u_int y0, y1, y2;
+	register u_int d0, d1, d2;
 	register int e;
 	FPU_DECL_CARRY
 
@@ -224,13 +224,13 @@
 	 */
 #ifdef FPU_SHL1_BY_ADD
 #define	DOUBLE_X { \
-	FPU_ADDS(x3, x3, x3); FPU_ADDCS(x2, x2, x2); \
+	FPU_ADDS(x2, x2, x2); \
 	FPU_ADDCS(x1, x1, x1); FPU_ADDC(x0, x0, x0); \
 }
 #else
 #define	DOUBLE_X { \
 	x0 = (x0 << 1) | (x1 >> 31); x1 = (x1 << 1) | (x2 >> 31); \
-	x2 = (x2 << 1) | (x3 >> 31); x3 <<= 1; \
+	x2 <<= 1; \
 }
 #endif
 #if (FP_NMANT & 1) != 0
@@ -243,7 +243,6 @@
 	x0 = x->fp_mant[0];
 	x1 = x->fp_mant[1];
 	x2 = x->fp_mant[2];
-	x3 = x->fp_mant[3];
 	e = x->fp_exp;
 	if (e & 1)		/* exponent is odd; use sqrt(2mant) */
 		DOUBLE_X;
@@ -262,7 +261,7 @@
 	 * outside of each per-word loop.
 	 *
 	 * The calculation `t = y + bit' breaks down into `t0 = y0, ...,
-	 * t3 = y3, t? |= bit' for the appropriate word.  Since the bit
+	 * t2 = y2, t? |= bit' for the appropriate word.  Since the bit
 	 * is always a `new' one, this means that three of the `t?'s are
 	 * just the corresponding `y?'; we use `#define's here for this.
 	 * The variable `tt' holds the actual `t?' variable.
@@ -355,44 +354,10 @@
 	x->fp_mant[2] = q;
 #undef t2
 
-	/* calculate q3.  y0, t0, y1, t1 all fixed; y2, t2, almost done. */
-#define t2 y2
-#define t3 tt
-	q = 0;
-	y3 = 0;
-	bit = 1 << 31;
-	EVEN_DOUBLE;
-	t3 = bit;
-	FPU_SUBS(d3, x3, t3);
-	FPU_SUBCS(d2, x2, t2);
-	FPU_SUBCS(d1, x1, t1);
-	FPU_SUBC(d0, x0, t0);
-	ODD_DOUBLE;
-	if ((int)d0 >= 0) {
-		x0 = d0, x1 = d1, x2 = d2;
-		q |= bit;
-		y2 |= 1;
-	}
-	while ((bit >>= 1) != 0) {
-		EVEN_DOUBLE;
-		t3 = y3 | bit;
-		FPU_SUBS(d3, x3, t3);
-		FPU_SUBCS(d2, x2, t2);
-		FPU_SUBCS(d1, x1, t1);
-		FPU_SUBC(d0, x0, t0);
-		if ((int)d0 >= 0) {
-			x0 = d0, x1 = d1, x2 = d2;
-			q |= bit;
-			y3 |= bit << 1;
-		}
-		ODD_DOUBLE;
-	}
-	x->fp_mant[3] = q;
-
 	/*
 	 * The result, which includes guard and round bits, is exact iff
 	 * x is now zero; any nonzero bits in x represent sticky bits.
 	 */
-	x->fp_sticky = x0 | x1 | x2 | x3;
+	x->fp_sticky = x0 | x1 | x2;
 	return (x);
 }
--- ../../src/sys/arch/m68k/fpe/fpu_subr.c	Thu May  2 20:39:42 1996
+++ fpu_subr.c	Fri May 22 19:50:59 1998
@@ -65,31 +65,29 @@
 int
 fpu_shr(register struct fpn *fp, register int rsh)
 {
-	register u_int m0, m1, m2, m3, s;
+	register u_int m0, m1, m2, s;
 	register int lsh;
 
 #ifdef DIAGNOSTIC
-	if (rsh <= 0 || (fp->fp_class != FPC_NUM && !ISNAN(fp)))
+	if (rsh < 0 || (fp->fp_class != FPC_NUM && !ISNAN(fp)))
 		panic("fpu_rightshift 1");
 #endif
 
 	m0 = fp->fp_mant[0];
 	m1 = fp->fp_mant[1];
 	m2 = fp->fp_mant[2];
-	m3 = fp->fp_mant[3];
 
 	/* If shifting all the bits out, take a shortcut. */
 	if (rsh >= FP_NMANT) {
 #ifdef DIAGNOSTIC
-		if ((m0 | m1 | m2 | m3) == 0)
+		if ((m0 | m1 | m2) == 0)
 			panic("fpu_rightshift 2");
 #endif
 		fp->fp_mant[0] = 0;
 		fp->fp_mant[1] = 0;
 		fp->fp_mant[2] = 0;
-		fp->fp_mant[3] = 0;
 #ifdef notdef
-		if ((m0 | m1 | m2 | m3) == 0)
+		if ((m0 | m1 | m2) == 0)
 			fp->fp_class = FPC_ZERO;
 		else
 #endif
@@ -99,22 +97,18 @@
 
 	/* Squish out full words. */
 	s = fp->fp_sticky;
-	if (rsh >= 32 * 3) {
-		s |= m3 | m2 | m1;
-		m3 = m0, m2 = 0, m1 = 0, m0 = 0;
-	} else if (rsh >= 32 * 2) {
-		s |= m3 | m2;
-		m3 = m1, m2 = m0, m1 = 0, m0 = 0;
+	if (rsh >= 32 * 2) {
+		s |= m2 | m1;
+		m2 = m0, m1 = 0, m0 = 0;
 	} else if (rsh >= 32) {
-		s |= m3;
-		m3 = m2, m2 = m1, m1 = m0, m0 = 0;
+		s |= m2;
+		m2 = m1, m1 = m0, m0 = 0;
 	}
 
 	/* Handle any remaining partial word. */
 	if ((rsh &= 31) != 0) {
 		lsh = 32 - rsh;
-		s |= m3 << lsh;
-		m3 = (m3 >> rsh) | (m2 << lsh);
+		s |= m2 << lsh;
 		m2 = (m2 >> rsh) | (m1 << lsh);
 		m1 = (m1 >> rsh) | (m0 << lsh);
 		m0 >>= rsh;
@@ -122,7 +116,6 @@
 	fp->fp_mant[0] = m0;
 	fp->fp_mant[1] = m1;
 	fp->fp_mant[2] = m2;
-	fp->fp_mant[3] = m3;
 	fp->fp_sticky = s;
 	return (s);
 }
@@ -134,29 +127,32 @@
  *
  * Internally, this may use a `supernormal' -- a number whose fp_mant
  * is greater than or equal to 2.0 -- so as a side effect you can hand it
- * a supernormal and it will fix it (provided fp->fp_mant[3] == 0).
+ * a supernormal and it will fix it (provided fp->fp_mant[2] == 0).
  */
 void
 fpu_norm(register struct fpn *fp)
 {
-	register u_int m0, m1, m2, m3, top, sup, nrm;
+	register u_int m0, m1, m2, sup, nrm;
 	register int lsh, rsh, exp;
 
 	exp = fp->fp_exp;
 	m0 = fp->fp_mant[0];
 	m1 = fp->fp_mant[1];
 	m2 = fp->fp_mant[2];
-	m3 = fp->fp_mant[3];
 
 	/* Handle severe subnormals with 32-bit moves. */
 	if (m0 == 0) {
-		if (m1)
-			m0 = m1, m1 = m2, m2 = m3, m3 = 0, exp -= 32;
-		else if (m2)
-			m0 = m2, m1 = m3, m2 = 0, m3 = 0, exp -= 2 * 32;
-		else if (m3)
-			m0 = m3, m1 = 0, m2 = 0, m3 = 0, exp -= 3 * 32;
-		else {
+		if (m1) {
+			m0 = m1;
+			m1 = m2;
+			m2 = 0;
+			exp -= 32;
+		} else if (m2) {
+			m0 = m2;
+			m1 = 0;
+			m2 = 0;
+			exp -= 2 * 32;
+		} else {
 			fp->fp_class = FPC_ZERO;
 			return;
 		}
@@ -168,36 +164,33 @@
 	if (m0 >= sup) {
 		/*
 		 * We have a supernormal number.  We need to shift it right.
-		 * We may assume m3==0.
+		 * We may assume m2==0.
 		 */
-		for (rsh = 1, top = m0 >> 1; top >= sup; rsh++)	/* XXX slow */
-			top >>= 1;
+		asm volatile("bfffo %1{#0:#32},%0" : "=d"(rsh) : "g"(m0));
+		rsh = 31 - rsh - FP_LG;
 		exp += rsh;
 		lsh = 32 - rsh;
-		m3 = m2 << lsh;
-		m2 = (m2 >> rsh) | (m1 << lsh);
+		m2 = m1 << lsh;
 		m1 = (m1 >> rsh) | (m0 << lsh);
-		m0 = top;
+		m0 = (m0 >> rsh);
 	} else if (m0 < nrm) {
 		/*
 		 * We have a regular denorm (a subnormal number), and need
 		 * to shift it left.
 		 */
-		for (lsh = 1, top = m0 << 1; top < nrm; lsh++)	/* XXX slow */
-			top <<= 1;
+		asm volatile("bfffo %1{#0:#32},%0" : "=d"(lsh) : "g"(m0));
+		lsh = FP_LG - 31 + lsh;
 		exp -= lsh;
 		rsh = 32 - lsh;
-		m0 = top | (m1 >> rsh);
+		m0 = (m0 << lsh) | (m1 >> rsh);
 		m1 = (m1 << lsh) | (m2 >> rsh);
-		m2 = (m2 << lsh) | (m3 >> rsh);
-		m3 <<= lsh;
+		m2 <<= lsh;
 	}
 
 	fp->fp_exp = exp;
 	fp->fp_mant[0] = m0;
 	fp->fp_mant[1] = m1;
 	fp->fp_mant[2] = m2;
-	fp->fp_mant[3] = m3;
 }
 
 /*
@@ -214,6 +207,6 @@
 	fp->fp_class = FPC_QNAN;
 	fp->fp_sign = 0;
 	fp->fp_mant[0] = FP_1 - 1;
-	fp->fp_mant[1] = fp->fp_mant[2] = fp->fp_mant[3] = ~0;
+	fp->fp_mant[1] = fp->fp_mant[2] = ~0;
 	return (fp);
 }
--- /dev/null	Wed Mar  3 22:12:51 1999
+++ fpu_ea.c	Tue May 19 17:07:43 1998
@@ -0,0 +1,776 @@
+/*	$NetBSD: fpu_calcea.c,v 1.7 1996/10/16 06:27:05 scottr Exp $	*/
+
+/*
+ * Copyright (c) 1995 Gordon W. Ross
+ * portion Copyright (c) 1995 Ken Nakata
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ * 4. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Gordon Ross
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/signal.h>
+#include <sys/systm.h>
+#include <machine/frame.h>
+
+#include "fpu_emulate.h"
+
+/*
+ * Prototypes of static functions
+ */
+static int decode_ea6 __P((struct frame *frame, struct instruction *insn,
+			   struct insn_ea *ea, int modreg));
+static int fetch_immed __P((struct frame *frame, struct instruction *insn,
+			    int *dst));
+static int fetch_disp __P((struct frame *frame, struct instruction *insn,
+			   int size, int *res));
+static int calc_ea __P((struct insn_ea *ea, char *ptr, char **eaddr));
+
+/*
+ * Helper routines for dealing with "effective address" values.
+ */
+
+/*
+ * Decode the mode/register field `modreg' into the internal form *ea,
+ * fetching any extension words.  Returns zero on success, else signal number.
+ */
+int
+fpu_decode_ea(frame, insn, ea, modreg)
+     struct frame *frame;
+     struct instruction *insn;
+     struct insn_ea *ea;
+     int modreg;
+{
+    int sig;
+
+#ifdef DEBUG
+    if (insn->is_datasize < 0) {
+	panic("decode_ea: called with uninitialized datasize\n");
+    }
+#endif
+
+    sig = 0;
+
+    /* Default to address register An; regnum 8-15 are a0-a7. */
+    ea->ea_regnum = 8 + (modreg & 7);
+
+    if ((modreg & 060) == 0) {
+	/* register direct: the low four bits select Dn (0-7) or An (8-15) */
+	ea->ea_regnum = modreg & 0xf;
+	ea->ea_flags = EA_DIRECT;
+#ifdef DEBUG_FPE
+	printf("decode_ea: register direct reg=%d\n", ea->ea_regnum);
+#endif
+    } else if (modreg == 074) {
+	/* immediate: the operand follows in the instruction stream */
+	ea->ea_flags = EA_IMMED;
+	sig = fetch_immed(frame, insn, &ea->ea_immed[0]);
+#ifdef DEBUG_FPE
+	printf("decode_ea: immediate size=%d\n", insn->is_datasize);
+#endif
+    }
+    /*
+     * The remaining address modes are handled separately for the
+     * LC040 (a format 4 frame supplies the EA) and for the others.
+     */
+    else if (frame->f_format == 4) {
+	/* LC040 */
+	ea->ea_flags = EA_FRAME_EA;
+	ea->ea_fea = frame->f_fmt4.f_fa;
+#ifdef DEBUG_FPE
+	printf("decode_ea: 68LC040 - in-frame EA (%p)\n", (void *)ea->ea_fea);
+#endif
+	if ((modreg & 070) == 030) {
+	    /* postincrement mode */
+	    ea->ea_flags |= EA_POSTINCR;
+	} else if ((modreg & 070) == 040) {
+	    /* predecrement mode */
+	    ea->ea_flags |= EA_PREDECR;
+	}
+    } else {
+	/* 020/030 */
+	switch (modreg & 070) {
+
+	case 020:			/* (An) */
+	    ea->ea_flags = 0;
+#ifdef DEBUG_FPE
+	    printf("decode_ea: register indirect reg=%d\n", ea->ea_regnum);
+#endif
+	    break;
+
+	case 030:			/* (An)+ */
+	    ea->ea_flags = EA_POSTINCR;
+#ifdef DEBUG_FPE
+	    printf("decode_ea: reg indirect postincrement reg=%d\n",
+		   ea->ea_regnum);
+#endif
+	    break;
+
+	case 040:			/* -(An) */
+	    ea->ea_flags = EA_PREDECR;
+#ifdef DEBUG_FPE
+	    printf("decode_ea: reg indirect predecrement reg=%d\n",
+		   ea->ea_regnum);
+#endif
+	    break;
+
+	case 050:			/* (d16,An) */
+	    ea->ea_flags = EA_OFFSET;
+	    sig = fetch_disp(frame, insn, 1, &ea->ea_offset);
+#ifdef DEBUG_FPE
+	    printf("decode_ea: reg indirect with displacement reg=%d\n",
+		   ea->ea_regnum);
+#endif
+	    break;
+
+	case 060:			/* (d8,An,Xn) */
+	    ea->ea_flags = EA_INDEXED;
+	    sig = decode_ea6(frame, insn, ea, modreg);
+	    break;
+
+	case 070:			/* misc. */
+	    ea->ea_regnum = (modreg & 7);
+	    switch (modreg & 7) {
+
+	    case 0:			/* (xxxx).W */
+		ea->ea_flags = EA_ABS;
+		sig = fetch_disp(frame, insn, 1, &ea->ea_absaddr);
+#ifdef DEBUG_FPE
+		printf("decode_ea: absolute address (word)\n");
+#endif
+		break;
+
+	    case 1:			/* (xxxxxxxx).L */
+		ea->ea_flags = EA_ABS;
+		sig = fetch_disp(frame, insn, 2, &ea->ea_absaddr);
+#ifdef DEBUG_FPE
+		printf("decode_ea: absolute address (long)\n");
+#endif
+		break;
+
+	    case 2:			/* (d16,PC) */
+		ea->ea_flags = EA_PC_REL | EA_OFFSET;
+		sig = fetch_disp(frame, insn, 1, &ea->ea_absaddr);
+#ifdef DEBUG_FPE
+		printf("decode_ea: pc relative word displacement\n");
+#endif
+		break;
+
+	    case 3:			/* (d8,PC,Xn) */
+		ea->ea_flags = EA_PC_REL | EA_INDEXED;
+		sig = decode_ea6(frame, insn, ea, modreg);
+		break;
+
+	    case 4:			/* #data */
+		/* modreg 074 (immediate) was already handled above */
+	    default:
+#ifdef DEBUG_FPE
+		printf("decode_ea: invalid addr mode (7,%d)\n", modreg & 7);
+#endif
+		return SIGILL;
+	    } /* switch for mode 7 */
+	    break;
+	} /* switch mode */
+    }
+    ea->ea_moffs = 0;	/* running offset for load/store starts at zero */
+
+    return sig;
+}
+
+/*
+ * Decode indexed modes (6 and 7,3); returns zero or a signal number.
+ */
+static int
+decode_ea6(frame, insn, ea, modreg)
+     struct frame *frame;
+     struct instruction *insn;
+     struct insn_ea *ea;
+     int modreg;
+{
+    int extword, idx;
+    int basedisp, outerdisp;
+    int bd_size, od_size;
+    int sig;
+
+    extword = fusword((void *) (insn->is_pc + insn->is_advance));
+    if (extword < 0) {
+	return SIGSEGV;
+    }
+    insn->is_advance += 2;
+
+    /* the index register number is in bits 15-12 of the extension word */
+    ea->ea_idxreg = (extword >> 12) & 0xf;
+    idx = frame->f_regs[ea->ea_idxreg];
+    if ((extword & 0x0800) == 0) {
+	/* bit 11 clear: word sized index, sign-extend it */
+	idx &= 0xffff;
+	if (idx & 0x8000) {
+	    idx |= 0xffff0000;
+	}
+    }
+    /* scale the index by 1, 2, 4 or 8 (bits 10-9) */
+    idx <<= ((extword >>9) & 3);
+
+    if ((extword & 0x100) == 0) {
+	/* brief extension word - sign-extend the 8-bit displacement */
+	basedisp = (extword & 0xff);
+	if (basedisp & 0x80) {
+	    basedisp |= 0xffffff00;
+	}
+
+	ea->ea_basedisp = idx + basedisp;
+	ea->ea_outerdisp = 0;
+#if DEBUG_FPE
+	printf("decode_ea6: brief ext word idxreg=%d, basedisp=%08x\n",
+	       ea->ea_idxreg, ea->ea_basedisp);
+#endif
+    } else {
+	/* full extension word */
+	if (extword & 0x80) {
+	    ea->ea_flags |= EA_BASE_SUPPRSS;
+	}
+	bd_size = ((extword >> 4) & 3) - 1;	/* fetch_disp size code */
+	od_size = (extword & 3) - 1;		/* < 0: no memory indirect */
+	sig = fetch_disp(frame, insn, bd_size, &basedisp);
+	if (sig) {
+	    return sig;
+	}
+	if (od_size >= 0) {
+	    ea->ea_flags |= EA_MEM_INDIR;
+	}
+	sig = fetch_disp(frame, insn, od_size, &outerdisp);
+	if (sig) {
+	    return sig;
+	}
+
+	switch (extword & 0x44) {
+	case 0:			/* preindexed */
+	    ea->ea_basedisp = basedisp + idx;
+	    ea->ea_outerdisp = outerdisp;
+	    break;
+	case 4:			/* postindexed */
+	    ea->ea_basedisp = basedisp;
+	    ea->ea_outerdisp = outerdisp + idx;
+	    break;
+	case 0x40:		/* no index */
+	    ea->ea_basedisp = basedisp;
+	    ea->ea_outerdisp = outerdisp;
+	    break;
+	default:
+#ifdef DEBUG
+	    printf("decode_ea6: invalid indirect mode: ext word %04x\n",
+		   extword);
+#endif
+	    return SIGILL;
+	    break;
+	}
+#if DEBUG_FPE
+	printf("decode_ea6: full ext idxreg=%d, basedisp=%x, outerdisp=%x\n",
+	       ea->ea_idxreg, ea->ea_basedisp, ea->ea_outerdisp);
+#endif
+    }
+#if DEBUG_FPE
+    printf("decode_ea6: regnum=%d, flags=%x\n",
+	   ea->ea_regnum, ea->ea_flags);
+#endif
+    return 0;
+}
+
+/*
+ * Load insn->is_datasize bytes from the decoded effective address into dst.
+ * Returns zero on success, else signal number (SIGSEGV if copyin() fails).
+ */
+int
+fpu_load_ea(frame, insn, ea, dst)
+     struct frame *frame;
+     struct instruction *insn;
+     struct insn_ea *ea;
+     char *dst;
+{
+    int *reg;
+    char *src;
+    int len, step;
+    int sig;
+
+#ifdef DIAGNOSTIC
+    if (ea->ea_regnum & ~0xF) {
+	panic("load_ea: bad regnum");
+    }
+#endif
+
+#ifdef DEBUG_FPE
+    printf("load_ea: frame at %p\n", frame);
+#endif
+    /* dst is always int or larger; a short operand ends at dst + 4. */
+    len = insn->is_datasize;
+    if (len < 4) {
+	dst += (4 - len);
+    }
+    /* a byte access through sp moves the register by 2 */
+    step = (len == 1 && ea->ea_regnum == 15 /* sp */) ? 2 : len;
+    if (ea->ea_flags & EA_FRAME_EA) {
+	/* Using LC040 frame EA */
+#ifdef DEBUG_FPE
+	if (ea->ea_flags & (EA_PREDECR|EA_POSTINCR)) {
+	    printf("load_ea: frame ea %08x w/r%d\n",
+		   ea->ea_fea, ea->ea_regnum);
+	} else {
+	    printf("load_ea: frame ea %08x\n", ea->ea_fea);
+	}
+#endif
+	if (copyin((char *)ea->ea_fea + ea->ea_moffs, dst, len) != 0)
+	    return SIGSEGV;
+	if (ea->ea_flags & EA_PREDECR) {
+	    frame->f_regs[ea->ea_regnum] = ea->ea_fea;
+	    ea->ea_fea -= step;
+	    ea->ea_moffs = 0;
+	} else if (ea->ea_flags & EA_POSTINCR) {
+	    ea->ea_fea += step;
+	    frame->f_regs[ea->ea_regnum] = ea->ea_fea;
+	    ea->ea_moffs = 0;
+	} else {
+	    ea->ea_moffs += step;
+	}
+	/* That's it, folks */
+    } else if (ea->ea_flags & EA_DIRECT) {
+	if (len > 4) {
+#ifdef DEBUG
+	    printf("load_ea: operand doesn't fit cpu reg\n");
+#endif
+	    return SIGILL;
+	}
+	if (ea->ea_moffs > 0) {
+#ifdef DEBUG
+	    printf("load_ea: more than one move from cpu reg\n");
+#endif
+	    return SIGILL;
+	}
+	src = (char *)&frame->f_regs[ea->ea_regnum];
+	/* The source is an int. */
+	if (len < 4) {
+	    src += (4 - len);
+#ifdef DEBUG_FPE
+	    printf("load_ea: short/byte opr - addr adjusted\n");
+#endif
+	}
+#ifdef DEBUG_FPE
+	printf("load_ea: src %p\n", src);
+#endif
+	bcopy(src, dst, len);
+    } else if (ea->ea_flags & EA_IMMED) {
+#ifdef DEBUG_FPE
+	printf("load_ea: immed %08x%08x%08x size %d\n",
+	       ea->ea_immed[0], ea->ea_immed[1], ea->ea_immed[2], len);
+#endif
+	src = (char *)&ea->ea_immed[0];
+	if (len < 4) {
+	    src += (4 - len);
+#ifdef DEBUG_FPE
+	    printf("load_ea: short/byte immed opr - addr adjusted\n");
+#endif
+	}
+	bcopy(src, dst, len);
+    } else if (ea->ea_flags & EA_ABS) {
+#ifdef DEBUG_FPE
+	printf("load_ea: abs addr %08x\n", ea->ea_absaddr);
+#endif
+	if (copyin((char *)ea->ea_absaddr, dst, len) != 0)
+	    return SIGSEGV;
+    } else /* register indirect */ {
+	if (ea->ea_flags & EA_PC_REL) {
+#ifdef DEBUG_FPE
+	    printf("load_ea: using PC\n");
+#endif
+	    reg = NULL;
+	    /* Use the PC as the base.  4 is the offset from the opcode
+	       to the first extension word */
+	    src = (char *)insn->is_pc + 4;
+#ifdef DEBUG_FPE
+	    printf("load_ea: pc relative pc+4 = %p\n", src);
+#endif
+	} else /* not PC relative */ {
+#ifdef DEBUG_FPE
+	    printf("load_ea: using register %c%d\n",
+		   (ea->ea_regnum >= 8) ? 'a' : 'd', ea->ea_regnum & 7);
+#endif
+	    /* point to the register */
+	    reg = &frame->f_regs[ea->ea_regnum];
+
+	    if (ea->ea_flags & EA_PREDECR) {
+#ifdef DEBUG_FPE
+		printf("load_ea: predecr mode - reg decremented\n");
+#endif
+		*reg -= step;
+		ea->ea_moffs = 0;
+	    }
+
+	    /* Grab the register contents. */
+	    src = (char *)*reg;
+#ifdef DEBUG_FPE
+	    printf("load_ea: reg indirect reg = %p\n", src);
+#endif
+	}
+
+	sig = calc_ea(ea, src, &src);
+	if (sig)
+	    return sig;
+
+	if (copyin(src + ea->ea_moffs, dst, len) != 0)
+	    return SIGSEGV;
+	/* do post-increment */
+	if (ea->ea_flags & EA_POSTINCR) {
+	    if (ea->ea_flags & EA_PC_REL) {
+#ifdef DEBUG
+		printf("load_ea: tried to postincrement PC\n");
+#endif
+		return SIGILL;
+	    }
+	    *reg += step;
+	    ea->ea_moffs = 0;
+#ifdef DEBUG_FPE
+	    printf("load_ea: postinc mode - reg incremented\n");
+#endif
+	} else {
+	    ea->ea_moffs += len;
+	}
+    }
+
+    return 0;
+}
+
+/*
+ * Store insn->is_datasize bytes from src at the decoded effective address.
+ * Returns zero on success, else signal number (SIGSEGV if copyout() fails).
+ */
+int
+fpu_store_ea(frame, insn, ea, src)
+     struct frame *frame;
+     struct instruction *insn;
+     struct insn_ea *ea;
+     char *src;
+{
+    int *reg;
+    char *dst;
+    int len, step;
+    int sig;
+
+#ifdef	DIAGNOSTIC
+    if (ea->ea_regnum & ~0xf) {
+	panic("store_ea: bad regnum");
+    }
+#endif
+
+    if (ea->ea_flags & (EA_IMMED|EA_PC_REL)) {
+	/* not alterable address mode */
+#ifdef DEBUG
+	printf("store_ea: not alterable address mode\n");
+#endif
+	return SIGILL;
+    }
+
+    /* src is always int or larger; a short operand ends at src + 4. */
+    len = insn->is_datasize;
+    if (len < 4) {
+	src += (4 - len);
+    }
+    /* a byte access through sp moves the register by 2 */
+    step = (len == 1 && ea->ea_regnum == 15 /* sp */) ? 2 : len;
+    if (ea->ea_flags & EA_FRAME_EA) {
+	/* Using LC040 frame EA */
+#ifdef DEBUG_FPE
+	if (ea->ea_flags & (EA_PREDECR|EA_POSTINCR)) {
+	    printf("store_ea: frame ea %08x w/r%d\n",
+		   ea->ea_fea, ea->ea_regnum);
+	} else {
+	    printf("store_ea: frame ea %08x\n", ea->ea_fea);
+	}
+#endif
+	if (copyout(src, (char *)ea->ea_fea + ea->ea_moffs, len) != 0)
+	    return SIGSEGV;
+	if (ea->ea_flags & EA_PREDECR) {
+	    frame->f_regs[ea->ea_regnum] = ea->ea_fea;
+	    ea->ea_fea -= step;
+	    ea->ea_moffs = 0;
+	} else if (ea->ea_flags & EA_POSTINCR) {
+	    ea->ea_fea += step;
+	    frame->f_regs[ea->ea_regnum] = ea->ea_fea;
+	    ea->ea_moffs = 0;
+	} else {
+	    ea->ea_moffs += step;
+	}
+	/* That's it, folks */
+    } else if (ea->ea_flags & EA_ABS) {
+#ifdef DEBUG_FPE
+	printf("store_ea: abs addr %08x\n", ea->ea_absaddr);
+#endif
+	if (copyout(src, (char *)ea->ea_absaddr + ea->ea_moffs, len) != 0)
+	    return SIGSEGV;
+	ea->ea_moffs += len;
+    } else if (ea->ea_flags & EA_DIRECT) {
+	if (len > 4) {
+#ifdef DEBUG
+	    printf("store_ea: operand doesn't fit cpu reg\n");
+#endif
+	    return SIGILL;
+	}
+	if (ea->ea_moffs > 0) {
+#ifdef DEBUG
+	    printf("store_ea: more than one move to cpu reg\n");
+#endif
+	    return SIGILL;
+	}
+	dst = (char*)&frame->f_regs[ea->ea_regnum];
+	/* The destination is an int. */
+	if (len < 4) {
+	    dst += (4 - len);
+#ifdef DEBUG_FPE
+	    printf("store_ea: short/byte opr - dst addr adjusted\n");
+#endif
+	}
+#ifdef DEBUG_FPE
+	printf("store_ea: dst %p\n", dst);
+#endif
+	bcopy(src, dst, len);
+    } else /* One of MANY indirect forms... */ {
+#ifdef DEBUG_FPE
+	printf("store_ea: using register %c%d\n",
+	       (ea->ea_regnum >= 8) ? 'a' : 'd', ea->ea_regnum & 7);
+#endif
+	/* point to the register */
+	reg = &(frame->f_regs[ea->ea_regnum]);
+
+	/* do pre-decrement */
+	if (ea->ea_flags & EA_PREDECR) {
+#ifdef DEBUG_FPE
+	    printf("store_ea: predecr mode - reg decremented\n");
+#endif
+	    *reg -= step;
+	    ea->ea_moffs = 0;
+	}
+
+	/* calculate the effective address */
+	sig = calc_ea(ea, (char *)*reg, &dst);
+	if (sig)
+	    return sig;
+
+#ifdef DEBUG_FPE
+	printf("store_ea: dst addr=%p+%d\n", dst, ea->ea_moffs);
+#endif
+	if (copyout(src, dst + ea->ea_moffs, len) != 0)
+	    return SIGSEGV;
+	/* do post-increment */
+	if (ea->ea_flags & EA_POSTINCR) {
+	    *reg += step;
+	    ea->ea_moffs = 0;
+#ifdef DEBUG_FPE
+	    printf("store_ea: postinc mode - reg incremented\n");
+#endif
+	} else {
+	    ea->ea_moffs += len;
+	}
+    }
+
+    return 0;
+}
+
+/*
+ * fetch_immed: fetch the is_datasize-byte immediate operand into dst[]
+ */
+static int
+fetch_immed(frame, insn, dst)
+     struct frame *frame;
+     struct instruction *insn;
+     int *dst;
+{
+    int data, ext_bytes;
+
+    ext_bytes = insn->is_datasize;
+
+    if (0 < ext_bytes) {	/* first word: byte, word, or high half */
+	data = fusword((void *) (insn->is_pc + insn->is_advance));
+	if (data < 0) {
+	    return SIGSEGV;
+	}
+	if (ext_bytes == 1) {
+	    /* sign-extend byte to long */
+	    data &= 0xff;
+	    if (data & 0x80) {
+		data |= 0xffffff00;
+	    }
+	} else if (ext_bytes == 2) {
+	    /* sign-extend word to long */
+	    data &= 0xffff;
+	    if (data & 0x8000) {
+		data |= 0xffff0000;
+	    }
+	}
+	insn->is_advance += 2;
+	dst[0] = data;
+    }
+    if (2 < ext_bytes) {	/* low half of dst[0] */
+	data = fusword((void *) (insn->is_pc + insn->is_advance));
+	if (data < 0) {
+	    return SIGSEGV;
+	}
+	insn->is_advance += 2;
+	dst[0] <<= 16;
+	dst[0] |= data;
+    }
+    if (4 < ext_bytes) {	/* dst[1], high word first */
+	data = fusword((void *) (insn->is_pc + insn->is_advance));
+	if (data < 0) {
+	    return SIGSEGV;
+	}
+	dst[1] = data << 16;
+	data = fusword((void *) (insn->is_pc + insn->is_advance + 2));
+	if (data < 0) {
+	    return SIGSEGV;
+	}
+	insn->is_advance += 4;
+	dst[1] |= data;
+    }
+    if (8 < ext_bytes) {	/* dst[2], high word first */
+	data = fusword((void *) (insn->is_pc + insn->is_advance));
+	if (data < 0) {
+	    return SIGSEGV;
+	}
+	dst[2] = data << 16;
+	data = fusword((void *) (insn->is_pc + insn->is_advance + 2));
+	if (data < 0) {
+	    return SIGSEGV;
+	}
+	insn->is_advance += 4;
+	dst[2] |= data;
+    }
+
+    return 0;
+}
+
+/*
+ * fetch_disp: read a displacement from the instruction stream into *res.
+ * size 1 is a sign-extended word, size 2 a long, anything else yields 0.
+ * Returns zero on success, SIGSEGV when fusword() reports a failure.
+ */
+static int
+fetch_disp(frame, insn, size, res)
+     struct frame *frame;
+     struct instruction *insn;
+     int size, *res;
+{
+    int hi, lo, value = 0;
+
+    switch (size) {
+    case 1:			/* 16-bit displacement */
+	lo = fusword((void *) (insn->is_pc + insn->is_advance));
+	if (lo < 0)
+	    return SIGSEGV;
+	value = lo & 0xffff;
+	if (value & 0x8000)
+	    value |= 0xffff0000;	/* sign-extend */
+	insn->is_advance += 2;
+	break;
+    case 2:			/* 32-bit displacement, high word first */
+	hi = fusword((void *) (insn->is_pc + insn->is_advance));
+	if (hi < 0)
+	    return SIGSEGV;
+	lo = fusword((void *) (insn->is_pc + insn->is_advance + 2));
+	if (lo < 0)
+	    return SIGSEGV;
+	value = (hi << 16) | (lo & 0xffff);
+	insn->is_advance += 4;
+	break;
+    default:			/* no displacement to fetch */
+	break;
+    }
+
+    *res = value;
+    return 0;
+}
+
+/*
+ * Compute the effective address for every addressing mode except
+ * register direct, absolute and immediate, starting from the base
+ * address `ptr' (usually a register content).  Predecrement and
+ * postincrement of the register are left to the caller.
+ * Returns a signal value (0 == no error); *eaddr is written only
+ * when 0 is returned.
+ */
+static int
+calc_ea(ea, ptr, eaddr)
+     struct insn_ea *ea;
+     char *ptr;		/* base address (usually a register content) */
+     char **eaddr;	/* pointer to result pointer */
+{
+    int hi, lo;
+
+#if DEBUG_FPE
+    printf("calc_ea: reg indirect (reg) = %p\n", ptr);
+#endif
+
+    if (ea->ea_flags & EA_OFFSET) {
+	/* displacement mode: add the signed offset to the base */
+#if DEBUG_FPE
+	printf("calc_ea: offset %d\n", ea->ea_offset);
+#endif
+	ptr += ea->ea_offset;
+    } else if (ea->ea_flags & EA_INDEXED) {
+#if DEBUG_FPE
+	printf("calc_ea: indexed mode\n");
+#endif
+
+	/* with the base register suppressed only basedisp counts */
+	ptr = (ea->ea_flags & EA_BASE_SUPPRSS) ?
+	    (char *)ea->ea_basedisp : ptr + ea->ea_basedisp;
+
+	if (ea->ea_flags & EA_MEM_INDIR) {
+#if DEBUG_FPE
+	    printf("calc_ea: mem indir mode: basedisp=%08x, outerdisp=%08x\n",
+		   ea->ea_basedisp, ea->ea_outerdisp);
+	    printf("calc_ea: addr fetched from %p\n", ptr);
+#endif
+	    /* memory indirect: read the 32-bit pointer as two words */
+	    hi = fusword(ptr);
+	    if (hi < 0) {
+		return SIGSEGV;
+	    }
+	    lo = fusword(ptr + 2);
+	    if (lo < 0) {
+		return SIGSEGV;
+	    }
+
+	    hi = (hi << 16) | lo;
+#if DEBUG_FPE
+	    printf("calc_ea: fetched ptr 0x%08x\n", hi);
+#endif
+	    /* the outer displacement is applied to the fetched pointer */
+	    ptr = (char *)hi + ea->ea_outerdisp;
+	}
+    }
+
+    *eaddr = ptr;
+
+    return 0;
+}

>Audit-Trail:
>Unformatted: