Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/alpha Various minor cleanups and bug fixes to the F...
details: https://anonhg.NetBSD.org/src/rev/1a46bc98a2c7
branches: trunk
changeset: 984766:1a46bc98a2c7
user: thorpej <thorpej%NetBSD.org@localhost>
date: Thu Jul 22 01:39:18 2021 +0000
description:
Various minor cleanups and bug fixes to the FP software completion code:
- Use __CTASSERT() instead of rolling our own compile-time assertion
using cpp.
- Use __BIT() &c instead of rolling our own.
- Improve some comments.
- Define a default FP_C and FPCR value that is self-consistent, and
initialize it properly at process creation time.
- Fix signal information when the trap shadow cannot be resolved.
- Use defined constants rather than magic numbers for the exception
summary bits.
- Add a machdep sysctl to enable FP software-completion debugging.
diffstat:
sys/arch/alpha/alpha/fp_complete.c | 192 ++++++++++++++++++++++++++++++------
sys/arch/alpha/alpha/machdep.c | 18 ++-
sys/arch/alpha/include/cpu.h | 3 +-
sys/arch/alpha/include/fpu.h | 102 ++++++++++++-------
4 files changed, 235 insertions(+), 80 deletions(-)
diffs (truncated from 529 to 300 lines):
diff -r 3cee136a6599 -r 1a46bc98a2c7 sys/arch/alpha/alpha/fp_complete.c
--- a/sys/arch/alpha/alpha/fp_complete.c Thu Jul 22 01:38:45 2021 +0000
+++ b/sys/arch/alpha/alpha/fp_complete.c Thu Jul 22 01:39:18 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: fp_complete.c,v 1.24 2020/09/01 08:22:36 thorpej Exp $ */
+/* $NetBSD: fp_complete.c,v 1.25 2021/07/22 01:39:18 thorpej Exp $ */
/*-
* Copyright (c) 2001 Ross Harvey
@@ -33,9 +33,11 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include "opt_ddb.h"
+
#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
-__KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.24 2020/09/01 08:22:36 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.25 2021/07/22 01:39:18 thorpej Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -51,6 +53,30 @@
#include <lib/libkern/softfloat.h>
+/*
+ * Validate our assumptions about bit positions.
+ */
+__CTASSERT(ALPHA_AESR_INV == (FP_X_INV << 1));
+__CTASSERT(ALPHA_AESR_DZE == (FP_X_DZ << 1));
+__CTASSERT(ALPHA_AESR_OVF == (FP_X_OFL << 1));
+__CTASSERT(ALPHA_AESR_UNF == (FP_X_UFL << 1));
+__CTASSERT(ALPHA_AESR_INE == (FP_X_IMP << 1));
+__CTASSERT(ALPHA_AESR_IOV == (FP_X_IOV << 1));
+
+__CTASSERT(IEEE_TRAP_ENABLE_INV == (FP_X_INV << 1));
+__CTASSERT(IEEE_TRAP_ENABLE_DZE == (FP_X_DZ << 1));
+__CTASSERT(IEEE_TRAP_ENABLE_OVF == (FP_X_OFL << 1));
+__CTASSERT(IEEE_TRAP_ENABLE_UNF == (FP_X_UFL << 1));
+__CTASSERT(IEEE_TRAP_ENABLE_INE == (FP_X_IMP << 1));
+
+__CTASSERT((uint64_t)FP_X_IMP << (61 - 3) == FPCR_INED);
+__CTASSERT((uint64_t)FP_X_UFL << (61 - 3) == FPCR_UNFD);
+__CTASSERT((uint64_t)FP_X_OFL << (49 - 0) == FPCR_OVFD);
+__CTASSERT((uint64_t)FP_X_DZ << (49 - 0) == FPCR_DZED);
+__CTASSERT((uint64_t)FP_X_INV << (49 - 0) == FPCR_INVD);
+
+__CTASSERT(FP_C_ALLBITS == MDLWP_FP_C);
+
#define TSWINSIZE 4 /* size of trap shadow window in uint32_t units */
/* Set Name Opcodes AARM C.* Symbols */
@@ -351,11 +377,12 @@
* it is necessary to initially set a sticky bit.
*/
- fpcr &= FPCR_DYN(3);
+ fpcr &= FPCR_DYN_RM;
/*
- * enable traps = case where flag bit is clear OR program wants a trap
- * enables = ~flags | mask
+ * enable traps = case where flag bit is clear AND program wants a trap
+ *
+ * enables = ~flags & mask
* disables = ~(~flags | mask)
* disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871)
*/
@@ -364,18 +391,6 @@
fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3);
fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0);
-# if !(FP_X_INV == 1 && FP_X_DZ == 2 && FP_X_OFL == 4 && \
- FP_X_UFL == 8 && FP_X_IMP == 16 && FP_X_IOV == 32 && \
- FP_X_UFL << (61 - 3) == FPCR_UNFD && \
- FP_X_IMP << (61 - 3) == FPCR_INED && \
- FP_X_OFL << (49 - 0) == FPCR_OVFD)
-# error "Assertion failed"
- /*
- * We don't care about the other built-in bit numbers because they
- * have been architecturally specified.
- */
-# endif
-
fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START);
fpcr |= (fp_c & IEEE_MAP_DMZ) << 36;
if (fp_c & FP_C_MIRRORED)
@@ -407,6 +422,11 @@
alpha_pal_wrfen(1);
fp_c_to_fpcr(l);
alpha_pal_wrfen(0);
+ } else {
+ struct pcb *pcb = l->l_addr;
+
+ pcb->pcb_fp.fpr_cr =
+ fp_c_to_fpcr_1(pcb->pcb_fp.fpr_cr, l->l_md.md_flags);
}
kpreempt_enable();
}
@@ -502,12 +522,47 @@
*/
static void
+print_fp_instruction(alpha_instruction *pc, struct lwp *l, uint32_t bits)
+{
+#if defined(DDB)
+ char buf[32];
+ struct alpha_print_instruction_context ctx = {
+ .insn.bits = bits,
+ .pc = (unsigned long)pc,
+ .buf = buf,
+ .bufsize = sizeof(buf),
+ };
+
+ (void) alpha_print_instruction(&ctx);
+
+ printf("INSN [%s:%d] @0x%lx -> %s\n",
+ l->l_proc->p_comm, l->l_proc->p_pid, ctx.pc, ctx.buf);
+#else
+ alpha_instruction insn = {
+ .bits = bits,
+ };
+ printf("INSN [%s:%d] @0x%lx -> opc=0x%x func=0x%x fa=%d fb=%d fc=%d\n",
+ l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc,
+ insn.float_format.opcode, insn.float_format.function,
+ insn.float_format.fa, insn.float_format.fb, insn.float_format.fc);
+ printf("INSN [%s:%d] @0x%lx -> trp=0x%x rnd=0x%x src=0x%x fn=0x%x\n",
+ l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc,
+ insn.float_detail.trp, insn.float_detail.rnd,
+ insn.float_detail.src, insn.float_detail.opclass);
+#endif /* DDB */
+}
+
+static void
alpha_fp_interpret(alpha_instruction *pc, struct lwp *l, uint32_t bits)
{
s_float sfa, sfb, sfc;
t_float tfa, tfb, tfc;
alpha_instruction inst;
+ if (alpha_fp_complete_debug) {
+ print_fp_instruction(pc, l, bits);
+ }
+
inst.bits = bits;
switch(inst.generic_format.opcode) {
default:
@@ -588,15 +643,21 @@
}
alpha_pal_wrfen(1);
/*
- * If necessary, lie about the dynamic rounding mode so emulation
- * software need go to only one place for it, and so we don't have to
- * lock any memory locations or pass a third parameter to every
- * SoftFloat entry point.
+ * Alpha FLOAT instructions can override the rounding mode on a
+ * per-instruction basis. If necessary, lie about the dynamic
+ * rounding mode so emulation software need go to only one place
+ * for it, and so we don't have to lock any memory locations or
+ * pass a third parameter to every SoftFloat entry point.
+ *
+ * N.B. the rounding mode field of the FLOAT format instructions
+ * matches that of the FPCR *except* for the value 3, which means
+ * "dynamic" rounding mode (i.e. what is programmed into the FPCR).
*/
orig_fpcr = fpcr = alpha_read_fpcr();
rm = inst.float_detail.rnd;
- if (__predict_false(rm != 3 /* dynamic */ && rm != (fpcr >> 58 & 3))) {
- fpcr = (fpcr & ~FPCR_DYN(3)) | FPCR_DYN(rm);
+ if (__predict_false(rm != 3 /* dynamic */ &&
+ rm != __SHIFTOUT(fpcr, FPCR_DYN_RM))) {
+ fpcr = (fpcr & ~FPCR_DYN_RM) | __SHIFTIN(rm, FPCR_DYN_RM);
alpha_write_fpcr(fpcr);
}
orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);
@@ -630,20 +691,33 @@
alpha_instruction *trigger_pc, *usertrap_pc;
alpha_instruction *pc, *win_begin, tsw[TSWINSIZE];
- sig = SIGFPE;
+ if (alpha_fp_complete_debug) {
+ printf("%s: [%s:%d] a0[AESR]=0x%lx a1[regmask]=0x%lx "
+ "FPCR=0x%lx FP_C=0x%lx\n",
+ __func__, l->l_proc->p_comm, l->l_proc->p_pid,
+ a0, a1, alpha_read_fpcr(),
+ l->l_md.md_flags & (MDLWP_FP_C|MDLWP_FPACTIVE));
+ }
+
pc = (alpha_instruction *)l->l_md.md_tf->tf_regs[FRAME_PC];
trigger_pc = pc - 1; /* for ALPHA_AMASK_PAT case */
+
+ /*
+ * Start out with the code mirroring the exception flags
+ * (FP_X_*). Shift right 1 bit to discard SWC to achieve
+ * this.
+ */
+ *ucode = a0 >> 1;
+
if (cpu_amask & ALPHA_AMASK_PAT) {
- /* SWC | INV */
- if (a0 & 3 || alpha_fp_sync_complete) {
+ if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) != 0 ||
+ alpha_fp_sync_complete) {
sig = alpha_fp_complete_at(trigger_pc, l, ucode);
- goto done;
+ goto resolved;
}
}
- *ucode = a0;
- /* SWC | INV */
- if (!(a0 & 3))
- return sig;
+ if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) == 0)
+ goto unresolved;
/*
* At this point we are somewhere in the trap shadow of one or more instruc-
* tions that have trapped with software completion specified. We have a mask
@@ -666,8 +740,13 @@
if (copyin(win_begin, tsw, sizeof tsw)) {
/* sigh, try to get just one */
win_begin = pc;
- if (copyin(win_begin, tsw, 4))
+ if (copyin(win_begin, tsw, 4)) {
+ /*
+ * We're off the rails here; don't
+ * bother updating the FP_C.
+ */
return SIGSEGV;
+ }
}
}
assert(win_begin <= pc && !((long)pc & 3));
@@ -695,17 +774,48 @@
if (__predict_true(trigger_pc != 0 && a1 == 0)) {
++alpha_shadow.resolved;
sig = alpha_fp_complete_at(trigger_pc, l, ucode);
+ goto resolved;
} else {
++alpha_shadow.unresolved;
- return sig;
}
-done:
+
+ unresolved: /* obligatory statement */;
+ /*
+ * *ucode contains the exception bits (FP_X_*). We need to
+ * update the FP_C and FPCR, and send a signal for any new
+ * trap that is enabled.
+ */
+ uint64_t orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);
+ uint64_t new_flags = orig_flags | *ucode;
+ uint64_t changed_flags = orig_flags ^ new_flags;
+ KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
+
+ l->l_md.md_flags |= NETBSD_FLAG_TO_FP_C(new_flags);
+
+ kpreempt_disable();
+ if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) {
+ fpu_load();
+ }
+ alpha_pal_wrfen(1);
+ uint64_t orig_fpcr = alpha_read_fpcr();
+ alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, l->l_md.md_flags));
+ uint64_t needsig =
+ changed_flags & FP_C_TO_NETBSD_MASK(l->l_md.md_flags);
+ alpha_pal_wrfen(0);
+ kpreempt_enable();
+
+ if (__predict_false(needsig)) {
+ *ucode = needsig;
+ return SIGFPE;
+ }
+ return 0;
+
+ resolved:
if (sig) {
usertrap_pc = trigger_pc + 1;
l->l_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc;
- return sig;
}
- return 0;
+ return sig;
}
/*
@@ -746,6 +856,11 @@
atomic_inc_ulong(&fpevent_reuse.ev_count);
}
+ if (alpha_fp_complete_debug) {
+ printf("%s: [%s:%d] loading FPCR=0x%lx\n",
+ __func__, l->l_proc->p_comm, l->l_proc->p_pid,
+ pcb->pcb_fp.fpr_cr);
+ }
alpha_pal_wrfen(1);
restorefpstate(&pcb->pcb_fp);
alpha_pal_wrfen(0);
@@ -765,6 +880,11 @@
alpha_pal_wrfen(1);
Home |
Main Index |
Thread Index |
Old Index