Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/m68k/fpe Improve the exponential and hyperbolic fun...
details: https://anonhg.NetBSD.org/src/rev/eecd80a61dd7
branches: trunk
changeset: 349305:eecd80a61dd7
user: isaki <isaki%NetBSD.org@localhost>
date: Mon Dec 05 15:31:01 2016 +0000
description:
Improve the performance of the exponential and hyperbolic functions;
they are now 10..100 times faster.
PR port-m68k/51645 from rin@ (and modified by me)
diffstat:
sys/arch/m68k/fpe/fpu_exp.c | 62 +++++++++++++++----
sys/arch/m68k/fpe/fpu_hyperb.c | 124 ++++++++++++++++------------------------
2 files changed, 97 insertions(+), 89 deletions(-)
diffs (273 lines):
diff -r 8275caec8459 -r eecd80a61dd7 sys/arch/m68k/fpe/fpu_exp.c
--- a/sys/arch/m68k/fpe/fpu_exp.c Mon Dec 05 13:17:28 2016 +0000
+++ b/sys/arch/m68k/fpe/fpu_exp.c Mon Dec 05 15:31:01 2016 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: fpu_exp.c,v 1.8 2013/04/20 04:54:22 isaki Exp $ */
+/* $NetBSD: fpu_exp.c,v 1.9 2016/12/05 15:31:01 isaki Exp $ */
/*
* Copyright (c) 1995 Ken Nakata
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu_exp.c,v 1.8 2013/04/20 04:54:22 isaki Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu_exp.c,v 1.9 2016/12/05 15:31:01 isaki Exp $");
#include <machine/ieee.h>
@@ -100,12 +100,16 @@
}
/*
- * exp(x)
+ * exp(x) = 2^k * exp(r) with k = round(x / ln2) and r = x - k * ln2
+ *
+ * Algorithm partially taken from libm, where exp(r) is approximated by a
+ * rational function of r. We use the Taylor expansion instead.
*/
struct fpn *
fpu_etox(struct fpemu *fe)
{
- struct fpn *fp;
+ struct fpn x, *fp;
+ int j, k;
if (ISNAN(&fe->fe_f2))
return &fe->fe_f2;
@@ -115,19 +119,47 @@
return &fe->fe_f2;
}
- if (fe->fe_f2.fp_sign == 0) {
- /* exp(x) */
- fp = fpu_etox_taylor(fe);
- } else {
- /* 1/exp(-x) */
- fe->fe_f2.fp_sign = 0;
+ CPYFPN(&x, &fe->fe_f2);
+
+ /* k = round(x / ln2) */
+ CPYFPN(&fe->fe_f1, &fe->fe_f2);
+ fpu_const(&fe->fe_f2, FPU_CONST_LN_2);
+ fp = fpu_div(fe);
+ CPYFPN(&fe->fe_f2, fp);
+ fp = fpu_int(fe);
+ if (ISZERO(fp)) {
+ /* k = 0 */
+ CPYFPN(&fe->fe_f2, &x);
fp = fpu_etox_taylor(fe);
+ return fp;
+ }
+ /* extract k as integer format from fpn format */
+ j = FP_LG - fp->fp_exp;
+ if (j < 0) {
+ if (fp->fp_sign)
+ fp->fp_class = FPC_ZERO; /* k < -2^18 */
+ else
+ fp->fp_class = FPC_INF; /* k > 2^18 */
+ return fp;
+ }
+ k = fp->fp_mant[0] >> j;
+ if (fp->fp_sign)
+ k *= -1;
- CPYFPN(&fe->fe_f2, fp);
- fpu_const(&fe->fe_f1, FPU_CONST_1);
- fp = fpu_div(fe);
- }
-
+ /* exp(r) = exp(x - k * ln2) */
+ CPYFPN(&fe->fe_f1, fp);
+ fpu_const(&fe->fe_f2, FPU_CONST_LN_2);
+ fp = fpu_mul(fe);
+ fp->fp_sign = !fp->fp_sign;
+ CPYFPN(&fe->fe_f1, fp);
+ CPYFPN(&fe->fe_f2, &x);
+ fp = fpu_add(fe);
+ CPYFPN(&fe->fe_f2, fp);
+ fp = fpu_etox_taylor(fe);
+
+ /* 2^k */
+ fp->fp_exp += k;
+
return fp;
}
diff -r 8275caec8459 -r eecd80a61dd7 sys/arch/m68k/fpe/fpu_hyperb.c
--- a/sys/arch/m68k/fpe/fpu_hyperb.c Mon Dec 05 13:17:28 2016 +0000
+++ b/sys/arch/m68k/fpe/fpu_hyperb.c Mon Dec 05 15:31:01 2016 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: fpu_hyperb.c,v 1.16 2013/10/11 03:37:08 isaki Exp $ */
+/* $NetBSD: fpu_hyperb.c,v 1.17 2016/12/05 15:31:01 isaki Exp $ */
/*
* Copyright (c) 1995 Ken Nakata
@@ -57,15 +57,12 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu_hyperb.c,v 1.16 2013/10/11 03:37:08 isaki Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu_hyperb.c,v 1.17 2016/12/05 15:31:01 isaki Exp $");
#include <machine/ieee.h>
#include "fpu_emulate.h"
-/* The number of items to terminate the Taylor expansion */
-#define MAX_ITEMS (2000)
-
/*
* fpu_hyperb.c: defines the following functions
*
@@ -137,71 +134,14 @@
}
/*
- * taylor expansion used by sinh(), cosh().
+ * exp(x) + exp(-x)
+ * cosh(x) = ------------------
+ * 2
*/
-static struct fpn *
-__fpu_sinhcosh_taylor(struct fpemu *fe, struct fpn *s0, uint32_t f)
-{
- struct fpn res;
- struct fpn x2;
- struct fpn *s1;
- struct fpn *r;
- int sign;
- uint32_t k;
-
- /* x2 := x * x */
- CPYFPN(&fe->fe_f1, &fe->fe_f2);
- r = fpu_mul(fe);
- CPYFPN(&x2, r);
-
- /* res := s0 */
- CPYFPN(&res, s0);
-
- sign = 1; /* sign := (-1)^n */
-
- for (; f < (2 * MAX_ITEMS); ) {
- /* (f1 :=) s0 * x^2 */
- CPYFPN(&fe->fe_f1, s0);
- CPYFPN(&fe->fe_f2, &x2);
- r = fpu_mul(fe);
- CPYFPN(&fe->fe_f1, r);
-
- /*
- * for sinh(), s1 := s0 * x^2 / (2n+1)2n
- * for cosh(), s1 := s0 * x^2 / 2n(2n-1)
- */
- k = f * (f + 1);
- fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
- s1 = fpu_div(fe);
-
- /* break if s1 is enough small */
- if (ISZERO(s1))
- break;
- if (res.fp_exp - s1->fp_exp >= EXT_FRACBITS)
- break;
-
- /* s0 := s1 for next loop */
- CPYFPN(s0, s1);
-
- /* res += s1 */
- CPYFPN(&fe->fe_f2, s1);
- CPYFPN(&fe->fe_f1, &res);
- r = fpu_add(fe);
- CPYFPN(&res, r);
-
- f += 2;
- sign ^= 1;
- }
-
- CPYFPN(&fe->fe_f2, &res);
- return &fe->fe_f2;
-}
-
struct fpn *
fpu_cosh(struct fpemu *fe)
{
- struct fpn s0;
- struct fpn *r;
+ struct fpn x, *fp;
if (ISNAN(&fe->fe_f2))
return &fe->fe_f2;
@@ -211,17 +151,37 @@
return &fe->fe_f2;
}
- fpu_const(&s0, FPU_CONST_1);
- r = __fpu_sinhcosh_taylor(fe, &s0, 1);
+ /* if x is +0/-0, return 1 */ /* XXX is this necessary? */
+ if (ISZERO(&fe->fe_f2)) {
+ fpu_const(&fe->fe_f2, FPU_CONST_1);
+ return &fe->fe_f2;
+ }
+
+ fp = fpu_etox(fe);
+ CPYFPN(&x, fp);
- return r;
+ fpu_const(&fe->fe_f1, FPU_CONST_1);
+ CPYFPN(&fe->fe_f2, fp);
+ fp = fpu_div(fe);
+
+ CPYFPN(&fe->fe_f1, fp);
+ CPYFPN(&fe->fe_f2, &x);
+ fp = fpu_add(fe);
+
+ fp->fp_exp--;
+
+ return fp;
}
+/*
+ * exp(x) - exp(-x)
+ * sinh(x) = ------------------
+ * 2
+ */
struct fpn *
fpu_sinh(struct fpemu *fe)
{
- struct fpn s0;
- struct fpn *r;
+ struct fpn x, *fp;
if (ISNAN(&fe->fe_f2))
return &fe->fe_f2;
@@ -232,12 +192,28 @@
if (ISZERO(&fe->fe_f2))
return &fe->fe_f2;
- CPYFPN(&s0, &fe->fe_f2);
- r = __fpu_sinhcosh_taylor(fe, &s0, 2);
+ fp = fpu_etox(fe);
+ CPYFPN(&x, fp);
+
+ fpu_const(&fe->fe_f1, FPU_CONST_1);
+ CPYFPN(&fe->fe_f2, fp);
+ fp = fpu_div(fe);
- return r;
+ fp->fp_sign = 1;
+ CPYFPN(&fe->fe_f1, fp);
+ CPYFPN(&fe->fe_f2, &x);
+ fp = fpu_add(fe);
+
+ fp->fp_exp--;
+
+ return fp;
}
+/*
+ * sinh(x)
+ * tanh(x) = ---------
+ * cosh(x)
+ */
struct fpn *
fpu_tanh(struct fpemu *fe)
{
Home |
Main Index |
Thread Index |
Old Index