Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/m68k/fpe Introduce the CORDIC algorithm.
details: https://anonhg.NetBSD.org/src/rev/63ce6cd6a19c
branches: trunk
changeset: 786189:63ce6cd6a19c
user: isaki <isaki%NetBSD.org@localhost>
date: Fri Apr 19 13:31:11 2013 +0000
description:
Introduce the CORDIC algorithm.
o sine and cosine (e.g., FSIN, FCOS and FSINCOS instructions) is now
calculated in the CORDIC instead of Taylor expansion.
o tangent (FTAN) is not touched from a viewpoint of the code size.
o The CORDIC is applicable for hyperbolic functions (e.g., FSINH,
FCOSH, FTANH instructions), but I didn't use it because its working
range is poor.
o The CORDIC is also usable for inverse trigonometric functions,
I will commit it at next phase.
o The code size becomes a bit big. I cannot evaluate speed on m68k
for some reasons, but in test on i386 the CORDIC is approximately
100 times faster in sin/cos.
diffstat:
sys/arch/m68k/fpe/files.fpe | 3 +-
sys/arch/m68k/fpe/fpu_cordic.c | 646 ++++++++++++++++++++++++++++++++++++++++
sys/arch/m68k/fpe/fpu_emulate.h | 11 +-
sys/arch/m68k/fpe/fpu_hyperb.c | 71 ++++-
sys/arch/m68k/fpe/fpu_trig.c | 177 +---------
5 files changed, 751 insertions(+), 157 deletions(-)
diffs (truncated from 1050 to 300 lines):
diff -r a59683227822 -r 63ce6cd6a19c sys/arch/m68k/fpe/files.fpe
--- a/sys/arch/m68k/fpe/files.fpe Fri Apr 19 13:14:10 2013 +0000
+++ b/sys/arch/m68k/fpe/files.fpe Fri Apr 19 13:31:11 2013 +0000
@@ -1,10 +1,11 @@
-# $NetBSD: files.fpe,v 1.2 1995/11/03 04:51:51 briggs Exp $
+# $NetBSD: files.fpe,v 1.3 2013/04/19 13:31:11 isaki Exp $
# Config(.new) file for m68k floating point emulator.
# Included by ports that need it.
file arch/m68k/fpe/fpu_add.c fpu_emulate
file arch/m68k/fpe/fpu_calcea.c fpu_emulate
+file arch/m68k/fpe/fpu_cordic.c fpu_emulate
file arch/m68k/fpe/fpu_div.c fpu_emulate
file arch/m68k/fpe/fpu_emulate.c fpu_emulate
file arch/m68k/fpe/fpu_exp.c fpu_emulate
diff -r a59683227822 -r 63ce6cd6a19c sys/arch/m68k/fpe/fpu_cordic.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/m68k/fpe/fpu_cordic.c Fri Apr 19 13:31:11 2013 +0000
@@ -0,0 +1,646 @@
+/* $NetBSD: fpu_cordic.c,v 1.1 2013/04/19 13:31:11 isaki Exp $ */
+
+/*
+ * Copyright (c) 2013 Tetsuya Isaki. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: fpu_cordic.c,v 1.1 2013/04/19 13:31:11 isaki Exp $");
+
+#include <machine/ieee.h>
+
+#include "fpu_emulate.h"
+
+/*
+ * sfpn = shoftened fp number; the idea is from fpu_log.c but not the same.
+ * The most significant byte of sp_m0 is EXP (signed byte) and the rest
+ * of sp_m0 is fp_mant[0].
+ */
+struct sfpn {
+ uint32_t sp_m0;
+ uint32_t sp_m1;
+ uint32_t sp_m2;
+};
+
+#if defined(CORDIC_BOOTSTRAP)
+/*
+ * This is a bootstrap code to generate a pre-calculated tables such as
+ * atan_table[] and atanh_table[]. However, it's just for reference.
+ * If you want to run the bootstrap, you will define CORDIC_BOOTSTRAP
+ * and modify these files as a userland application.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+
+static void prepare_cordic_const(struct fpemu *);
+static struct fpn *fpu_gain1_cordic(struct fpemu *);
+static struct fpn *fpu_gain2_cordic(struct fpemu *);
+static struct fpn *fpu_atan_tayler(struct fpemu *);
+static void printf_fpn(const struct fpn *);
+static void printf_sfpn(const struct sfpn *);
+static void fpn_to_sfpn(struct sfpn *, const struct fpn *);
+
+static struct sfpn atan_table[EXT_FRACBITS];
+static struct sfpn atanh_table[EXT_FRACBITS];
+static struct fpn inv_gain1;
+static struct fpn inv_gain2;
+
+int
+main(int argc, char *argv[])
+{
+ struct fpemu dummyfe;
+ int i;
+ struct fpn fp;
+
+ memset(&dummyfe, 0, sizeof(dummyfe));
+ prepare_cordic_const(&dummyfe);
+
+ /* output as source code */
+ printf("static const struct sfpn atan_table[] = {\n");
+ for (i = 0; i < EXT_FRACBITS; i++) {
+ printf("\t");
+ printf_sfpn(&atan_table[i]);
+ printf(",\n");
+ }
+ printf("};\n\n");
+
+ printf("static const struct sfpn atanh_table[] = {\n");
+ for (i = 0; i < EXT_FRACBITS; i++) {
+ printf("\t");
+ printf_sfpn(&atanh_table[i]);
+ printf(",\n");
+ }
+ printf("};\n\n");
+
+ printf("const struct fpn fpu_cordic_inv_gain1 =\n\t");
+ printf_fpn(&inv_gain1);
+ printf(";\n\n");
+
+ printf("const struct fpn fpu_cordic_inv_gain2 =\n\t");
+ printf_fpn(&inv_gain2);
+ printf(";\n\n");
+}
+
+/*
+ * This routine uses fpu_const(), fpu_add(), fpu_div(), fpu_logn()
+ * and fpu_atan_tayler() as bootstrap.
+ */
+static void
+prepare_cordic_const(struct fpemu *fe)
+{
+ struct fpn t;
+ struct fpn x;
+ struct fpn *r;
+ int i;
+
+ /* atan_table and atanh_table */
+ fpu_const(&t, FPU_CONST_1);
+ for (i = 0; i < EXT_FRACBITS; i++) {
+ /* atan(t) */
+ CPYFPN(&fe->fe_f2, &t);
+ r = fpu_atan_tayler(fe);
+ fpn_to_sfpn(&atan_table[i], r);
+
+ /* t /= 2 */
+ t.fp_exp--;
+
+ /* (1-t) */
+ fpu_const(&fe->fe_f1, FPU_CONST_1);
+ CPYFPN(&fe->fe_f2, &t);
+ fe->fe_f2.fp_sign = 1;
+ r = fpu_add(fe);
+ CPYFPN(&x, r);
+
+ /* (1+t) */
+ fpu_const(&fe->fe_f1, FPU_CONST_1);
+ CPYFPN(&fe->fe_f2, &t);
+ r = fpu_add(fe);
+
+ /* r = (1+t)/(1-t) */
+ CPYFPN(&fe->fe_f1, r);
+ CPYFPN(&fe->fe_f2, &x);
+ r = fpu_div(fe);
+
+ /* r = log(r) */
+ CPYFPN(&fe->fe_f2, r);
+ r = fpu_logn(fe);
+
+ /* r /= 2 */
+ r->fp_exp--;
+
+ fpn_to_sfpn(&atanh_table[i], r);
+ }
+
+ /* inv_gain1 = 1 / gain1cordic() */
+ r = fpu_gain1_cordic(fe);
+ CPYFPN(&fe->fe_f2, r);
+ fpu_const(&fe->fe_f1, FPU_CONST_1);
+ r = fpu_div(fe);
+ CPYFPN(&inv_gain1, r);
+
+ /* inv_gain2 = 1 / gain2cordic() */
+ r = fpu_gain2_cordic(fe);
+ CPYFPN(&fe->fe_f2, r);
+ fpu_const(&fe->fe_f1, FPU_CONST_1);
+ r = fpu_div(fe);
+ CPYFPN(&inv_gain2, r);
+}
+
+static struct fpn *
+fpu_gain1_cordic(struct fpemu *fe)
+{
+ struct fpn x;
+ struct fpn y;
+ struct fpn z;
+ struct fpn v;
+
+ fpu_const(&x, FPU_CONST_1);
+ fpu_const(&y, FPU_CONST_0);
+ fpu_const(&z, FPU_CONST_0);
+ CPYFPN(&v, &x);
+ v.fp_sign = !v.fp_sign;
+
+ fpu_cordit1(fe, &x, &y, &z, &v);
+ CPYFPN(&fe->fe_f2, &x);
+ return &fe->fe_f2;
+}
+
+static struct fpn *
+fpu_gain2_cordic(struct fpemu *fe)
+{
+ struct fpn x;
+ struct fpn y;
+ struct fpn z;
+ struct fpn v;
+
+ fpu_const(&x, FPU_CONST_1);
+ fpu_const(&y, FPU_CONST_0);
+ fpu_const(&z, FPU_CONST_0);
+ CPYFPN(&v, &x);
+ v.fp_sign = !v.fp_sign;
+
+ fpu_cordit2(fe, &x, &y, &z, &v);
+ CPYFPN(&fe->fe_f2, &x);
+ return &fe->fe_f2;
+}
+
+/*
+ * arctan(x) = pi/4 (for |x| = 1)
+ *
+ * x^3 x^5 x^7
+ * arctan(x) = x - --- + --- - --- + ... (for |x| < 1)
+ * 3 5 7
+ */
+static struct fpn *
+fpu_atan_tayler(struct fpemu *fe)
+{
+ struct fpn res;
+ struct fpn x2;
+ struct fpn s0;
+ struct fpn *s1;
+ struct fpn *r;
+ uint32_t k;
+
+ /* arctan(1) is pi/4 */
+ if (fe->fe_f2.fp_exp == 0) {
+ fpu_const(&fe->fe_f2, FPU_CONST_PI);
+ fe->fe_f2.fp_exp -= 2;
+ return &fe->fe_f2;
+ }
+
+ /* s0 := x */
+ CPYFPN(&s0, &fe->fe_f2);
+
+ /* res := x */
+ CPYFPN(&res, &fe->fe_f2);
+
+ /* x2 := x * x */
+ CPYFPN(&fe->fe_f1, &fe->fe_f2);
+ r = fpu_mul(fe);
+ CPYFPN(&x2, r);
+
+ k = 3;
+ for (;;) {
+ /* s1 := -s0 * x2 */
+ CPYFPN(&fe->fe_f1, &s0);
+ CPYFPN(&fe->fe_f2, &x2);
+ s1 = fpu_mul(fe);
+ s1->fp_sign ^= 1;
+ CPYFPN(&fe->fe_f1, s1);
+
+ /* s0 := s1 for next loop */
+ CPYFPN(&s0, s1);
+
+ /* s1 := s1 / k */
+ fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
+ s1 = fpu_div(fe);
+
+ /* break if s1 is enough small */
+ if (ISZERO(s1))
+ break;
+ if (res.fp_exp - s1->fp_exp >= FP_NMANT)
+ break;
+
+ /* res += s1 */
+ CPYFPN(&fe->fe_f2, s1);
+ CPYFPN(&fe->fe_f1, &res);
+ r = fpu_add(fe);
+ CPYFPN(&res, r);
+
+ k += 2;
+ }
+
+ CPYFPN(&fe->fe_f2, &res);
+ return &fe->fe_f2;
+}
+
Home |
Main Index |
Thread Index |
Old Index