matt-nb5-mips64 and FP emulation
Hi,
While working with the matt-nb5-mips64 branch on a platform without an FPU,
I found that the mips/fp.S assembly code still uses t8 as a general-purpose
register, while t8 is now the curlwp register.
Of course, bad things happen because of this.
A quick-and-dirty workaround is to save/restore s8 and use it instead of t8
(see the attached patch); maybe there is a better way.
I couldn't find a way to use one fewer register, but I didn't try much ...
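
For context, the patch just follows the usual callee-saved register
convention: a new CALLFRAME_S8 slot is added to the MachEmulateFP call frame,
s8 is saved there in the prologue and restored on every exit path, and the
body of the emulator then uses s8 as the sticky/scratch register that t8 used
to be. Roughly this pattern (a minimal o32-style sketch with a made-up label
and frame size, not the actual fp.S prologue/epilogue):

	# Illustrative sketch only (hypothetical label and frame), not code from fp.S.
	.set	noreorder
	.text
	.globl	s8_scratch_demo
	.ent	s8_scratch_demo
s8_scratch_demo:
	addiu	sp, sp, -8		# reserve a slot for the saved register
	sw	s8, 0(sp)		# save caller's s8 (callee-saved by the ABI)
	move	s8, zero		# s8 is now free as a scratch/sticky register,
	or	s8, s8, a0		#   so t8 (now curlwp) is never touched
	move	v0, s8			# return whatever was accumulated in s8
	lw	s8, 0(sp)		# restore caller's s8 before returning
	jr	ra
	 addiu	sp, sp, 8		# pop the frame (branch delay slot)
	.end	s8_scratch_demo

The real patch does the same thing with the REG_S/REG_L and SZREG macros so
the one code path works for both the 32-bit and 64-bit ABIs.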
--
Manuel Bouyer <bouyer%antioche.eu.org@localhost>
NetBSD: 26 years of experience will always make the difference
--
Index: fp.S
===================================================================
--- fp.S (revision 55)
+++ fp.S (revision 56)
@@ -118,10 +118,12 @@
#if defined(__mips_o32) || defined(__mips_o64)
#define CALLFRAME_FRAME (CALLFRAME_SIZ + 1*SZREG) /* a1 slot */
#define CALLFRAME_CAUSE (CALLFRAME_SIZ + 2*SZREG) /* a2 slot */
+#define CALLFRAME_S8 (CALLFRAME_SIZ + 3*SZREG) /* a3 slot */
#endif
#if defined(__mips_n32) || defined(__mips_n64)
#define CALLFRAME_FRAME (1*SZREG)
#define CALLFRAME_CAUSE (2*SZREG)
+#define CALLFRAME_S8 (3*SZREG)
#if CALLFRAME_RA == CALLFRAME_FRAME || CALLFRAME_RA == CALLFRAME_CAUSE
#error N32/N64 ABI callframe error
#endif
@@ -131,6 +133,7 @@
REG_S ra, CALLFRAME_RA(sp)
REG_S a1, CALLFRAME_FRAME(sp)
REG_S a2, CALLFRAME_CAUSE(sp)
+ REG_S s8, CALLFRAME_S8(sp)
/*
* Decode the FMT field (bits 25-21) and FUNCTION field (bits 5-0).
*/
@@ -856,6 +859,7 @@
/* Free MachEmulateFP call frame */
REG_L ra, CALLFRAME_RA(sp)
+ REG_L s8, CALLFRAME_S8(sp)
PTR_ADDU sp, CALLFRAME_SIZ
j _C_LABEL(bcemul_delay_slot)
@@ -920,7 +924,7 @@
* Perform the addition.
*/
5:
- move t8, zero # no shifted bits (sticky reg)
+ move s8, zero # no shifted bits (sticky reg)
beq t1, ta1, 4f # no shift needed
subu v0, t1, ta1 # v0 = difference of exponents
move v1, v0 # v1 = abs(difference)
@@ -928,7 +932,7 @@
negu v1
1:
ble v1, SFRAC_BITS+2, 2f # is difference too great?
- li t8, STICKYBIT # set the sticky bit
+ li s8, STICKYBIT # set the sticky bit
bge v0, zero, 1f # check which exp is larger
move t1, ta1 # result exp is FTs
move t2, zero # FSs fraction shifted is zero
@@ -941,11 +945,11 @@
subu t9, t9, v1
bgt v0, zero, 3f # if FS > FT, shift FTs frac
move t1, ta1 # FT > FS, result exp is FTs
- sll t8, t2, t9 # save bits shifted out
+ sll s8, t2, t9 # save bits shifted out
srl t2, t2, v1 # shift FSs fraction
b 4f
3:
- sll t8, ta2, t9 # save bits shifted out
+ sll s8, ta2, t9 # save bits shifted out
srl ta2, ta2, v1 # shift FTs fraction
4:
bne t0, ta0, 1f # if signs differ, subtract
@@ -964,15 +968,15 @@
and t0, t0, ta0 # compute result sign
b result_fs_s
2:
- sltu t9, zero, t8 # compute t2:zero - ta2:t8
- subu t8, zero, t8
+ sltu t9, zero, s8 # compute t2:zero - ta2:s8
+ subu s8, zero, s8
subu t2, t2, ta2 # subtract fractions
subu t2, t2, t9 # subtract barrow
b norm_s
3:
move t0, ta0 # sign of result = FTs
- sltu t9, zero, t8 # compute ta2:zero - t2:t8
- subu t8, zero, t8
+ sltu t9, zero, s8 # compute ta2:zero - t2:s8
+ subu s8, zero, s8
subu t2, ta2, t2 # subtract fractions
subu t2, t2, t9 # subtract barrow
b norm_s
@@ -1045,7 +1049,7 @@
* Perform the addition.
*/
5:
- move t8, zero # no shifted bits (sticky reg)
+ move s8, zero # no shifted bits (sticky reg)
beq t1, ta1, 4f # no shift needed
subu v0, t1, ta1 # v0 = difference of exponents
move v1, v0 # v1 = abs(difference)
@@ -1053,7 +1057,7 @@
negu v1
1:
ble v1, DFRAC_BITS+2, 2f # is difference too great?
- li t8, STICKYBIT # set the sticky bit
+ li s8, STICKYBIT # set the sticky bit
bge v0, zero, 1f # check which exp is larger
move t1, ta1 # result exp is FTs
move t2, zero # FSs fraction shifted is zero
@@ -1070,15 +1074,15 @@
blt v1, t9, 1f # shift right by < 32?
subu v1, v1, t9
subu t9, t9, v1
- sll t8, t2, t9 # save bits shifted out
+ sll s8, t2, t9 # save bits shifted out
sltu t9, zero, t3 # dont lose any one bits
- or t8, t8, t9 # save sticky bit
+ or s8, s8, t9 # save sticky bit
srl t3, t2, v1 # shift FSs fraction
move t2, zero
b 4f
1:
subu t9, t9, v1
- sll t8, t3, t9 # save bits shifted out
+ sll s8, t3, t9 # save bits shifted out
srl t3, t3, v1 # shift FSs fraction
sll t9, t2, t9 # save bits shifted out of t2
or t3, t3, t9 # and put into t3
@@ -1088,13 +1092,13 @@
blt v1, t9, 1f # shift right by < 32?
subu v1, v1, t9
subu t9, t9, v1
- sll t8, ta2, t9 # save bits shifted out
+ sll s8, ta2, t9 # save bits shifted out
srl ta3, ta2, v1 # shift FTs fraction
move ta2, zero
b 4f
1:
subu t9, t9, v1
- sll t8, ta3, t9 # save bits shifted out
+ sll s8, ta3, t9 # save bits shifted out
srl ta3, ta3, v1 # shift FTs fraction
sll t9, ta2, t9 # save bits shifted out of t2
or ta3, ta3, t9 # and put into t3
@@ -1122,8 +1126,8 @@
and t0, t0, ta0 # compute result sign
b result_fs_d
2:
- beq t8, zero, 1f # compute t2:t3:zero - ta2:ta3:t8
- subu t8, zero, t8
+ beq s8, zero, 1f # compute t2:t3:zero - ta2:ta3:s8
+ subu s8, zero, s8
sltu v0, t3, 1 # compute barrow out
subu t3, t3, 1 # subtract barrow
subu t2, t2, v0
@@ -1135,8 +1139,8 @@
b norm_d
3:
move t0, ta0 # sign of result = FTs
- beq t8, zero, 1f # compute ta2:ta3:zero - t2:t3:t8
- subu t8, zero, t8
+ beq s8, zero, 1f # compute ta2:ta3:zero - t2:t3:s8
+ subu s8, zero, s8
sltu v0, ta3, 1 # compute barrow out
subu ta3, ta3, 1 # subtract barrow
subu ta2, ta2, v0
@@ -1189,7 +1193,7 @@
addu t1, t1, ta1 # compute result exponent
addu t1, t1, 9 # account for binary point
multu t2, ta2 # multiply fractions
- mflo t8
+ mflo s8
mfhi t2
b norm_s
@@ -1247,20 +1251,20 @@
move ta0, t2 # free up t2,t3 for result
move ta1, t3
mflo a3 # save low order bits
- mfhi t8
- not v0, t8
+ mfhi s8
+ not v0, s8
multu ta0, ta3 # multiply FS(high) * FT(low)
mflo v1
mfhi t3 # init low result
sltu v0, v0, v1 # compute carry
- addu t8, v1
+ addu s8, v1
multu ta1, ta2 # multiply FS(low) * FT(high)
addu t3, t3, v0 # add carry
- not v0, t8
+ not v0, s8
mflo v1
mfhi t2
sltu v0, v0, v1
- addu t8, v1
+ addu s8, v1
multu ta0, ta2 # multiply FS(high) * FT(high)
addu t3, v0
not v1, t3
@@ -1273,8 +1277,8 @@
sltu v0, v0, v1
addu t2, v0
addu t3, v1
- sltu a3, zero, a3 # reduce t8,a3 to just t8
- or t8, a3
+ sltu a3, zero, a3 # reduce s8,a3 to just s8
+ or s8, a3
b norm_d
/*
@@ -1333,18 +1337,18 @@
subu t1, t1, ta1 # compute exponent
subu t1, t1, 3 # compensate for result position
li v0, SFRAC_BITS+3 # number of bits to divide
- move t8, t2 # init dividend
+ move s8, t2 # init dividend
move t2, zero # init result
1:
- bltu t8, ta2, 3f # is dividend >= divisor?
+ bltu s8, ta2, 3f # is dividend >= divisor?
2:
- subu t8, t8, ta2 # subtract divisor from dividend
+ subu s8, s8, ta2 # subtract divisor from dividend
or t2, t2, 1 # remember that we did
- bne t8, zero, 3f # if not done, continue
+ bne s8, zero, 3f # if not done, continue
sll t2, t2, v0 # shift result to final position
b norm_s
3:
- sll t8, t8, 1 # shift dividend
+ sll s8, s8, 1 # shift dividend
sll t2, t2, 1 # shift result
subu v0, v0, 1 # are we done?
bne v0, zero, 1b # no, continue
@@ -1414,21 +1418,21 @@
subu t1, t1, ta1 # compute exponent
subu t1, t1, 3 # compensate for result position
li v0, DFRAC_BITS+3 # number of bits to divide
- move t8, t2 # init dividend
+ move s8, t2 # init dividend
move t9, t3
move t2, zero # init result
move t3, zero
1:
- bltu t8, ta2, 3f # is dividend >= divisor?
- bne t8, ta2, 2f
+ bltu s8, ta2, 3f # is dividend >= divisor?
+ bne s8, ta2, 2f
bltu t9, ta3, 3f
2:
sltu v1, t9, ta3 # subtract divisor from dividend
subu t9, t9, ta3
- subu t8, t8, ta2
- subu t8, t8, v1
+ subu s8, s8, ta2
+ subu s8, s8, v1
or t3, t3, 1 # remember that we did
- bne t8, zero, 3f # if not done, continue
+ bne s8, zero, 3f # if not done, continue
bne t9, zero, 3f
li v1, 32 # shift result to final position
blt v0, v1, 2f # shift < 32 bits?
@@ -1444,9 +1448,9 @@
sll t3, t3, v0
b norm_d
3:
- sll t8, t8, 1 # shift dividend
+ sll s8, s8, 1 # shift dividend
srl v1, t9, 31 # save bit shifted out
- or t8, t8, v1 # and put into upper part
+ or s8, s8, v1 # and put into upper part
sll t9, t9, 1
sll t2, t2, 1 # shift result
srl v1, t3, 31 # save bit shifted out
@@ -1455,7 +1459,7 @@
subu v0, v0, 1 # are we done?
bne v0, zero, 1b # no, continue
sltu v0, zero, t9 # be sure to save any one bits
- or t8, t8, v0 # from the lower remainder
+ or s8, s8, v0 # from the lower remainder
b norm_d
#ifdef MIPS3_PLUS
@@ -1486,16 +1490,16 @@
/* generate sqrt(FS) bit by bit */
add t2, t2, t2 # ix += ix;
move ta0, zero # q = 0; (result)
- li t8, SIMPL_ONE<<1 # r = 0x01000000;
+ li s8, SIMPL_ONE<<1 # r = 0x01000000;
move ta2, zero # s = 0;
-1: beq t8, zero, 3f # while (r != 0) {
- add t9, ta2, t8 # t = s + r;
+1: beq s8, zero, 3f # while (r != 0) {
+ add t9, ta2, s8 # t = s + r;
bgt t9, t2, 2f # if (t <= ix)
- add ta2, t9, t8 # s = t + r;
+ add ta2, t9, s8 # s = t + r;
sub t2, t2, t9 # ix -= t;
- add ta0, ta0, t8 # q += r;
+ add ta0, ta0, s8 # q += r;
2: add t2, t2, t2 # ix += ix;
- srl t8, t8, 1 # r >>= 1;
+ srl s8, s8, 1 # r >>= 1;
b 1b # }
3:
/* rounding -- all mips rounding modes use the same rounding here */
@@ -1556,30 +1560,30 @@
move ta1, zero # q1 = 0; (result)
move ta2, zero # s0 = 0;
move ta3, zero # s1 = 0;
- li t8, DIMPL_ONE<<1 # t = 0x00200000;
-1: beq t8, zero, 3f # while (r != 0) {
- add t9, ta2, t8 # t = s0+r;
+ li s8, DIMPL_ONE<<1 # t = 0x00200000;
+1: beq s8, zero, 3f # while (r != 0) {
+ add t9, ta2, s8 # t = s0+r;
bgt t9, t2, 2f # if (t <= ix0)
- add ta2, t9, t8 # s0 = t + r;
+ add ta2, t9, s8 # s0 = t + r;
sub t2, t2, t9 # ix0 -= t;
- add ta0, ta0, t8 # q += r;
+ add ta0, ta0, s8 # q += r;
2: add t2, t2, t2 # ix0 += ix0;
srl v0, t3, 31 # ix0 += (ix1&sign)>>31)
and v0, v0, 1 # ...
add t2, t2, v0 # ...
addu t3, t3, t3 # ix1 += ix1;
- srl t8, t8, 1 # r >>= 1;
+ srl s8, s8, 1 # r >>= 1;
b 1b # }
3:
/* then lower bits */
- li t8, 1<<31 # r = sign;
-1: beq t8, zero, 4f # while (r != 0) {
- addu v1, ta3, t8 # t1 = s1 + r;
+ li s8, 1<<31 # r = sign;
+1: beq s8, zero, 4f # while (r != 0) {
+ addu v1, ta3, s8 # t1 = s1 + r;
move t9, ta2 # t = s0;
blt t9, t2, 2f # if ( (t<ix0) ||
bne t9, t2, 3f # ((t == ix0) &&
bgtu v1, t3, 3f # (t1 <= ix1)))
-2: addu ta3, v1, t8 # s1 = t1 + r;
+2: addu ta3, v1, s8 # s1 = t1 + r;
srl v0, v1, 31 # if (((t1&sign)==sign) &&
and v0, v0, 1 # ...
beq v0, zero, 2f # ...
@@ -1591,13 +1595,13 @@
bgeu t3, v1, 2f # if (ix1 < t1)
sub t2, t2, 1 # ix0 -= 1;
2: subu t3, t3, v1 # ix1 -= t1;
- addu ta1, ta1, t8 # q1 += r;
+ addu ta1, ta1, s8 # q1 += r;
3: add t2, t2, t2 # ix0 += ix0;
srl v0, t3, 31 # ix0 += (ix1&sign)>>31)
and v0, v0, 1 # ...
add t2, t2, v0 # ...
addu t3, t3, t3 # ix1 += ix1;
- srl t8, t8, 1 # r >>= 1;
+ srl s8, s8, 1 # r >>= 1;
b 1b # }
4:
@@ -1718,8 +1722,8 @@
bne t1, DEXP_INF, 1f # is FS an infinity?
li t1, SEXP_INF # convert to single
sll t2, t2, 3 # convert D fraction to S
- srl t8, t3, 32 - 3
- or t2, t2, t8
+ srl s8, t3, 32 - 3
+ or t2, t2, s8
b result_fs_s
1:
bne t1, zero, 2f # is FS zero?
@@ -1734,9 +1738,9 @@
or t2, t2, DIMPL_ONE # add implied one bit
3:
sll t2, t2, 3 # convert D fraction to S
- srl t8, t3, 32 - 3
- or t2, t2, t8
- sll t8, t3, 3
+ srl s8, t3, 32 - 3
+ or t2, t2, s8
+ sll s8, t3, 3
b norm_noshift_s
/*
@@ -1795,7 +1799,7 @@
2:
negu t9 # shift right by t9
subu v0, v0, t9
- sll t8, t2, v0 # save bits shifted out
+ sll s8, t2, v0 # save bits shifted out
srl t2, t2, t9
b norm_noshift_s
@@ -1812,7 +1816,7 @@
bne t1, zero, 2f # is FS denormalized or zero?
beq t2, zero, result_fs_d # is FS zero?
jal _C_LABEL(renorm_fs_s)
- move t8, zero
+ move s8, zero
b norm_d
2:
addu t1, t1, DEXP_BIAS - SEXP_BIAS # bias exponent correctly
@@ -1965,10 +1969,10 @@
1:
negu v0 # shift right by v0
subu v1, v1, v0
- sll t8, t3, v1 # save bits shifted out
- sltu t8, zero, t8 # dont lose any ones
+ sll s8, t3, v1 # save bits shifted out
+ sltu s8, zero, s8 # dont lose any ones
srl t3, t3, v0 # shift FSs fraction
- or t3, t3, t8
+ or t3, t3, s8
sll t9, t2, v1 # save bits shifted out of t2
or t3, t3, t9 # and put into t3
srl t2, t2, v0
@@ -2163,7 +2167,7 @@
move v0, t2
move t9, zero # t9 = num of leading zeros
bne t2, zero, 1f
- move v0, t8
+ move v0, s8
addu t9, 32
1:
srl v1, v0, 16
@@ -2190,7 +2194,7 @@
bne v1, zero, 1f
addu t9, 1
/*
- * Now shift t2,t8 the correct number of bits.
+ * Now shift t2,s8 the correct number of bits.
*/
1:
subu t9, t9, SLEAD_ZEROS # dont count leading zeros
@@ -2199,20 +2203,20 @@
li v1, 32
blt t9, zero, 1f # if shift < 0, shift right
subu v1, v1, t9
- sll t2, t2, t9 # shift t2,t8 left
- srl v0, t8, v1 # save bits shifted out
+ sll t2, t2, t9 # shift t2,s8 left
+ srl v0, s8, v1 # save bits shifted out
or t2, t2, v0
- sll t8, t8, t9
+ sll s8, s8, t9
b norm_noshift_s
1:
- negu t9 # shift t2,t8 right by t9
+ negu t9 # shift t2,s8 right by t9
subu v1, v1, t9
- sll v0, t8, v1 # save bits shifted out
+ sll v0, s8, v1 # save bits shifted out
sltu v0, zero, v0 # be sure to save any one bits
- srl t8, t8, t9
- or t8, t8, v0
+ srl s8, s8, t9
+ or s8, s8, v0
sll v0, t2, v1 # save bits shifted out
- or t8, t8, v0
+ or s8, s8, v0
srl t2, t2, t9
norm_noshift_s:
move ta1, t1 # save unrounded exponent
@@ -2226,7 +2230,7 @@
1:
bne t0, zero, 5f # if sign is negative, truncate
2:
- beq t8, zero, 5f # if exact, continue
+ beq s8, zero, 5f # if exact, continue
addu t2, t2, 1 # add rounding bit
bne t2, SIMPL_ONE<<1, 5f # need to adjust exponent?
addu t1, t1, 1 # adjust exponent
@@ -2234,8 +2238,8 @@
b 5f
3:
li v0, GUARDBIT # load guard bit for rounding
- addu v0, v0, t8 # add remainder
- sltu v1, v0, t8 # compute carry out
+ addu v0, v0, s8 # add remainder
+ sltu v1, v0, s8 # compute carry out
beq v1, zero, 4f # if no carry, continue
addu t2, t2, 1 # add carry to result
bne t2, SIMPL_ONE<<1, 4f # need to adjust exponent?
@@ -2247,7 +2251,7 @@
5:
bgt t1, SEXP_MAX, overflow_s # overflow?
blt t1, SEXP_MIN, underflow_s # underflow?
- bne t8, zero, inexact_s # is result inexact?
+ bne s8, zero, inexact_s # is result inexact?
addu t1, t1, SEXP_BIAS # bias exponent
and t2, t2, ~SIMPL_ONE # clear implied one bit
b result_fs_s
@@ -2325,7 +2329,7 @@
move t1, ta1 # get unrounded exponent
move t2, ta2 # get unrounded fraction
li t9, SEXP_MIN # compute shift amount
- subu t9, t9, t1 # shift t2,t8 right by t9
+ subu t9, t9, t1 # shift t2,s8 right by t9
blt t9, SFRAC_BITS+2, 3f # shift all the bits out?
move t1, zero # result is inexact zero
move t2, zero
@@ -2348,9 +2352,9 @@
3:
li v1, 32
subu v1, v1, t9
- sltu v0, zero, t8 # be sure to save any one bits
- sll t8, t2, v1 # save bits shifted out
- or t8, t8, v0 # include sticky bits
+ sltu v0, zero, s8 # be sure to save any one bits
+ sll s8, t2, v1 # save bits shifted out
+ or s8, s8, v0 # include sticky bits
srl t2, t2, t9
/*
* Now round the denormalized result.
@@ -2364,13 +2368,13 @@
1:
bne t0, zero, 5f # if sign is negative, truncate
2:
- beq t8, zero, 5f # if exact, continue
+ beq s8, zero, 5f # if exact, continue
addu t2, t2, 1 # add rounding bit
b 5f
3:
li v0, GUARDBIT # load guard bit for rounding
- addu v0, v0, t8 # add remainder
- sltu v1, v0, t8 # compute carry out
+ addu v0, v0, s8 # add remainder
+ sltu v1, v0, s8 # compute carry out
beq v1, zero, 4f # if no carry, continue
addu t2, t2, 1 # add carry to result
4:
@@ -2379,7 +2383,7 @@
5:
move t1, zero # denorm or zero exponent
jal _C_LABEL(set_fd_s) # save result
- beq t8, zero, done # check for exact result
+ beq s8, zero, done # check for exact result
or a2, a2, MIPS_FPU_EXCEPTION_UNDERFLOW | MIPS_FPU_STICKY_UNDERFLOW
or a2, a2, MIPS_FPU_EXCEPTION_INEXACT | MIPS_FPU_STICKY_INEXACT
and v0, a2, MIPS_FPU_ENABLE_INEXACT
@@ -2404,7 +2408,7 @@
move v0, t3
addu t9, 32
bne t3, zero, 1f
- move v0, t8
+ move v0, s8
addu t9, 32
1:
srl v1, v0, 16
@@ -2431,7 +2435,7 @@
bne v1, zero, 1f
addu t9, 1
/*
- * Now shift t2,t3,t8 the correct number of bits.
+ * Now shift t2,t3,s8 the correct number of bits.
*/
1:
subu t9, t9, DLEAD_ZEROS # dont count leading zeros
@@ -2443,10 +2447,10 @@
subu t9, t9, v1 # shift by >= 32
subu v1, v1, t9
sll t2, t3, t9 # shift left by t9
- srl v0, t8, v1 # save bits shifted out
+ srl v0, s8, v1 # save bits shifted out
or t2, t2, v0
- sll t3, t8, t9
- move t8, zero
+ sll t3, s8, t9
+ move s8, zero
b norm_noshift_d
1:
subu v1, v1, t9
@@ -2454,19 +2458,19 @@
srl v0, t3, v1 # save bits shifted out
or t2, t2, v0
sll t3, t3, t9
- srl v0, t8, v1 # save bits shifted out
+ srl v0, s8, v1 # save bits shifted out
or t3, t3, v0
- sll t8, t8, t9
+ sll s8, s8, t9
b norm_noshift_d
2:
negu t9 # shift right by t9
subu v1, v1, t9 # (known to be < 32 bits)
- sll v0, t8, v1 # save bits shifted out
+ sll v0, s8, v1 # save bits shifted out
sltu v0, zero, v0 # be sure to save any one bits
- srl t8, t8, t9
- or t8, t8, v0
+ srl s8, s8, t9
+ or s8, s8, v0
sll v0, t3, v1 # save bits shifted out
- or t8, t8, v0
+ or s8, s8, v0
srl t3, t3, t9
sll v0, t2, v1 # save bits shifted out
or t3, t3, v0
@@ -2484,7 +2488,7 @@
1:
bne t0, zero, 5f # if sign is negative, truncate
2:
- beq t8, zero, 5f # if exact, continue
+ beq s8, zero, 5f # if exact, continue
addu t3, t3, 1 # add rounding bit
bne t3, zero, 5f # branch if no carry
addu t2, t2, 1 # add carry
@@ -2494,8 +2498,8 @@
b 5f
3:
li v0, GUARDBIT # load guard bit for rounding
- addu v0, v0, t8 # add remainder
- sltu v1, v0, t8 # compute carry out
+ addu v0, v0, s8 # add remainder
+ sltu v1, v0, s8 # compute carry out
beq v1, zero, 4f # branch if no carry
addu t3, t3, 1 # add carry
bne t3, zero, 4f # branch if no carry
@@ -2509,7 +2513,7 @@
5:
bgt t1, DEXP_MAX, overflow_d # overflow?
blt t1, DEXP_MIN, underflow_d # underflow?
- bne t8, zero, inexact_d # is result inexact?
+ bne s8, zero, inexact_d # is result inexact?
addu t1, t1, DEXP_BIAS # bias exponent
and t2, t2, ~DIMPL_ONE # clear implied one bit
b result_fs_d
@@ -2590,7 +2594,7 @@
move t2, ta2 # get unrounded fraction (MS)
move t3, ta3 # get unrounded fraction (LS)
li t9, DEXP_MIN # compute shift amount
- subu t9, t9, t1 # shift t2,t8 right by t9
+ subu t9, t9, t1 # shift t2,s8 right by t9
blt t9, DFRAC_BITS+2, 3f # shift all the bits out?
move t1, zero # result is inexact zero
move t2, zero
@@ -2616,17 +2620,17 @@
blt t9, v1, 1f # shift by < 32?
subu t9, t9, v1 # shift right by >= 32
subu v1, v1, t9
- sltu v0, zero, t8 # be sure to save any one bits
- sll t8, t2, v1 # save bits shifted out
- or t8, t8, v0 # include sticky bits
+ sltu v0, zero, s8 # be sure to save any one bits
+ sll s8, t2, v1 # save bits shifted out
+ or s8, s8, v0 # include sticky bits
srl t3, t2, t9
move t2, zero
b 2f
1:
subu v1, v1, t9 # shift right by t9
- sltu v0, zero, t8 # be sure to save any one bits
- sll t8, t3, v1 # save bits shifted out
- or t8, t8, v0 # include sticky bits
+ sltu v0, zero, s8 # be sure to save any one bits
+ sll s8, t3, v1 # save bits shifted out
+ or s8, s8, v0 # include sticky bits
srl t3, t3, t9
sll v0, t2, v1 # save bits shifted out
or t3, t3, v0
@@ -2644,15 +2648,15 @@
1:
bne t0, zero, 5f # if sign is negative, truncate
2:
- beq t8, zero, 5f # if exact, continue
+ beq s8, zero, 5f # if exact, continue
addu t3, t3, 1 # add rounding bit
bne t3, zero, 5f # if no carry, continue
addu t2, t2, 1 # add carry
b 5f
3:
li v0, GUARDBIT # load guard bit for rounding
- addu v0, v0, t8 # add remainder
- sltu v1, v0, t8 # compute carry out
+ addu v0, v0, s8 # add remainder
+ sltu v1, v0, s8 # compute carry out
beq v1, zero, 4f # if no carry, continue
addu t3, t3, 1 # add rounding bit
bne t3, zero, 4f # if no carry, continue
@@ -2663,7 +2667,7 @@
5:
move t1, zero # denorm or zero exponent
jal _C_LABEL(set_fd_d) # save result
- beq t8, zero, done # check for exact result
+ beq s8, zero, done # check for exact result
or a2, a2, MIPS_FPU_EXCEPTION_UNDERFLOW | MIPS_FPU_STICKY_UNDERFLOW
or a2, a2, MIPS_FPU_EXCEPTION_INEXACT | MIPS_FPU_STICKY_INEXACT
and v0, a2, MIPS_FPU_ENABLE_INEXACT
@@ -2772,6 +2776,7 @@
#endif
REG_L a1, CALLFRAME_SIZ + 1*SZREG(sp) # frame
REG_L a2, CALLFRAME_SIZ + 2*SZREG(sp) # cause
+ REG_L s8, CALLFRAME_S8(sp)
REG_L ra, CALLFRAME_RA(sp)
PTR_ADDU sp, CALLFRAME_SIZ
j _C_LABEL(fpemul_sigfpe)
@@ -2789,6 +2794,7 @@
#endif
REG_L a1, CALLFRAME_FRAME(sp) # frame
REG_L a2, CALLFRAME_CAUSE(sp) # cause
+ REG_L s8, CALLFRAME_S8(sp)
REG_L ra, CALLFRAME_RA(sp)
PTR_ADDU sp, CALLFRAME_SIZ
j _C_LABEL(fpemul_sigill)
@@ -2848,6 +2854,7 @@
REG_EPILOGUE
REG_L ra, CALLFRAME_RA(sp)
+ REG_L s8, CALLFRAME_S8(sp)
PTR_ADDU sp, CALLFRAME_SIZ
j ra
END(MachEmulateFP)