This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
sh4 soft fp
- From: amylaar at spamcop dot net (Joern Rennecke)
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 30 Sep 2004 21:24:31 +0100 (BST)
- Subject: sh4 soft fp
The normalization in muldf3 posted here:
http://gcc.gnu.org/ml/gcc-patches/2004-08/msg00459.html
doesn't work right when the top 32 bits are zero but the
lower 32 bits have to be distributed over two 32-bit words.
When the upper 32 bits are zero, the following strategy should work better:
copy the low word to the high word, shift the low word left by 16, use a signed
shift count on the high word, and use the shift count +16 on the low word.
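In C, the distribution step this strategy has to implement looks like the
following minimal sketch (a portable formulation of the operation, not of the
register-level shld trick; s is the left shift count, assumed to be in (0, 64)):

#include <stdint.h>

/* The 64-bit fraction hi:lo has hi == 0; distribute the low word over
   both result words by a left shift of s bits.  On SH, shld takes a
   signed count - negative counts shift right - which is what lets the
   two cases below share one code path.  */
static void
normalize_low_word (uint32_t lo, int s, uint32_t *hi_out, uint32_t *lo_out)
{
  *hi_out = s >= 32 ? lo << (s - 32) : lo >> (32 - s);
  *lo_out = s >= 32 ? 0 : lo << s;
}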
I've also found a number of smaller bugs and missing comments in the other code,
so here is an updated version of the patches to the old files:
Index: lib1funcs.asm
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/lib1funcs.asm,v
retrieving revision 1.36
diff -p -u -r1.36 lib1funcs.asm
--- lib1funcs.asm 12 Aug 2003 01:25:07 -0000 1.36
+++ lib1funcs.asm 30 Sep 2004 18:59:32 -0000
@@ -37,6 +37,8 @@ Boston, MA 02111-1307, USA. */
ELF local label prefixes by J"orn Rennecke
amylaar@cygnus.com */
+#include "insn-constants.h"
+
#ifdef __ELF__
#define LOCAL(X) .L_##X
#define FUNC(X) .type X,@function
@@ -56,6 +58,34 @@ Boston, MA 02111-1307, USA. */
#define FMOVD_WORKS
#endif
+#ifdef __sh1__
+#define SL(branch, dest, in_slot, in_slot_arg2) \
+ in_slot, in_slot_arg2; branch dest
+#define SL_(branch, dest, in_slot) \
+ in_slot; branch dest
+#define SLC(branch, dest, in_slot, in_slot_arg2) \
+ branch dest; in_slot, in_slot_arg2
+#define SLI(in_slot, in_slot_arg2) in_slot, in_slot_arg2
+#define SLCMP(branch, cmp1, cmp1arg2, cmp2, cmp2arg2) \
+ branch .+6; bra .+6; cmp2, cmp2arg2; cmp1, cmp1arg2
+#else
+#define SL(branch, dest, in_slot, in_slot_arg2) \
+ branch##/s dest; in_slot, in_slot_arg2
+#define SL_(branch, dest, in_slot) \
+ branch##/s dest; in_slot
+#define SLC(branch, dest, in_slot, in_slot_arg2) \
+ branch##/s dest; in_slot, in_slot_arg2
+#define SLI(in_slot, in_slot_arg2)
+#define SLCMP(branch, cmp1, cmp1arg2, cmp2, cmp2arg2) \
+ branch##/s .+6; cmp1, cmp1arg2; cmp2, cmp2arg2
+#endif
+
+#if defined (__sh1__) || defined (__sh2__) || defined (__SH2E__)
+/* don't #define DYN_SHIFT */
+#else
+#define DYN_SHIFT 1
+#endif
+
#if ! __SH5__
#ifdef L_ashiftrt
.global GLOBAL(ashiftrt_r4_0)
@@ -2873,3 +2903,1647 @@ GLOBAL(GCC_pop_shmedia_regs_nofpu):
ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
#endif /* __SH5__ == 32 */
#endif /* L_push_pop_shmedia_regs */
+
+/* Floating-point emulation. We handle NANs, +-infinity, and +-zero.
+ However, we assume that for NANs, the topmost bit of the fraction is set. */
+#ifdef L_nesf2
+/* -ffinite-math-only inline version, T := r4:SF == r5:SF
+ cmp/eq r4,r5
+ mov r4,r0
+ bt 0f
+ or r5,r0
+ add r0,r0
+ tst r0,r0
+ 0: */
+ .global GLOBAL(nesf2_)
+ FUNC(GLOBAL(nesf2_))
+GLOBAL(nesf2_):
+ /* If the raw values are unequal, the result is unequal, unless
+ both values are +-zero.
+ If the raw values are equal, the result is equal, unless
+ the values are nan or infinity. */
+ cmp/eq r4,r5
+ mov.l LOCAL(c_SF_NAN_MASK),r1
+ not r4,r0
+ bt LOCAL(check_nan)
+ mov r4,r0
+ or r5,r0
+ rts
+ add r0,r0
+LOCAL(check_nan):
+ tst r1,r0
+ rts
+ movt r0
+ .balign 4
+LOCAL(c_SF_NAN_MASK):
+ .long SF_NAN_MASK
+ ENDFUNC(GLOBAL(nesf2_))
+#endif /* L_nesf2 */
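+
For reference, a minimal C model of the logic in GLOBAL(nesf2_) above, on raw
IEEE bit patterns (it assumes, like the rest of this patch, that all
SF_NAN_MASK bits are set in any NaN):

#include <stdint.h>

#define SF_NAN_MASK 0x7fc00000u

/* Nonzero iff a != b, with +0 == -0 and NaN != NaN.  */
static int
ne_sf (uint32_t a, uint32_t b)
{
  if (a == b)
    /* Bitwise equal: unequal only for NaN, i.e. when ~a has none of
       the mask bits set.  */
    return (~a & SF_NAN_MASK) == 0;
  /* Bitwise unequal: still equal when both are +-zero, i.e. when
     everything but the sign bits is clear.  */
  return ((a | b) << 1) != 0;
}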
+
+#ifdef __LITTLE_ENDIAN__
+#define DBL0L r4
+#define DBL0H r5
+#define DBL1L r6
+#define DBL1H r7
+#define DBLRL r0
+#define DBLRH r1
+#else
+#define DBL0L r5
+#define DBL0H r4
+#define DBL1L r7
+#define DBL1H r6
+#define DBLRL r1
+#define DBLRH r0
+#endif
+
+#ifdef L_nedf2
+/* -ffinite-math-only -mb inline version, T := r4:DF == r6:DF
+ cmp/eq r5,r7
+ mov r4,r0
+ bf 0f
+ cmp/eq r4,r6
+ bt 0f
+ or r6,r0
+ add r0,r0
+ or r5,r0
+ tst r0,r0
+ 0: */
+ .global GLOBAL(nedf2_)
+ FUNC(GLOBAL(nedf2_))
+GLOBAL(nedf2_):
+ cmp/eq DBL0L,DBL1L
+ mov.l LOCAL(c_DF_NAN_MASK),r1
+ bf LOCAL(ne)
+ cmp/eq DBL0H,DBL1H
+ not DBL0H,r0
+ bt LOCAL(check_nan)
+ mov DBL0H,r0
+ or DBL1H,r0
+ add r0,r0
+ rts
+ or DBL0L,r0
+LOCAL(check_nan):
+ tst r1,r0
+ rts
+ movt r0
+LOCAL(ne):
+ rts
+ mov #1,r0
+ .balign 4
+LOCAL(c_DF_NAN_MASK):
+ .long DF_NAN_MASK
+ ENDFUNC(GLOBAL(nedf2_))
+#endif /* L_nedf2 */
+
+#ifdef L_unordsf2
+ .global GLOBAL(unordsf2_)
+ FUNC(GLOBAL(unordsf2_))
+GLOBAL(unordsf2_):
+ mov.l LOCAL(c_SF_NAN_MASK),r1
+ not r4,r0
+ tst r1,r0
+ not r5,r0
+ bt LOCAL(unord)
+ tst r1,r0
+LOCAL(unord):
+ rts
+ movt r0
+ .balign 4
+LOCAL(c_SF_NAN_MASK):
+ .long SF_NAN_MASK
+ ENDFUNC(GLOBAL(unordsf2_))
+#endif /* L_unordsf2 */
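+
The not / tst idiom used by both unord routines amounts to this test, sketched
in C (a value counts as NaN exactly when every mask bit is set, which holds
for the NaNs this patch assumes):

#include <stdint.h>

#define SF_NAN_MASK 0x7fc00000u

/* Nonzero iff a and b compare unordered, i.e. either is a NaN.  */
static int
unord_sf (uint32_t a, uint32_t b)
{
  return (~a & SF_NAN_MASK) == 0 || (~b & SF_NAN_MASK) == 0;
}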
+
+#ifdef L_unorddf2
+ .global GLOBAL(unorddf2_)
+ FUNC(GLOBAL(unorddf2_))
+GLOBAL(unorddf2_):
+ mov.l LOCAL(c_DF_NAN_MASK),r1
+ not r4,r0
+ tst r1,r0
+ not r6,r0
+ bt LOCAL(unord)
+ tst r1,r0
+LOCAL(unord):
+ rts
+ movt r0
+ .balign 4
+LOCAL(c_DF_NAN_MASK):
+ .long DF_NAN_MASK
+ ENDFUNC(GLOBAL(unorddf2_))
+#endif /* L_unorddf2 */
+
+#if defined(L_gtsf2t) || defined(L_gtsf2t_trap)
+/* -ffinite-math-only inline version, T := r4:SF > r5:SF ? 0 : 1
+ cmp/pz r4
+ mov r4,r0
+ bf/s 0f
+ cmp/hs r5,r4
+ cmp/ge r4,r5
+ or r5,r0
+ bt 0f
+ add r0,r0
+ tst r0,r0
+ 0: */
+#ifdef L_gtsf2t
+#define fun_label GLOBAL(gtsf2t)
+#else
+#define fun_label GLOBAL(gtsf2t_trap)
+#endif
+ .global fun_label
+ FUNC(fun_label)
+fun_label:
+	/* If the raw values compare greater, the result is true, unless
+	   either of them is a NaN (but infinity is fine), or both values
+	   are +- zero.  Otherwise, the result is false.  */
+ mov.l LOCAL(c_SF_NAN_MASK),r1
+ cmp/pz r4
+ not r5,r0
+ SLC(bf, LOCAL(neg),
+ tst r1,r0)
+ mov r4,r0
+ bt LOCAL(nan)
+ cmp/gt r5,r4
+ SLC(bf, LOCAL(check_nan),
+ cmp/gt r4,r1)
+ bf LOCAL(nan)
+ or r5,r0
+ rts
+ add r0,r0
+LOCAL(neg):
+ SLI(tst r1,r0)
+ bt LOCAL(nan)
+ not r4,r0
+ tst r1,r0
+ bt LOCAL(nan)
+ cmp/hi r4,r5
+#if defined(L_gtsf2t) && defined(DELAYED_BRANCHES)
+LOCAL(check_nan):
+#endif /* DELAYED_BRANCHES */
+ rts
+ movt r0
+#ifdef L_gtsf2t
+LOCAL(check_nan):
+LOCAL(nan):
+ rts
+ mov #0,r0
+#else /* ! L_gtsf2t */
+LOCAL(check_nan):
+ SLI(cmp/gt r4,r1)
+ bf LOCAL(nan)
+ rts
+ movt r0
+LOCAL(nan):
+ mov #0,r0
+ trapa #0
+#endif /* ! L_gtsf2t */
+ .balign 4
+LOCAL(c_SF_NAN_MASK):
+ .long SF_NAN_MASK
+ ENDFUNC(fun_label)
+#endif /* L_gtsf2t || L_gtsf2t_trap */
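+
The ordered comparison itself exploits that IEEE numbers order like
sign-magnitude integers; here is a C sketch of that core (NaNs excluded, since
the routine above filters them out with the mask test first):

#include <stdint.h>

/* a > b on raw SF bit patterns; neither operand may be a NaN.  */
static int
gt_sf (uint32_t a, uint32_t b)
{
  int a_neg = (int32_t) a < 0, b_neg = (int32_t) b < 0;
  if (a_neg != b_neg)
    /* Different signs: a > b iff a is the non-negative one and the
       operands are not +0 and -0.  */
    return !a_neg && ((a | b) << 1) != 0;
  if (a_neg)
    return b > a;	/* both negative: the order reverses */
  return a > b;		/* both non-negative: plain unsigned compare */
}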
+
+#if defined(L_gtdf2t) || defined(L_gtdf2t_trap)
+#ifdef L_gtdf2t
+#define fun_label GLOBAL(gtdf2t)
+#else
+#define fun_label GLOBAL(gtdf2t_trap)
+#endif
+ .global fun_label
+ FUNC(fun_label)
+fun_label:
+	/* If the raw values compare greater, the result is true, unless
+	   either of them is a NaN (but infinity is fine), or both values
+	   are +- zero.  Otherwise, the result is false.  */
+ mov.l LOCAL(c_DF_NAN_MASK),r1
+ cmp/pz DBL0H
+ not DBL1H,r0
+ SLC(bf, LOCAL(neg),
+ tst r1,r0)
+ mov DBL0H,r0
+ bt LOCAL(nan) /* return zero if DBL1 is NAN. */
+ cmp/eq DBL1H,DBL0H
+ bt LOCAL(cmp_low)
+ cmp/gt DBL1H,DBL0H
+ or DBL1H,r0
+ SLC(bf, LOCAL(check_nan),
+ cmp/gt DBL0H,r1)
+ add r0,r0
+ bf LOCAL(nan) /* return zero if DBL0 is NAN. */
+ or DBL0L,r0
+ rts
+ or DBL1L,r0 /* non-zero unless both DBL0 and DBL1 are +-zero. */
+LOCAL(cmp_low):
+ cmp/hi DBL1L,DBL0L
+ rts
+ movt r0
+LOCAL(neg):
+ SLI(tst r1,r0)
+ bt LOCAL(nan) /* return zero if DBL1 is NAN. */
+ cmp/eq DBL1H,DBL0H
+ SLC(bt, LOCAL(neg_cmp_low),
+ cmp/hi DBL0L,DBL1L)
+ not r4,r0
+ tst r1,r0
+ bt LOCAL(nan) /* return zero if DBL0 is NAN. */
+ cmp/hi DBL0H,DBL1H
+ SLI(rts !,)
+ SLI(movt r0 !,)
+LOCAL(neg_cmp_low):
+ SLI(cmp/hi DBL0L,DBL1L)
+ rts
+ movt r0
+LOCAL(check_nan):
+#ifdef L_gtdf2t
+LOCAL(nan):
+ rts
+ mov #0,r0
+#else
+ SLI(cmp/gt DBL0H,r1)
+ bf LOCAL(nan) /* return zero if DBL0 is NAN. */
+ rts
+ mov #0,r0
+LOCAL(nan):
+ mov #0,r0
+ trapa #0
+#endif
+ .balign 4
+LOCAL(c_DF_NAN_MASK):
+ .long DF_NAN_MASK
+ ENDFUNC(fun_label)
+#endif /* defined(L_gtdf2t) || defined(L_gtdf2t_trap) */
+
+#if defined(L_gesf2f) || defined(L_gesf2f_trap)
+/* -ffinite-math-only inline version, T := r4:SF >= r5:SF
+	cmp/pz	r5
+	mov	r4,r0
+	bf/s	0f
+	cmp/hs	r4,r5
+	cmp/ge	r5,r4
+	or	r5,r0
+	bt	0f
+	add	r0,r0
+	tst	r0,r0
+	0: */
+#ifdef L_gesf2f
+#define fun_label GLOBAL(gesf2f)
+#else
+#define fun_label GLOBAL(gesf2f_trap)
+#endif
+ .global fun_label
+ FUNC(fun_label)
+fun_label:
+	/* If the raw values compare greater or equal, the result is
+	   true, unless either of them is a NaN.  If both are +-zero, the
+	   result is true; otherwise, it is false.
+	   We use 0 as true and nonzero as false for this function.  */
+ mov.l LOCAL(c_SF_NAN_MASK),r1
+ cmp/pz r5
+ not r4,r0
+ SLC(bf, LOCAL(neg),
+ tst r1,r0)
+ mov r4,r0
+ bt LOCAL(nan)
+ cmp/gt r4,r5
+ SLC(bf, LOCAL(check_nan),
+ cmp/ge r1,r5)
+ bt LOCAL(nan)
+ or r5,r0
+ rts
+ add r0,r0
+LOCAL(neg):
+ SLI(tst r1,r0)
+ bt LOCAL(nan)
+ not r5,r0
+ tst r1,r0
+ bt LOCAL(nan)
+ cmp/hi r5,r4
+#if defined(L_gesf2f) && defined(DELAYED_BRANCHES)
+LOCAL(nan): LOCAL(check_nan):
+#endif
+ rts
+ movt r0
+#if defined(L_gesf2f) && ! defined(DELAYED_BRANCHES)
+LOCAL(check_nan):
+ cmp/ge r1,r5
+LOCAL(nan):
+ rts
+ movt r0
+#endif /* ! DELAYED_BRANCHES */
+#ifdef L_gesf2f_trap
+LOCAL(check_nan):
+ SLI(cmp/ge r1,r5)
+ bt LOCAL(nan)
+ rts
+LOCAL(nan):
+ movt r0
+ trapa #0
+#endif /* L_gesf2f_trap */
+ .balign 4
+LOCAL(c_SF_NAN_MASK):
+ .long SF_NAN_MASK
+	ENDFUNC(fun_label)
+#endif /* L_gesf2f || L_gesf2f_trap */
+
+#if defined(L_gedf2f) || defined(L_gedf2f_trap)
+#ifdef L_gedf2f
+#define fun_label GLOBAL(gedf2f)
+#else
+#define fun_label GLOBAL(gedf2f_trap)
+#endif
+	.global	fun_label
+	FUNC(fun_label)
+fun_label:
+	/* If the raw values compare greater or equal, the result is
+	   true, unless either of them is a NaN, or both are the
+	   same infinity.  If both are +-zero, the result is true;
+	   otherwise, it is false.
+	   We use 0 as true and nonzero as false for this function.  */
+ mov.l LOCAL(c_DF_NAN_MASK),r1
+ cmp/pz DBL1H
+ not DBL0H,r0
+ SLC(bf, LOCAL(neg),
+ tst r1,r0)
+ mov DBL0H,r0
+ bt LOCAL(nan)
+ cmp/eq DBL0H,DBL1H
+ bt LOCAL(cmp_low)
+ cmp/gt DBL0H,DBL1H
+ or DBL1H,r0
+ SLC(bf, LOCAL(check_nan),
+ cmp/ge r1,DBL1H)
+ add r0,r0
+ bt LOCAL(nan)
+ or DBL0L,r0
+ rts
+ or DBL1L,r0
+LOCAL(cmp_low):
+ cmp/hi DBL0L,DBL1L
+#if defined(L_gedf2f) && defined(DELAYED_BRANCHES)
+LOCAL(nan): LOCAL(check_nan):
+#endif
+ rts
+ movt r0
+#if defined(L_gedf2f) && ! defined(DELAYED_BRANCHES)
+LOCAL(check_nan):
+ SLI(cmp/ge r1,DBL1H)
+LOCAL(nan):
+ rts
+ movt r0
+#elif defined(L_gedf2f_trap)
+LOCAL(check_nan):
+ SLI(cmp/ge r1,DBL1H)
+ bt LOCAL(nan)
+ rts
+LOCAL(nan):
+ movt r0
+ trapa #0
+#endif /* L_gedf2f_trap */
+LOCAL(neg):
+ SLI(tst r1,r0)
+ bt LOCAL(nan)
+ cmp/eq DBL0H,DBL1H
+ not DBL1H,r0
+ SLC(bt, LOCAL(neg_cmp_low),
+ cmp/hi DBL1L,DBL0L)
+ tst r1,r0
+ bt LOCAL(nan)
+ cmp/hi DBL1H,DBL0H
+ SLI(rts !,)
+ SLI(movt r0 !,)
+LOCAL(neg_cmp_low):
+ SLI(cmp/hi DBL1L,DBL0L)
+ rts
+ movt r0
+ .balign 4
+LOCAL(c_DF_NAN_MASK):
+ .long DF_NAN_MASK
+	ENDFUNC(fun_label)
+#endif /* L_gedf2f || L_gedf2f_trap */
+
+#ifndef DYN_SHIFT /* Basic conversions for SH1 / SH2 */
+#ifdef L_extendsfdf2
+ .global GLOBAL(extendsfdf2_)
+ FUNC(GLOBAL(extendsfdf2_))
+GLOBAL(extendsfdf2_):
+ mov.l LOCAL(x7f800000),r3
+ mov r4,DBLRL
+ tst r3,r4
+ bt LOCAL(zero_denorm)
+ mov.l LOCAL(xe0000000),r2
+ rotr DBLRL
+ rotr DBLRL
+ rotr DBLRL
+ and r2,DBLRL
+ mov r4,DBLRH
+ not r4,r2
+ shll DBLRH
+ shlr2 DBLRH
+ shlr2 DBLRH
+ add DBLRH,DBLRH
+ rotcr DBLRH
+ tst r3,r2
+ bt LOCAL(inf_nan)
+ mov.l LOCAL(x38000000),r2
+ rts
+ add r2,DBLRH
+LOCAL(inf_nan):
+ mov.l LOCAL(x70000000),r2
+ rts
+ add r2,DBLRH
+LOCAL(zero_denorm):
+ mov.l r4,@-r15
+ add r4,r4
+ tst r4,r4
+ bt LOCAL(zero)
+ add r3,r3 /* 0xff000000 */
+ mov.l LOCAL(xb8000009),r2
+LOCAL(shift_byte):
+ tst r3,r4
+ shll8 r4
+ SL(bt, LOCAL(shift_byte),
+ add #-8,r2)
+LOCAL(shift_bit):
+ shll r4
+ SL(bf, LOCAL(shift_bit),
+ add #-1,r2)
+ mov r4,DBLRH
+ mov.l @r15+,r4
+ shlr8 DBLRH
+ shlr2 DBLRH
+ shlr DBLRH
+ rotcr DBLRL
+ cmp/pz r4
+ rotcr DBLRH
+ rotcr DBLRL
+ rts
+ add r2,DBLRH
+LOCAL(zero):
+ mov.l @r15+,DBLRH
+ rts
+ mov #0,DBLRL
+ .balign 4
+LOCAL(x7f800000):
+ .long 0x7f800000
+LOCAL(x38000000):
+ .long 0x38000000
+LOCAL(xe0000000):
+ .long 0xe0000000
+LOCAL(x70000000):
+ .long 0x70000000
+LOCAL(xb8000009):
+ /* Flip sign back, do exponent adjustment, and compensate for -8 / -1
+ adjustments in first shift loop iterations. */
+ .long 0x80000000 + 0x38000000 + 9
+ ENDFUNC(GLOBAL(extendsfdf2_))
+#endif /* L_extendsfdf2 */
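+
For orientation, the normalized-number path of this conversion in C
(0x38000000 is the exponent rebias (1023 - 127) << 20; zero, denormal,
infinity and NaN inputs take the other paths above):

#include <stdint.h>

static void
extend_sf_df (uint32_t sf, uint32_t *df_hi, uint32_t *df_lo)
{
  /* The low 3 fraction bits land in the top of the low result word.  */
  *df_lo = sf << 29;
  /* Exponent and upper 20 fraction bits move down by 3 bits and get
     the rebias added; the sign bit stays in place.  */
  *df_hi = (sf & 0x80000000u) | (((sf & 0x7fffffffu) >> 3) + 0x38000000u);
}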
+
+#ifdef L_truncdfsf2
+ .global GLOBAL(truncdfsf2_)
+ FUNC(GLOBAL(truncdfsf2_))
+GLOBAL(truncdfsf2_):
+ mov.l LOCAL(x38000000),r3 ! exponent adjustment DF -> SF
+ mov DBL0H,r1
+ mov.l LOCAL(x70000000),r2 ! mask for out-of-range exponent bits
+ mov DBL0H,r0
+ mov.l DBL0L,@-r15
+ sub r3,r1
+ tst r2,r1
+ shll8 r0 !
+ shll2 r0 ! Isolate highpart fraction.
+ shll2 r0 !
+ bf LOCAL(ill_exp)
+ shll2 r1
+ mov.l LOCAL(x2fffffff),r2 /* Fraction lsb | lower guard bits. */
+ shll2 r1
+ mov.l LOCAL(xff000000),r3
+ shlr8 r0
+ tst r2,DBL0L /* Check if msb guard bit wants rounding up. */
+ shlr16 DBL0L
+ shlr8 DBL0L
+ shlr2 DBL0L
+ SL_(bt, LOCAL(add_frac),
+ shlr2 DBL0L)
+ add #1,DBL0L
+LOCAL(add_frac):
+ add DBL0L,r0
+ mov.l LOCAL(x01000000),r2
+ and r3,r1
+ mov.l @r15+,DBL0L
+ add r1,r0
+ tst r3,r0
+ bt LOCAL(inf_denorm0)
+ cmp/hs r3,r0
+LOCAL(denorm_noup_sh1):
+ bt LOCAL(inf)
+ div0s DBL0H,r2 /* copy orig. sign into T. */
+ rts
+ rotcr r0
+LOCAL(inf_denorm0): ! We might need to undo previous rounding.
+ mov.l LOCAL(x2fffffff),r3 /* Old fraction lsb | lower guard bits. */
+ tst r1,r1
+ bf LOCAL(inf)
+ add #-1,r0
+ tst r3,DBL0L /* Check if msb guard bit was rounded up. */
+ mov.l LOCAL(x5fffffff),r3 /* Fraction lsb | lower guard bits. */
+ addc r2,r0
+ shlr r0
+ tst r3,DBL0L /* Check if msb guard bit wants rounding up. */
+#ifdef DELAYED_BRANCHES
+ bt/s LOCAL(denorm_noup)
+#else
+ bt LOCAL(denorm_noup_sh1)
+#endif
+ div0s DBL0H,r2 /* copy orig. sign into T. */
+ add #1,r0
+LOCAL(denorm_noup):
+ rts
+ rotcr r0
+LOCAL(ill_exp):
+ div0s DBL0H,r1
+ mov.l LOCAL(x7ff80000),r2
+ add r1,r1
+ bf LOCAL(inf_nan)
+ mov.w LOCAL(m32),r3 /* Handle denormal or zero. */
+ shlr16 r1
+ exts.w r1,r1
+ shll2 r1
+ add r1,r1
+ shlr8 r1
+ exts.w r1,r1
+ add #-8,r1 /* Go from 9 to 1 guard bit in MSW. */
+ cmp/gt r3,r1
+ mov.l @r15+,r3 /* DBL0L */
+ bf LOCAL(zero)
+ mov.l DBL0L, @-r15
+ shll8 DBL0L
+ rotcr r0 /* Insert leading 1. */
+ shlr16 r3
+ shll2 r3
+ add r3,r3
+ shlr8 r3
+ cmp/pl DBL0L /* Check lower 23 guard bits if guard bit 23 is 0. */
+ addc r3,r0 /* Assemble fraction with compressed guard bits. */
+ mov.l @r15+,DBL0L
+ mov #0,r2
+ neg r1,r1
+LOCAL(denorm_loop):
+ shlr r0
+ rotcl r2
+ dt r1
+ bf LOCAL(denorm_loop)
+ tst #2,r0
+ rotcl r0
+ tst r2,r2
+ rotcl r0
+ xor #3,r0
+ add #3,r0 /* Even overflow gives the correct result. */
+ shlr2 r0
+ div0s r0,DBL0H
+ rts
+ rotcr r0
+LOCAL(zero):
+ mov #0,r0
+ div0s r0,DBL0H
+ rts
+ rotcr r0
+LOCAL(inf_nan):
+ not DBL0H,r0
+ tst r2,r0
+ mov.l @r15+,DBL0L
+ bf LOCAL(inf)
+ rts
+ mov #-1,r0 /* NAN */
+LOCAL(inf): /* r2 must be positive here. */
+ mov.l LOCAL(xffe00000),r0
+ div0s r2,DBL0H
+ rts
+ rotcr r0
+LOCAL(m32):
+ .word -32
+ .balign 4
+LOCAL(x38000000):
+ .long 0x38000000
+LOCAL(x70000000):
+ .long 0x70000000
+LOCAL(x2fffffff):
+ .long 0x2fffffff
+LOCAL(x01000000):
+ .long 0x01000000
+LOCAL(xff000000):
+ .long 0xff000000
+LOCAL(x5fffffff):
+ .long 0x5fffffff
+LOCAL(x7ff80000):
+ .long 0x7ff80000
+LOCAL(xffe00000):
+ .long 0xffe00000
+ ENDFUNC(GLOBAL(truncdfsf2_))
+#endif /* L_truncdfsf2 */
+#endif /* ! DYN_SHIFT */
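+
Both truncdfsf2_ variants round to nearest, ties to even; the assembly gets
there through the compressed guard bits, but the underlying step is the
textbook one, sketched here (frac23 is the pre-assembled 23-bit SF fraction,
df_lo the low DF fraction word, so bit 28 is the msb guard bit):

#include <stdint.h>

static uint32_t
round_nearest_even (uint32_t frac23, uint32_t df_lo)
{
  uint32_t guard = df_lo & 0x10000000u;	 /* msb guard bit (bit 28) */
  uint32_t sticky = df_lo & 0x0fffffffu; /* all lower guard bits */
  if (guard && (sticky || (frac23 & 1)))
    frac23++;		/* may carry into the exponent - that is fine */
  return frac23;
}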
+
+/* The actual arithmetic uses dynamic shift. Supporting SH1 / SH2 here would
+ make this code too hard to maintain, so if you want to add SH1 / SH2
+ support, do it in a separate copy. */
+#ifdef DYN_SHIFT
+#ifdef L_extendsfdf2
+ .global GLOBAL(extendsfdf2_)
+ FUNC(GLOBAL(extendsfdf2_))
+GLOBAL(extendsfdf2_):
+ mov.l LOCAL(x7f800000),r2
+ mov #29,r3
+ mov r4,DBLRL
+ not r4,DBLRH
+ tst r2,r4
+ shld r3,DBLRL
+ bt LOCAL(zero_denorm)
+ mov #-3,r3
+ tst r2,DBLRH
+ mov r4,DBLRH
+ bt/s LOCAL(inf_nan)
+ shll DBLRH
+ shld r3,DBLRH
+ mov.l LOCAL(x38000000),r2
+ rotcr DBLRH
+ rts
+ add r2,DBLRH
+ .balign 4
+LOCAL(inf_nan):
+ shld r2,DBLRH
+ mov.l LOCAL(x70000000),r2
+ rotcr DBLRH
+ rts
+ add r2,DBLRH
+LOCAL(zero_denorm):
+ mov.l r4,@-r15
+ add r4,r4
+ tst r4,r4
+ extu.w r4,r2
+ bt LOCAL(zero)
+ cmp/eq r4,r2
+ extu.b r4,r1
+ bf/s LOCAL(three_bytes)
+ mov.l LOCAL(c__clz_tab),r0
+ cmp/eq r4,r1
+ mov #22,DBLRH
+ bt LOCAL(one_byte)
+ shlr8 r2
+ mov #14,DBLRH
+LOCAL(one_byte):
+#ifdef __pic__
+ add r0,r2
+ mova LOCAL(c__clz_tab),r0
+#endif
+ mov.b @(r0,r2),r2
+ mov #21,r3
+ mov.w LOCAL(x0),DBLRL
+ sub r2,DBLRH
+LOCAL(norm_shift):
+ shld DBLRH,r4
+ mov.l @r15+,r2
+ shld r3,DBLRH
+ mov.l LOCAL(xb7ffffff),r3
+ add r4,DBLRH
+ cmp/pz r2
+ mov r2,r4
+ rotcr DBLRH
+ rts
+ add r3,DBLRH
+LOCAL(three_bytes):
+ mov r4,r2
+ shlr16 r2
+#ifdef __pic__
+ add r0,r2
+ mova LOCAL(c__clz_tab),r0
+#endif
+ mov.b @(r0,r2),r2
+ mov #21,r3
+ mov #6-32,DBLRH
+ sub r2,DBLRH
+ mov r4,DBLRL
+ shld DBLRH,DBLRL
+ bra LOCAL(norm_shift)
+ add #32,DBLRH
+LOCAL(zero):
+ rts /* DBLRL has already been zeroed above. */
+ mov.l @r15+,DBLRH
+LOCAL(x0):
+ .word 0
+ .balign 4
+LOCAL(x7f800000):
+ .long 0x7f800000
+LOCAL(x38000000):
+ .long 0x38000000
+LOCAL(x70000000):
+ .long 0x70000000
+LOCAL(xb7ffffff):
+ /* Flip sign back, do exponent adjustment, and remove leading one. */
+ .long 0x80000000 + 0x38000000 - 1
+LOCAL(c__clz_tab):
+#ifdef __pic__
+ .long GLOBAL(clz_tab) - .
+#else
+ .long GLOBAL(clz_tab)
+#endif
+ ENDFUNC(GLOBAL(extendsfdf2_))
+#endif /* L_extendsfdf2 */
+
+#ifdef L_truncdfsf2
+ .global GLOBAL(truncdfsf2_)
+ FUNC(GLOBAL(truncdfsf2_))
+GLOBAL(truncdfsf2_):
+ mov.l LOCAL(x38000000),r3
+ mov DBL0H,r1
+ mov.l LOCAL(x70000000),r2
+ mov DBL0H,r0
+ sub r3,r1
+ mov.l DBL0L,@-r15
+ tst r2,r1
+ mov #12,r3
+ shld r3,r0 ! Isolate highpart fraction.
+ bf LOCAL(ill_exp)
+ shll2 r1
+ mov.l LOCAL(x2fffffff),r2 /* Fraction lsb | lower guard bits. */
+ shll2 r1
+ mov.l LOCAL(xff000000),r3
+ shlr8 r0
+ tst r2,DBL0L /* Check if msb guard bit wants rounding up. */
+ mov #-28,r2
+ bt/s LOCAL(add_frac)
+ shld r2,DBL0L
+ add #1,DBL0L
+LOCAL(add_frac):
+ add DBL0L,r0
+ mov.l LOCAL(x01000000),r2
+ and r3,r1
+ mov.l @r15+,DBL0L
+ add r1,r0
+ tst r3,r0
+ bt LOCAL(inf_denorm0)
+#if 0 // No point checking overflow -> infinity if we don't raise a signal.
+ cmp/hs r3,r0
+ bt LOCAL(inf)
+#endif
+ div0s DBL0H,r2 /* copy orig. sign into T. */
+ rts
+ rotcr r0
+LOCAL(inf_denorm0): ! We might need to undo previous rounding.
+ mov.l LOCAL(x2fffffff),r3 /* Old fraction lsb | lower guard bits. */
+ tst r1,r1
+ bf LOCAL(inf)
+ add #-1,r0
+ tst r3,DBL0L /* Check if msb guard bit was rounded up. */
+ mov.l LOCAL(x5fffffff),r3 /* Fraction lsb | lower guard bits. */
+ addc r2,r0
+ shlr r0
+ tst r3,DBL0L /* Check if msb guard bit wants rounding up. */
+ bt/s LOCAL(denorm_noup)
+ div0s DBL0H,r2 /* copy orig. sign into T. */
+ add #1,r0
+LOCAL(denorm_noup):
+ rts
+ rotcr r0
+LOCAL(ill_exp):
+ div0s DBL0H,r1
+ mov.l LOCAL(x7ff80000),r2
+ add r1,r1
+ bf LOCAL(inf_nan)
+ mov.w LOCAL(m32),r3 /* Handle denormal or zero. */
+ mov #-21,r2
+ shad r2,r1
+ add #-8,r1 /* Go from 9 to 1 guard bit in MSW. */
+ cmp/gt r3,r1
+ mov.l @r15+,r3 /* DBL0L */
+ bf LOCAL(zero)
+ mov.l DBL0L, @-r15
+ shll8 DBL0L
+ rotcr r0 /* Insert leading 1. */
+ shld r2,r3
+ cmp/pl DBL0L /* Check lower 23 guard bits if guard bit 23 is 0. */
+ addc r3,r0 /* Assemble fraction with compressed guard bits. */
+ mov r0,r2
+ shld r1,r0
+ mov.l @r15+,DBL0L
+ add #32,r1
+ shld r1,r2
+ tst #2,r0
+ rotcl r0
+ tst r2,r2
+ rotcl r0
+ xor #3,r0
+ add #3,r0 /* Even overflow gives the correct result. */
+ shlr2 r0
+ div0s r0,DBL0H
+ rts
+ rotcr r0
+LOCAL(zero):
+ mov #0,r0
+ div0s r0,DBL0H
+ rts
+ rotcr r0
+LOCAL(inf_nan):
+ not DBL0H,r0
+ tst r2,r0
+ mov.l @r15+,DBL0L
+ bf LOCAL(inf)
+ rts
+ mov #-1,r0 /* NAN */
+LOCAL(inf): /* r2 must be positive here. */
+ mov.l LOCAL(xffe00000),r0
+ div0s r2,DBL0H
+ rts
+ rotcr r0
+LOCAL(m32):
+ .word -32
+ .balign 4
+LOCAL(x38000000):
+ .long 0x38000000
+LOCAL(x70000000):
+ .long 0x70000000
+LOCAL(x2fffffff):
+ .long 0x2fffffff
+LOCAL(x01000000):
+ .long 0x01000000
+LOCAL(xff000000):
+ .long 0xff000000
+LOCAL(x5fffffff):
+ .long 0x5fffffff
+LOCAL(x7ff80000):
+ .long 0x7ff80000
+LOCAL(xffe00000):
+ .long 0xffe00000
+ ENDFUNC(GLOBAL(truncdfsf2_))
+#endif /* L_truncdfsf2 */
+
+#ifdef L_add_sub_sf3
+ .global GLOBAL(subsf3_)
+ FUNC(GLOBAL(subsf3_))
+ .global GLOBAL(addsf3_)
+ FUNC(GLOBAL(addsf3_))
+GLOBAL(subsf3_):
+ cmp/pz r5
+ add r5,r5
+ rotcr r5
+GLOBAL(addsf3_):
+ mov.l LOCAL(x7f800000),r3
+ mov r4,r6
+ add r6,r6
+ mov r5,r7
+ add r7,r7
+ mov r4,r0
+ or r3,r0
+ cmp/hi r6,r7
+ mov r5,r1
+ bf/s LOCAL(r4_hs)
+ or r3,r1
+ cmp/eq r5,r1
+ bt LOCAL(ret_r5) /* sole Inf or NaN, return unchanged. */
+ shll8 r0
+ tst r6,r6
+ shll8 r1
+ mov #-24,r2
+ bt LOCAL(denorm_r4)
+LOCAL(denorm_r4_done):
+ mov r6,r3
+ shld r2,r3
+ mov r7,r6
+ shld r2,r6
+ sub r6,r3
+ mov r0,r7
+ shld r3,r0 /* Get 31 upper bits. */
+ mov.l LOCAL(xff000000),r2
+ add #31,r3
+ mov.l r5,@-r15 ! push result sign.
+ cmp/pl r3
+ shld r3,r7
+ bf LOCAL(ret_stack)
+ div0s r4,r5
+ bf/s LOCAL(add)
+	cmp/pl	r7 /* Is LSB in r0 clear, but any lower guard bit set? */
+ subc r0,r1
+ mov.l LOCAL(c__clz_tab),r7
+ tst r2,r1
+ mov #-24,r3
+ bf/s LOCAL(norm_r0)
+ mov r1,r0
+ extu.w r1,r1
+ bra LOCAL(norm_check2)
+ cmp/eq r0,r1
+LOCAL(ret_r5):
+ rts
+ mov r5,r0
+LOCAL(ret_stack):
+ rts
+ mov.l @r15+,r0
+
+/* We leave the numbers denormalized, but we change the bit position to be
+ consistent with normalized numbers. This also removes the spurious
+ leading one that was inserted before. */
+LOCAL(denorm_r4):
+ tst r7,r7
+ add r0,r0
+ bf LOCAL(denorm_r4_done)
+ bra LOCAL(denorm_r4_done)
+ add r1,r1
+LOCAL(denorm_r5):
+ tst r6,r6
+ add r1,r1
+ bf LOCAL(denorm_r5_done)
+ clrt
+ bra LOCAL(denorm_r5_done)
+ add r0,r0
+
+/* If the exponents differ by two or more, normalization is minimal, and
+   few guard bits are needed for an exact final result, so sticky guard
+   bit compression before the subtraction (or addition) works fine.
+   If the exponents differ by one, only one extra guard bit is generated,
+   and effectively no guard bit compression takes place.  */
+
+LOCAL(r4_hs):
+ cmp/eq r4,r0
+ shll8 r0
+ bt LOCAL(inf_nan_arg0)
+ shll8 r1
+ mov #-24,r2
+ tst r7,r7
+ shld r2,r7
+ bt LOCAL(denorm_r5)
+LOCAL(denorm_r5_done):
+ mov r1,r3
+ shld r2,r6
+ subc r6,r7
+ mov.l LOCAL(xff000000),r2
+ bf LOCAL(same_exp)
+ shld r7,r1 /* Get 31 upper bits. */
+ add #31,r7
+ mov.l r4,@-r15 ! push result sign.
+ cmp/pl r7
+ shld r7,r3
+ bf LOCAL(ret_stack)
+ div0s r4,r5
+ bf/s LOCAL(add)
+ cmp/pl r3 /* Is LSB in r1 clear, but any lower guard bit set? */
+ subc r1,r0
+ mov.l LOCAL(c__clz_tab),r7
+LOCAL(norm_check):
+ tst r2,r0
+ mov #-24,r3
+ bf LOCAL(norm_r0)
+ extu.w r0,r1
+ cmp/eq r0,r1
+LOCAL(norm_check2):
+ mov #-8,r3
+ bt LOCAL(norm_r0)
+ mov #-16,r3
+LOCAL(norm_r0):
+ mov r0,r1
+ shld r3,r0
+#ifdef __pic__
+ add r0,r7
+ mova LOCAL(c__clz_tab),r0
+#endif
+ mov.b @(r0,r7),r7
+ add #25,r3
+ add #-9+1,r6
+ mov r1,r0
+ sub r7,r3
+ mov.l LOCAL(xbfffffff),r7
+ sub r3,r6 /* generate exp-1 */
+ mov.w LOCAL(d24),r2
+ cmp/pz r6 /* check exp > 0 */
+ shld r3,r0 /* Leading 1 becomes +1 exp adjustment. */
+ bf LOCAL(zero_denorm)
+LOCAL(denorm_done):
+ add #30,r3
+ shld r3,r1
+ mov.w LOCAL(m1),r3
+ tst r7,r1 ! clear T if rounding up
+ shld r2,r6
+ subc r3,r0 ! round - overflow will boost exp adjustment to 2.
+ mov.l @r15+,r2
+ add r6,r0 ! overflow will generate inf
+ cmp/ge r2,r3 ! get sign into T
+ rts
+ rotcr r0
+LOCAL(ret_r4):
+ rts
+ mov r4,r0
+
+/* At worst, we are shifting the number back into the place where an incoming
+   denormal was.  Thus, the shifts won't get out of range.  They might still
+   generate a zero fraction, but that's OK - that just makes the result 0. */
+LOCAL(zero_denorm):
+ add r6,r3
+ mov r1,r0
+ mov #0,r6 /* leading one will become free (except for rounding) */
+ bra LOCAL(denorm_done)
+ shld r3,r0
+
+/* Handle abs(r4) >= abs(r5), same exponents specially so we don't need
+ check for a zero fraction in the main path. */
+LOCAL(same_exp):
+ div0s r4,r5
+ mov.l r4,@-r15
+ bf LOCAL(add)
+ cmp/eq r1,r0
+ mov.l LOCAL(c__clz_tab),r7
+ bf/s LOCAL(norm_check)
+ sub r1,r0
+ rts ! zero difference -> return +zero
+ mov.l @r15+,r1
+
+/* r2: 0xff000000 */
+LOCAL(add):
+ addc r1,r0
+ mov.w LOCAL(x2ff),r7
+ shll8 r6
+ bf/s LOCAL(no_carry)
+ shll16 r6
+ tst r7,r0
+ shlr8 r0
+ mov.l @r15+,r3 ! discard saved sign
+ subc r2,r0
+ sett
+ addc r6,r0
+ cmp/hs r2,r0
+ bt/s LOCAL(inf)
+ div0s r7,r4 /* Copy sign. */
+ rts
+ rotcr r0
+LOCAL(inf):
+ mov r6,r0
+ rts
+ rotcr r0
+LOCAL(no_carry):
+ mov.w LOCAL(m1),r3
+ shll r0
+ bf LOCAL(denorm_add)
+ tst r7,r0
+ shlr8 r0
+ mov.l @r15+,r1 ! discard saved sign
+ subc r3,r0 ! round ; overflow -> exp++
+ cmp/ge r4,r3 /* Copy sign. */
+ add r6,r0 ! overflow -> inf
+ rts
+ rotcr r0
+
+LOCAL(denorm_add):
+ shlr r0
+ cmp/ge r4,r3 /* Copy sign. */
+ shlr8 r0
+ mov.l @r15+,r1 ! discard saved sign
+ rts
+ rotcr r0
+
+LOCAL(inf_nan_arg0):
+ cmp/eq r5,r1
+ bf LOCAL(ret_r4)
+ div0s r4,r5 /* Both are inf or NaN, check signs. */
+ bt LOCAL(ret_nan) /* inf - inf, or NaN. */
+ mov r4,r0 ! same sign; return NaN if either is NaN.
+ rts
+ or r5,r0
+LOCAL(ret_nan):
+ rts
+ mov #-1,r0
+
+LOCAL(d24):
+ .word 24
+LOCAL(x2ff):
+ .word 0x2ff
+LOCAL(m1):
+ .word -1
+ .balign 4
+LOCAL(x7f800000):
+ .long 0x7f800000
+LOCAL(xbfffffff):
+ .long 0xbfffffff
+LOCAL(xff000000):
+ .long 0xff000000
+LOCAL(xfe000000):
+ .long 0xfe000000
+LOCAL(c__clz_tab):
+#ifdef __pic__
+ .long GLOBAL(clz_tab) - .
+#else
+ .long GLOBAL(clz_tab)
+#endif
+
+ ENDFUNC(GLOBAL(addsf3_))
+ ENDFUNC(GLOBAL(subsf3_))
+#endif /* L_add_sub_sf3 */
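+
The sticky guard bit compression referred to above boils down to this
alignment step, in C: the bits shifted out while aligning the smaller operand
collapse into the lsb, which is all the final rounding needs once the
exponents differ by two or more.

#include <stdint.h>

static uint32_t
align_sticky (uint32_t frac, unsigned d)
{
  if (d == 0)
    return frac;
  if (d >= 32)
    return frac != 0;	/* the whole fraction becomes the sticky bit */
  /* OR the shifted-out bits into the lsb as a single sticky bit.  */
  return (frac >> d) | ((frac << (32 - d)) != 0);
}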
+
+#ifdef L_mulsf3
+ .global GLOBAL(mulsf3_)
+ FUNC(GLOBAL(mulsf3_))
+GLOBAL(mulsf3_):
+ mov.l LOCAL(x7f800000),r1
+ not r4,r2
+ mov r4,r3
+ not r5,r0
+ tst r1,r2
+ or r1,r3
+ bt/s LOCAL(inf_nan_arg0)
+ tst r1,r0
+ bt LOCAL(inf_nan_arg1)
+ tst r1,r5
+ mov r1,r2
+ shll8 r3
+ or r5,r1
+ bt/s LOCAL(zero_denorm_arg1)
+ shll8 r1
+ tst r2,r4
+ bt LOCAL(zero_denorm_arg0)
+ dmulu.l r3,r1
+ mov r4,r0
+ and r2,r0
+LOCAL(arg_norm):
+ and r5,r2
+ mov.l LOCAL(x3f800000),r3
+ sts mach,r1
+ sub r3,r0
+ sts macl,r3
+ add r2,r0
+ cmp/pz r1
+ mov.w LOCAL(x100),r2
+ bf/s LOCAL(norm_frac)
+ tst r3,r3
+ shll2 r1 /* Shift one up, replace leading 1 with 0. */
+ shlr r1
+ tst r3,r3
+LOCAL(norm_frac):
+ mov.w LOCAL(mx80),r3
+ bf LOCAL(round_frac)
+ tst r2,r1
+LOCAL(round_frac):
+ mov.l LOCAL(xff000000),r2
+ subc r3,r1 /* Even overflow gives right result: exp++, frac=0. */
+ shlr8 r1
+ add r1,r0
+ shll r0
+ bt LOCAL(ill_exp)
+ tst r2,r0
+ bt LOCAL(denorm0)
+ cmp/hs r2,r0
+ bt LOCAL(inf)
+LOCAL(insert_sign):
+ div0s r4,r5
+ rts
+ rotcr r0
+LOCAL(denorm0):
+ sub r2,r0
+ bra LOCAL(insert_sign)
+ shlr r0
+LOCAL(zero_denorm_arg1):
+ mov.l LOCAL(x60000000),r2 /* Check exp0 >= -64 */
+ add r1,r1
+ tst r1,r1 /* arg1 == 0 ? */
+ mov #0,r0
+ bt LOCAL(insert_sign) /* argument 1 is zero ==> return 0 */
+ tst r4,r2
+ bt LOCAL(insert_sign) /* exp0 < -64 ==> return 0 */
+ mov.l LOCAL(c__clz_tab),r0
+ mov r3,r2
+ mov r1,r3
+ bra LOCAL(arg_normalize)
+ mov r2,r1
+LOCAL(zero_denorm_arg0):
+ mov.l LOCAL(x60000000),r2 /* Check exp1 >= -64 */
+ add r3,r3
+ tst r3,r3 /* arg0 == 0 ? */
+ mov #0,r0
+ bt LOCAL(insert_sign) /* argument 0 is zero ==> return 0 */
+ tst r5,r2
+ bt LOCAL(insert_sign) /* exp1 < -64 ==> return 0 */
+ mov.l LOCAL(c__clz_tab),r0
+LOCAL(arg_normalize):
+ mov.l r7,@-r15
+ extu.w r3,r7
+ cmp/eq r3,r7
+ mov.l LOCAL(xff000000),r7
+ mov #-8,r2
+ bt 0f
+ tst r7,r3
+ mov #-16,r2
+ bt 0f
+ mov #-24,r2
+0:
+ mov r3,r7
+ shld r2,r7
+#ifdef __pic__
+ add r0,r7
+ mova LOCAL(c__clz_tab),r0
+#endif
+ mov.b @(r0,r7),r0
+ add #32,r2
+ mov r2,r7
+ mov #23,r2
+ sub r0,r7
+ mov.l LOCAL(x7f800000),r0
+ shld r7,r3
+ shld r2,r7
+ mov r0,r2
+ and r4,r0
+ sub r7,r0
+ mov.l @r15+,r7
+ bra LOCAL(arg_norm)
+ dmulu.l r3,r1
+#if 0 /* This is slightly slower, but could be used if table lookup causes
+ cache thrashing. */
+ bt LOCAL(insert_sign) /* exp1 < -64 ==> return 0 */
+ mov.l LOCAL(xff000000),r2
+ mov r4,r0
+LOCAL(arg_normalize):
+ tst r2,r3
+ bf LOCAL(arg_bit_norm)
+LOCAL(arg_byte_loop):
+ tst r2,r3
+ add r2,r0
+ shll8 r3
+ bt LOCAL(arg_byte_loop)
+ add r4,r0
+LOCAL(arg_bit_norm):
+ mov.l LOCAL(x7f800000),r2
+ rotl r3
+LOCAL(arg_bit_loop):
+ add r2,r0
+ bf/s LOCAL(arg_bit_loop)
+ rotl r3
+ rotr r3
+ rotr r3
+ sub r2,r0
+ bra LOCAL(arg_norm)
+ dmulu.l r3,r1
+#endif /* 0 */
+LOCAL(inf):
+ bra LOCAL(insert_sign)
+ mov r2,r0
+LOCAL(inf_nan_arg0):
+ bt LOCAL(inf_nan_both)
+ add r0,r0
+ cmp/eq #-1,r0 /* arg1 zero? -> NAN */
+ bt LOCAL(insert_sign)
+ mov r4,r0
+LOCAL(inf_insert_sign):
+ bra LOCAL(insert_sign)
+ add r0,r0
+LOCAL(inf_nan_both):
+ mov r4,r0
+ bra LOCAL(inf_insert_sign)
+ or r5,r0
+LOCAL(inf_nan_arg1):
+ mov r2,r0
+ add r0,r0
+ cmp/eq #-1,r0 /* arg0 zero? */
+ bt LOCAL(insert_sign)
+ bra LOCAL(inf_insert_sign)
+ mov r5,r0
+LOCAL(ill_exp):
+ cmp/pz r0
+ mov #-24,r3
+ bt LOCAL(inf)
+ add r1,r1
+ mov r0,r2
+ sub r1,r2 ! remove fraction to get back pre-rounding exponent.
+ sts mach,r0
+ sts macl,r1
+ shad r3,r2
+ mov r0,r3
+ shld r2,r0
+ add #32,r2
+ cmp/pz r2
+ shld r2,r3
+ bf LOCAL(zero)
+ or r1,r3
+ mov #-1,r1
+ tst r3,r3
+ mov.w LOCAL(x100),r3
+ bf/s LOCAL(denorm_round_up)
+ mov #-0x80,r1
+ tst r3,r0
+LOCAL(denorm_round_up):
+ mov #-7,r3
+ subc r1,r0
+ bra LOCAL(insert_sign)
+ shld r3,r0
+LOCAL(zero):
+ bra LOCAL(insert_sign)
+ mov #0,r0
+LOCAL(x100):
+ .word 0x100
+LOCAL(mx80):
+ .word -0x80
+ .balign 4
+LOCAL(x7f800000):
+ .long 0x7f800000
+LOCAL(x3f800000):
+ .long 0x3f800000
+LOCAL(xff000000):
+ .long 0xff000000
+LOCAL(x60000000):
+ .long 0x60000000
+LOCAL(c__clz_tab):
+#ifdef __pic__
+ .long GLOBAL(clz_tab) - .
+#else
+ .long GLOBAL(clz_tab)
+#endif
+ ENDFUNC(GLOBAL(mulsf3_))
+#endif /* L_mulsf3 */
+
+#ifdef L_hypotf
+ .global GLOBAL(hypotf)
+ FUNC(GLOBAL(hypotf))
+GLOBAL(hypotf):
+/* This integer implementation takes 71 to 72 cycles in the main path.
+   That is a bit slower than the SH4 can do this computation using double
+   precision hardware floating point - 57 cycles, or 69 with mode switches. */
+	/* First, calculate x (r4) as the sum of the squares of the fractions -
+	   the exponent is calculated separately in r3.
+	   Then, calculate sqrt(x) for the fraction by reciproot iteration.
+	   We get a 7.5-bit initial value using linear approximation with two slopes
+	   that are powers of two.
+	   x (- [1. .. 2.)  y0 := 1.25 - x/4 - tab(x)  y (- (0.8 .. 1.0)
+	   x (- [2. .. 4.)  y0 := 1. - x/8 - tab(x)    y (- (0.5 .. 0.8)
+	   x is represented with two bits before the binary point,
+	   y with 0 bits before the binary point.
+	   Thus, to calculate y0 := 1. - x/8 - tab(x), all you have to do is shift x
+	   right by 1, negate it, and subtract tab(x). */
+
+ /* y1 := 1.5*y0 - 0.5 * (x * y0) * (y0 * y0)
+ z0 := x * y1
+ z1 := z0 + 0.5 * (y1 - (y1*y1) * z0) */
+
+ mov.l LOCAL(xff000000),r1
+ add r4,r4
+ mov r4,r0
+ add r5,r5
+ cmp/hs r5,r4
+ sub r5,r0
+ mov #-24,r2
+ bf/s LOCAL(r5_large)
+ shad r2,r0
+ mov r4,r3
+ shll8 r4
+ rotcr r4
+ tst #0xe0,r0
+ neg r0,r0
+ bt LOCAL(ret_abs_r3)
+ tst r1,r5
+ shll8 r5
+ bt/s LOCAL(denorm_r5)
+ cmp/hi r3,r1
+ dmulu.l r4,r4
+ bf LOCAL(inf_nan)
+ rotcr r5
+ shld r0,r5
+LOCAL(denorm_r5_done):
+ sts mach,r4
+ dmulu.l r5,r5
+ mov.l r6,@-r15
+ mov #20,r6
+
+ sts mach,r5
+LOCAL(add_frac):
+ mova LOCAL(tab)-32,r0
+ mov.l r7,@-r15
+ mov.w LOCAL(x1380),r7
+ and r1,r3
+ addc r5,r4
+ mov.w LOCAL(m25),r2 ! -25
+ bf LOCAL(frac_ok)
+ sub r1,r3
+ rotcr r4
+ cmp/eq r1,r3 ! did we generate infinity ?
+ bt LOCAL(inf_nan)
+ shlr r4
+ mov r4,r1
+ shld r2,r1
+ mov.b @(r0,r1),r0
+ mov r4,r1
+ shld r6,r1
+ bra LOCAL(frac_low2)
+ sub r1,r7
+
+LOCAL(frac_ok):
+ mov r4,r1
+ shld r2,r1
+ mov.b @(r0,r1),r1
+ cmp/pz r4
+ mov r4,r0
+ bt/s LOCAL(frac_low)
+ shld r6,r0
+ mov.w LOCAL(xf80),r7
+ shlr r0
+LOCAL(frac_low):
+ sub r0,r7
+LOCAL(frac_low2):
+ mov.l LOCAL(x40000080),r0 ! avoid denorm results near 1. << r3
+ sub r1,r7 ! {0.12}
+ mov.l LOCAL(xfffe0000),r5 ! avoid rounding overflow near 4. << r3
+ swap.w r7,r1 ! {0.28}
+ dmulu.l r1,r4 /* two issue cycles */
+ mulu.w r7,r7 /* two issue cycles */
+ sts mach,r2 ! {0.26}
+ mov r1,r7
+ shlr r1
+ sts macl,r6 ! {0.24}
+ cmp/hi r0,r4
+ shlr2 r2
+ bf LOCAL(near_one)
+ shlr r2 ! {0.23} systemic error of linear approximation keeps y1 < 1
+ dmulu.l r2,r6
+ cmp/hs r5,r4
+ add r7,r1 ! {1.28}
+ bt LOCAL(near_four)
+ shlr2 r1 ! {1.26}
+ sts mach,r0 ! {0.15} x*y0^3 == {0.16} 0.5*x*y0^3
+ shlr2 r1 ! {1.24}
+ shlr8 r1 ! {1.16}
+ sett ! compensate for truncation of subtrahend, keep y1 < 1
+ subc r0,r1 ! {0.16} y1; max error about 3.5 ulp
+ swap.w r1,r0
+ dmulu.l r0,r4 ! { 1.30 }
+ mulu.w r1,r1
+ sts mach,r2
+ shlr2 r0
+ sts macl,r1
+ add r2,r0
+ mov.l LOCAL(xff000000),r6
+ add r2,r0
+ dmulu.l r1,r2
+ add #127,r0
+ add r6,r3 ! precompensation for adding leading 1
+ sts mach,r1
+ shlr r3
+ mov.l @r15+,r7
+ sub r1,r0 ! {0.31} max error about 50 ulp (+127)
+ mov.l @r15+,r6
+ shlr8 r0 ! {0.23} max error about 0.7 ulp
+ rts
+ add r3,r0
+
+LOCAL(r5_large):
+ mov r5,r3
+ mov #-31,r2
+ cmp/ge r2,r0
+ shll8 r5
+ bf LOCAL(ret_abs_r3)
+ rotcr r5
+ tst r1,r4
+ shll8 r4
+ bt/s LOCAL(denorm_r4)
+ cmp/hi r3,r1
+ dmulu.l r5,r5
+ bf LOCAL(inf_nan)
+ rotcr r4
+LOCAL(denorm_r4_done):
+ shld r0,r4
+ sts mach,r5
+ dmulu.l r4,r4
+ mov.l r6,@-r15
+ mov #20,r6
+ bra LOCAL(add_frac)
+ sts mach,r4
+
+LOCAL(near_one):
+ bra LOCAL(assemble_sqrt)
+ mov #0,r0
+LOCAL(near_four):
+ ! exact round-to-nearest would add 255. We add 256 for speed & compactness.
+ mov r4,r0
+ shlr8 r0
+ add #1,r0
+ tst r0,r0
+ addc r0,r3 ! might generate infinity.
+LOCAL(assemble_sqrt):
+ mov.l @r15+,r7
+ shlr r3
+ mov.l @r15+,r6
+ rts
+ add r3,r0
+LOCAL(inf_nan):
+LOCAL(ret_abs_r3):
+ mov r3,r0
+ rts
+ shlr r0
+LOCAL(denorm_r5):
+ bf LOCAL(inf_nan)
+ tst r1,r4
+ bt LOCAL(denorm_both)
+ dmulu.l r4,r4
+ bra LOCAL(denorm_r5_done)
+ shld r0,r5
+LOCAL(denorm_r4):
+ bf LOCAL(inf_nan)
+ tst r1,r5
+ dmulu.l r5,r5
+ bf LOCAL(denorm_r4_done)
+LOCAL(denorm_both): ! normalize according to r3.
+ extu.w r3,r2
+ mov.l LOCAL(c__clz_tab),r0
+ cmp/eq r3,r2
+ mov #-8,r2
+ bt 0f
+ tst r1,r3
+ mov #-16,r2
+ bt 0f
+ mov #-24,r2
+0:
+ shld r2,r3
+ mov.l r7,@-r15
+#ifdef __pic__
+ add r0,r3
+ mova LOCAL(c__clz_tab),r0
+#endif
+ mov.b @(r0,r3),r0
+ add #32,r2
+ sub r0,r2
+ shld r2,r4
+ mov r2,r7
+ dmulu.l r4,r4
+ sts.l pr,@-r15
+ mov #1,r3
+ bsr LOCAL(denorm_r5_done)
+ shld r2,r5
+ mov.l LOCAL(x01000000),r1
+ neg r7,r2
+ lds.l @r15+,pr
+ tst r1,r0
+ mov.l @r15+,r7
+ bt 0f
+ add #1,r2
+ sub r1,r0
+0:
+ rts
+ shld r2,r0
+
+LOCAL(m25):
+ .word -25
+LOCAL(x1380):
+ .word 0x1380
+LOCAL(xf80):
+ .word 0xf80
+ .balign 4
+LOCAL(xff000000):
+ .long 0xff000000
+LOCAL(x40000080):
+ .long 0x40000080
+LOCAL(xfffe0000):
+ .long 0xfffe0000
+LOCAL(x01000000):
+ .long 0x01000000
+LOCAL(c__clz_tab):
+#ifdef __pic__
+ .long GLOBAL(clz_tab) - .
+#else
+ .long GLOBAL(clz_tab)
+#endif
+
+/*
+#include <math.h>
+#include <stdio.h>
+
+double err(double x)
+{
+ return (x < 2. ? 1.25 - x/4. : 1. - x/8.) - 1./sqrt(x);
+}
+
+int
+main ()
+{
+ int i = 0;
+ double x, s, v;
+ double lx, hx;
+
+ s = 1./32.;
+ for (x = 1.; x < 4; x += s, i++)
+ {
+ lx = x;
+ hx = x + s - 1. / (1 << 30);
+ v = 0.5 * (err (lx) + err (hx));
+ printf ("%s% 4d%c",
+ (i & 7) == 0 ? "\t.byte\t" : "",
+ (int)(v * 4096 + 0.5) - 128,
+ (i & 7) == 7 ? '\n' : ',');
+ }
+ return 0;
+} */
+
+ .balign 4
+LOCAL(tab):
+ .byte -113, -84, -57, -33, -11, 8, 26, 41
+ .byte 55, 67, 78, 87, 94, 101, 106, 110
+ .byte 113, 115, 115, 115, 114, 112, 109, 106
+ .byte 101, 96, 91, 84, 77, 69, 61, 52
+ .byte 51, 57, 63, 68, 72, 77, 80, 84
+ .byte 87, 89, 91, 93, 95, 96, 97, 97
+ .byte 97, 97, 97, 96, 95, 94, 93, 91
+ .byte 89, 87, 84, 82, 79, 76, 72, 69
+ .byte 65, 61, 57, 53, 49, 44, 39, 34
+ .byte 29, 24, 19, 13, 8, 2, -4, -10
+ .byte -17, -23, -29, -36, -43, -50, -57, -64
+ .byte -71, -78, -85, -93,-101,-108,-116,-124
+ ENDFUNC(GLOBAL(hypotf))
+#endif /* L_hypotf */
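+
In plain floating point, the scheme from the comments at the top of hypotf
reads as below; tab_x stands for the table value, and since the fixed-point
code folds assorted scale factors into the table and the shift counts, its
final correction step is written a little differently there.

static double
sqrt_reciproot (double x, double tab_x)
{
  /* 7.5-bit two-slope linear estimate of 1/sqrt(x), x in [1., 4.).  */
  double y0 = (x < 2. ? 1.25 - x / 4. : 1. - x / 8.) - tab_x;
  double y1 = 1.5 * y0 - 0.5 * (x * y0) * (y0 * y0);	/* Newton step */
  double z0 = x * y1;					/* ~ sqrt(x) */
  return z0 + 0.5 * y1 * (x - z0 * z0);	/* division-free correction */
}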
+
+#ifdef L_add_sub_df3
+#include "IEEE-754/adddf3.S"
+#endif /* L_add_sub_df3 */
+
+#ifdef L_muldf3
+#include "IEEE-754/muldf3.S"
+#endif /* L_muldf3 */
+#endif /* DYN_SHIFT */
Index: sh-modes.def
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/sh-modes.def,v
retrieving revision 1.1
diff -p -u -r1.1 sh-modes.def
--- sh-modes.def 13 Oct 2003 21:16:32 -0000 1.1
+++ sh-modes.def 30 Sep 2004 18:59:32 -0000
@@ -1,5 +1,5 @@
-/* Alpha extra machine modes.
- Copyright (C) 2003 Free Software Foundation, Inc.
+/* SH extra machine modes.
+ Copyright (C) 2004 Free Software Foundation, Inc.
This file is part of GCC.
@@ -21,3 +21,7 @@ Boston, MA 02111-1307, USA. */
/* The SH uses a partial integer mode to represent the FPSCR register. */
PARTIAL_INT_MODE (SI);
+/* For software floating point comparisons. */
+CC_MODE (CC_FP_NE);
+CC_MODE (CC_FP_GT);
+CC_MODE (CC_FP_UNLT);
Index: sh-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/sh-protos.h,v
retrieving revision 1.55
diff -p -u -r1.55 sh-protos.h
--- sh-protos.h 10 May 2004 23:25:13 -0000 1.55
+++ sh-protos.h 30 Sep 2004 18:59:32 -0000
@@ -93,6 +93,10 @@ extern void expand_sf_binop (rtx (*)(rtx
extern void expand_df_unop (rtx (*)(rtx, rtx, rtx), rtx *);
extern void expand_df_binop (rtx (*)(rtx, rtx, rtx, rtx), rtx *);
extern void expand_fp_branch (rtx (*)(void), rtx (*)(void));
+extern void expand_sfunc_unop (enum machine_mode, rtx (*) (rtx, rtx),
+ const char *, enum rtx_code code, rtx *);
+extern void expand_sfunc_binop (enum machine_mode, rtx (*) (rtx, rtx),
+ const char *, enum rtx_code code, rtx *);
extern int sh_insn_length_adjustment (rtx);
extern int sh_can_redirect_branch (rtx, rtx);
extern void sh_expand_unop_v2sf (enum rtx_code, rtx, rtx);
Index: sh.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/sh.c,v
retrieving revision 1.270.2.3
diff -p -u -r1.270.2.3 sh.c
--- sh.c 29 Jun 2004 17:33:57 -0000 1.270.2.3
+++ sh.c 30 Sep 2004 18:59:37 -0000
@@ -1061,6 +1061,68 @@ prepare_scc_operands (enum rtx_code code
return t_reg;
}
+static rtx
+sh_soft_fp_cmp (int code, enum machine_mode op_mode)
+{
+ const char *name;
+ rtx (*fun) (rtx, rtx), addr, tmp, first, last, equiv;
+ int df = op_mode == DFmode;
+ enum machine_mode mode;
+
+ if (flag_finite_math_only && ! df)
+ switch (code)
+ {
+ case EQ:
+ return gen_cmpeqsf_i1_finite (sh_compare_op0, sh_compare_op1);
+ case LE:
+ case UNLE:
+ return gen_cmplesf_i1_finite (sh_compare_op0, sh_compare_op1);
+ case GE:
+ case UNGE:
+ return gen_cmplesf_i1_finite (sh_compare_op1, sh_compare_op0);
+ default:
+ break;
+ }
+ if (flag_finite_math_only && df && code == EQ)
+ return gen_cmpeqdf_i1_finite (sh_compare_op0, sh_compare_op1);
+
+ switch (code)
+ {
+ case EQ:
+ name = df ? "__nedf2_" : "__nesf2_";
+ fun = df ? gen_cmpnedf_i1 : gen_cmpnesf_i1;
+ mode = CC_FP_NEmode;
+ break;
+ case UNLE:
+ name = df ? "__gtdf2t" : "__gtsf2t";
+ fun = df ? gen_cmpgtdf_i1 : gen_cmpgtsf_i1;
+ mode = CC_FP_GTmode;
+ break;
+ case GE:
+ name = df ? "__gedf2f" : "__gesf2f";
+ fun = df ? gen_cmpunltdf_i1 : gen_cmpunltsf_i1;
+ mode = CC_FP_UNLTmode;
+ break;
+ default: abort ();
+ }
+ tmp = gen_reg_rtx (mode);
+ addr = force_reg (Pmode, function_symbol (name));
+ first = emit_move_insn (gen_rtx_REG (op_mode, R4_REG), sh_compare_op0);
+ emit_move_insn (gen_rtx_REG (op_mode, R5_REG + df), sh_compare_op1);
+ last = emit_insn (fun (tmp, addr));
+ equiv = gen_rtx_fmt_ee (COMPARE, mode, sh_compare_op0, sh_compare_op1);
+ REG_NOTES (last) = gen_rtx_EXPR_LIST (REG_EQUAL, equiv, REG_NOTES (last));
+ /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop
+ invariant code motion can move it. */
+ REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first));
+ REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
+ /* Use fpcmp_i1 rather than cmpeqsi_t, so that the optimizers can grok
+ the computation. */
+ return gen_rtx_SET (VOIDmode,
+ gen_rtx_REG (SImode, T_REG),
+ gen_rtx_fmt_ee (code, SImode, tmp, CONST0_RTX (mode)));
+}
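+
The mapping above relies on each helper returning 0 exactly when the condition
named by the rtx code holds, so that the tst in the fpcmp_i1 pattern transfers
the result into T.  A C sketch of the GE case, using the __gesf2f helper from
lib1funcs.asm:

extern int __gesf2f (float, float);	/* 0 iff a >= b; NaNs give nonzero */

static int
ge_sf (float a, float b)
{
  /* fpcmp_i1's "tst %0,%0" computes exactly this == 0 test.  */
  return __gesf2f (a, b) == 0;
}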
+
/* Called from the md file, set up the operands of a compare instruction. */
void
@@ -1081,11 +1143,16 @@ from_compare (rtx *operands, int code)
|| (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
sh_compare_op1 = force_reg (mode, sh_compare_op1);
}
- if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_SH2E && code == GE
+ && (mode == SFmode || TARGET_SH4))
{
from_compare (operands, GT);
insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
}
+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT
+ && ! TARGET_SH4 && TARGET_SH1
+ && (mode == DFmode || ! TARGET_SH2E))
+ insn = sh_soft_fp_cmp (code, mode);
else
insn = gen_rtx_SET (VOIDmode,
gen_rtx_REG (SImode, T_REG),
@@ -7582,7 +7649,7 @@ equality_comparison_operator (rtx op, en
int
greater_comparison_operator (rtx op, enum machine_mode mode)
{
- if (mode != VOIDmode && GET_MODE (op) == mode)
+ if (mode != VOIDmode && GET_MODE (op) != mode)
return 0;
switch (GET_CODE (op))
{
@@ -7599,7 +7666,7 @@ greater_comparison_operator (rtx op, enu
int
less_comparison_operator (rtx op, enum machine_mode mode)
{
- if (mode != VOIDmode && GET_MODE (op) == mode)
+ if (mode != VOIDmode && GET_MODE (op) != mode)
return 0;
switch (GET_CODE (op))
{
@@ -7613,6 +7680,37 @@ less_comparison_operator (rtx op, enum m
}
}
+int
+soft_fp_comparison_operator (rtx op, enum machine_mode mode)
+{
+ if (mode != VOIDmode && GET_MODE (op) != mode)
+ return 0;
+ switch (GET_CODE (op))
+ {
+ default:
+ return 0;
+ case EQ: mode = CC_FP_NEmode; break;
+ case UNLE: mode = CC_FP_GTmode; break;
+ case GE: mode = CC_FP_UNLTmode; break;
+ }
+ return register_operand (XEXP (op, 0), mode);
+}
+
+int
+soft_fp_comparison_operand (rtx op, enum machine_mode mode)
+{
+ switch (GET_MODE (op))
+ {
+ default:
+ return 0;
+ case CC_FP_NEmode: case CC_FP_GTmode: case CC_FP_UNLTmode:
+ break;
+ }
+ if (mode == SFmode && TARGET_SH2E)
+ return 0;
+ return register_operand (op, mode);
+}
+
/* Accept pseudos and branch target registers. */
int
target_reg_operand (rtx op, enum machine_mode mode)
@@ -7946,6 +8044,54 @@ expand_df_binop (rtx (*fun) (rtx, rtx, r
emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
get_fpscr_rtx ()));
}
+
+/* Expand an sfunc operation taking NARGS MODE arguments, using generator
+   function FUN, which needs symbol NAME loaded into a register first.
+ Add a REG_EQUAL note using EQUIV. */
+static void
+expand_sfunc_op (int nargs, enum machine_mode mode, rtx (*fun) (rtx, rtx),
+ const char *name, rtx equiv, rtx *operands)
+{
+ int next_reg = FIRST_PARM_REG, i;
+ rtx addr, first = NULL_RTX, last, insn;
+
+ addr = force_reg (Pmode, function_symbol (name));
+  for (i = 1; i <= nargs; i++)
+ {
+ insn = emit_move_insn (gen_rtx_REG (mode, next_reg), operands[i]);
+ if (!first)
+ first = insn;
+ next_reg += GET_MODE_SIZE (mode) / UNITS_PER_WORD;
+ }
+ last = emit_insn ((*fun) (operands[0], addr));
+ REG_NOTES (last) = gen_rtx_EXPR_LIST (REG_EQUAL, equiv, REG_NOTES (last));
+ /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop
+ invariant code motion can move it. */
+ REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first));
+ REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
+}
+
+/* Expand an sfunc unary operation taking a MODE argument, using generator
+   function FUN, which needs symbol NAME loaded into a register first.
+ Add a REG_EQUAL note using CODE. */
+void
+expand_sfunc_unop (enum machine_mode mode, rtx (*fun) (rtx, rtx),
+ const char *name, enum rtx_code code, rtx *operands)
+{
+ rtx equiv = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
+ expand_sfunc_op (1, mode, fun, name, equiv, operands);
+}
+
+/* Expand an sfunc binary operation in MODE, using generator function FUN,
+   which needs symbol NAME loaded into a register first.
+ Add a REG_EQUAL note using CODE. */
+void
+expand_sfunc_binop (enum machine_mode mode, rtx (*fun) (rtx, rtx),
+ const char *name, enum rtx_code code, rtx *operands)
+{
+ rtx equiv = gen_rtx_fmt_ee (code, mode, operands[1], operands[2]);
+ expand_sfunc_op (2, mode, fun, name, equiv, operands);
+}
/* ??? gcc does flow analysis strictly after common subexpression
elimination. As a result, common subexpression elimination fails
Index: sh.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/sh.h,v
retrieving revision 1.240.2.4
diff -p -u -r1.240.2.4 sh.h
--- sh.h 29 Jun 2004 17:33:57 -0000 1.240.2.4
+++ sh.h 30 Sep 2004 18:59:37 -0000
@@ -3295,6 +3295,8 @@ extern int rtx_equal_function_value_matt
{"noncommutative_float_operator", {MINUS, DIV}}, \
{"shmedia_6bit_operand", {SUBREG, REG, CONST_INT}}, \
{"sh_register_operand", {REG, SUBREG, CONST_INT}}, \
+ {"soft_fp_comparison_operand", {SUBREG, REG}}, \
+ {"soft_fp_comparison_operator", {EQ, UNLE, GE}}, \
{"target_reg_operand", {SUBREG, REG}}, \
{"target_operand", {SUBREG, REG, LABEL_REF, SYMBOL_REF, CONST, UNSPEC}},\
{"trunc_hi_operand", {SUBREG, REG, TRUNCATE}}, \
@@ -3308,6 +3310,7 @@ extern int rtx_equal_function_value_matt
#define SPECIAL_MODE_PREDICATES \
"any_register_operand", \
"int_gpr_dest", \
+  "soft_fp_comparison_operand", \
"trunc_hi_operand", \
/* This line intentionally left blank. */
Index: sh.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/sh.md,v
retrieving revision 1.172.2.1
diff -p -u -r1.172.2.1 sh.md
--- sh.md 8 Jun 2004 16:55:33 -0000 1.172.2.1
+++ sh.md 30 Sep 2004 18:59:42 -0000
@@ -152,6 +152,16 @@
(UNSPECV_CONST8 6)
(UNSPECV_WINDOW_END 10)
(UNSPECV_CONST_END 11)
+
+ ;; NaN handling for software floating point:
+  ;; We require one specific bit per precision to be set in all NaNs,
+  ;; so that we can test them with a not / tst sequence.
+ ;; ??? Ironically, this is the quiet bit for now, because that is the
+ ;; only bit set by __builtin_nan ("").
+ ;; ??? Should really use one bit lower and force it set by using
+ ;; a custom encoding function.
+ (SF_NAN_MASK 0x7fc00000)
+ (DF_NAN_MASK 0x7ff80000)
])
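+
For reference, the two constants decompose into the full exponent field of
each format plus the quiet bit, so a value is recognized as a NaN exactly when
all mask bits are set:

  SF_NAN_MASK 0x7fc00000 = 0x7f800000 (exponent, bits 30:23)
                         | 0x00400000 (quiet bit, bit 22)
  DF_NAN_MASK 0x7ff80000 = 0x7ff00000 (exponent, bits 30:20 of high word)
                         | 0x00080000 (quiet bit, bit 19 of high word)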
;; -------------------------------------------------------------------------
@@ -660,6 +670,14 @@
cmp/eq %1,%0"
[(set_attr "type" "mt_group")])
+(define_insn "fpcmp_i1"
+ [(set (reg:SI T_REG)
+ (match_operator:SI 1 "soft_fp_comparison_operator"
+ [(match_operand 0 "soft_fp_comparison_operand" "r") (const_int 0)]))]
+ "TARGET_SH1 && !TARGET_SH4"
+ "tst %0,%0"
+ [(set_attr "type" "mt_group")])
+
(define_insn "cmpgtsi_t"
[(set (reg:SI T_REG)
(gt:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
@@ -5272,6 +5290,14 @@
DONE;
}
+ if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT
+ && TARGET_SH1 && !TARGET_SH4
+ && (!TARGET_SH2E || GET_MODE (sh_compare_op0) == DFmode))
+ {
+ from_compare (operands, UNLE);
+ emit_jump_insn (gen_branch_false (operands[0]));
+ DONE;
+ }
from_compare (operands, GT);
}")
@@ -5308,10 +5334,15 @@
rtx tmp = sh_compare_op0;
sh_compare_op0 = sh_compare_op1;
sh_compare_op1 = tmp;
- emit_insn (gen_bgt (operands[0]));
- DONE;
+ if (TARGET_SH4 || (TARGET_SH2E && GET_MODE (sh_compare_op0) == SFmode))
+ {
+ emit_insn (gen_bgt (operands[0]));
+ DONE;
+ }
+ from_compare (operands, UNLE);
}
- from_compare (operands, GE);
+ else
+ from_compare (operands, GE);
}")
(define_expand "ble"
@@ -5342,9 +5373,9 @@
DONE;
}
- if (TARGET_SH2E
- && TARGET_IEEE
- && GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
+ if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT
+ && (!TARGET_SH2E || TARGET_IEEE
+ || (!TARGET_SH4 && GET_MODE (sh_compare_op0) == DFmode)))
{
rtx tmp = sh_compare_op0;
sh_compare_op0 = sh_compare_op1;
@@ -5383,9 +5414,9 @@
DONE;
}
- if (TARGET_SH2E
- && ! TARGET_IEEE
- && GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
+ if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT
+ && TARGET_SH2E && !TARGET_IEEE
+ && (TARGET_SH4 || GET_MODE (sh_compare_op0) == SFmode))
{
rtx tmp = sh_compare_op0;
sh_compare_op0 = sh_compare_op1;
@@ -5484,19 +5515,82 @@
from_compare (operands, GTU);
}")
+;; ??? Can't use DFmode bcc patterns for SH2E when there is no SFmode
+;; equivalent: the insn predicate has to be evaluable at compiler startup,
+;; and FAIL in bcc patterns causes crashes.
(define_expand "bunordered"
[(set (match_dup 1) (unordered:DI (match_dup 2) (match_dup 3)))
(set (pc)
(if_then_else (ne (match_dup 1) (const_int 0))
(label_ref:DI (match_operand 0 "" ""))
(pc)))]
- "TARGET_SHMEDIA"
+ "(TARGET_SH1 && !TARGET_SH2E) || TARGET_SHMEDIA"
"
{
- operands[1] = gen_reg_rtx (DImode);
operands[2] = force_reg (GET_MODE (sh_compare_op0), sh_compare_op0);
operands[3] = force_reg (GET_MODE (sh_compare_op1), sh_compare_op1);
+ if (TARGET_SH1)
+ {
+ HOST_WIDE_INT mask;
+ switch (GET_MODE (operands[2]))
+ {
+ case SFmode:
+ mask = SF_NAN_MASK;
+ break;
+ case DFmode:
+ mask = DF_NAN_MASK;
+ break;
+ default:
+ FAIL;
+ }
+ emit_insn (gen_cmpunsf_i1 (operands[2], operands[3],
+ force_reg (SImode, GEN_INT (mask))));
+ emit_jump_insn (gen_branch_true (operands[0]));
+ DONE;
+ }
+ operands[1] = gen_reg_rtx (DImode);
}")
+
+(define_expand "bunle"
+ [(set (pc)
+ (if_then_else (ne (reg:SI T_REG) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "(TARGET_SH1 && !TARGET_SH2E) || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SHMEDIA_FPU)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+
+ emit_insn (gen_sgt (tmp));
+ emit_jump_insn (gen_beq_media (operands[0], tmp, const0_rtx));
+ DONE;
+ }
+
+ from_compare (operands, UNLE);
+}")
+
+(define_expand "bunlt"
+ [(set (pc)
+ (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "(TARGET_SH1 && !TARGET_SH2E) || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SHMEDIA_FPU)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+
+ emit_insn (gen_sge (tmp));
+ emit_jump_insn (gen_beq_media (operands[0], tmp, const0_rtx));
+ DONE;
+ }
+
+ from_compare (operands, GE);
+}")
+
;; ------------------------------------------------------------------------
;; Jump and linkage insns
@@ -7495,6 +7589,13 @@ mov.l\\t1f,r0\\n\\
DONE;
if (! rtx_equal_function_value_matters)
FAIL;
+ if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT
+ && !TARGET_SH4 && (!TARGET_SH2E || GET_MODE (sh_compare_op0) == DFmode))
+ {
+ from_compare (operands, EQ);
+ emit_insn (gen_movt (operands[0]));
+ DONE;
+ }
operands[1] = prepare_scc_operands (EQ);
}")
@@ -7543,6 +7644,9 @@ mov.l\\t1f,r0\\n\\
}
if (! rtx_equal_function_value_matters)
FAIL;
+ if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT && !TARGET_SH4
+ && (!TARGET_SH2E || GET_MODE (sh_compare_op0) == DFmode))
+ FAIL;
operands[1] = prepare_scc_operands (LT);
}")
@@ -7647,6 +7751,9 @@ mov.l\\t1f,r0\\n\\
}
if (! rtx_equal_function_value_matters)
FAIL;
+ if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT && !TARGET_SH4
+ && (!TARGET_SH2E || GET_MODE (sh_compare_op0) == DFmode))
+ FAIL;
operands[1] = prepare_scc_operands (GT);
}")
@@ -7703,7 +7810,13 @@ mov.l\\t1f,r0\\n\\
FAIL;
if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
{
- if (TARGET_IEEE)
+ if (!TARGET_SH4
+ && (!TARGET_SH2E || GET_MODE (sh_compare_op0) == DFmode))
+ {
+ from_compare (operands, GE);
+ emit_insn (gen_movt (operands[0]));
+ }
+ else if (TARGET_IEEE)
{
rtx lab = gen_label_rtx ();
prepare_scc_operands (EQ);
@@ -7834,6 +7947,21 @@ mov.l\\t1f,r0\\n\\
operands[1] = prepare_scc_operands (GEU);
}")
+(define_expand "sunle"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (match_dup 1))]
+ "TARGET_SH1 && !TARGET_SH4"
+ "
+{
+ if (TARGET_SH2E && GET_MODE (sh_compare_op0) == SFmode)
+ FAIL;
+ if (! rtx_equal_function_value_matters)
+ FAIL;
+ from_compare (operands, UNLE);
+ emit_insn (gen_movt (operands[0]));
+ DONE;
+}")
+
;; sne moves the complement of the T reg to DEST like this:
;; cmp/eq ...
;; mov #-1,temp
@@ -7882,7 +8010,15 @@ mov.l\\t1f,r0\\n\\
DONE;
if (! rtx_equal_function_value_matters)
FAIL;
- operands[1] = prepare_scc_operands (EQ);
+ if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT
+ && !TARGET_SH4
+ && (!TARGET_SH2E || GET_MODE (sh_compare_op0) == DFmode))
+ {
+ from_compare (operands, EQ);
+ operands[1] = gen_rtx_REG (SImode, T_REG);
+ }
+ else
+ operands[1] = prepare_scc_operands (EQ);
operands[2] = gen_reg_rtx (SImode);
}")
@@ -8257,7 +8393,7 @@ mov.l\\t1f,r0\\n\\
[(set (match_operand:SF 0 "arith_reg_operand" "")
(plus:SF (match_operand:SF 1 "arith_reg_operand" "")
(match_operand:SF 2 "arith_reg_operand" "")))]
- "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+ "TARGET_SH2E || TARGET_SH3 || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH2E)
@@ -8265,6 +8401,12 @@ mov.l\\t1f,r0\\n\\
expand_sf_binop (&gen_addsf3_i, operands);
DONE;
}
+ else if (TARGET_SH3)
+ {
+ expand_sfunc_binop (SFmode, &gen_addsf3_i3, \"__addsf3_\", PLUS,
+ operands);
+ DONE;
+ }
}")
(define_insn "*addsf3_media"
@@ -8341,6 +8483,22 @@ mov.l\\t1f,r0\\n\\
}"
[(set_attr "type" "fparith_media")])
+(define_insn "addsf3_i3"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=z")
+ (plus:SF (reg:SF R4_REG) (reg:SF R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (clobber (reg:SI R6_REG))
+ (clobber (reg:SI R7_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH3 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
(define_insn "addsf3_i"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(plus:SF (match_operand:SF 1 "arith_reg_operand" "%0")
@@ -8355,7 +8513,7 @@ mov.l\\t1f,r0\\n\\
[(set (match_operand:SF 0 "fp_arith_reg_operand" "")
(minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
(match_operand:SF 2 "fp_arith_reg_operand" "")))]
- "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+ "TARGET_SH2E || TARGET_SH3 || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH2E)
@@ -8363,6 +8521,12 @@ mov.l\\t1f,r0\\n\\
expand_sf_binop (&gen_subsf3_i, operands);
DONE;
}
+ else if (TARGET_SH3)
+ {
+ expand_sfunc_binop (SFmode, &gen_subsf3_i3, \"__subsf3_\", MINUS,
+ operands);
+ DONE;
+ }
}")
(define_insn "*subsf3_media"
@@ -8373,6 +8537,23 @@ mov.l\\t1f,r0\\n\\
"fsub.s %1, %2, %0"
[(set_attr "type" "fparith_media")])
+(define_insn "subsf3_i3"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=z")
+ (minus:SF (reg:SF R4_REG) (reg:SF R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (clobber (reg:SI R5_REG))
+ (clobber (reg:SI R6_REG))
+ (clobber (reg:SI R7_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH3 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
(define_insn "subsf3_i"
[(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
(minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "0")
@@ -8392,13 +8573,19 @@ mov.l\\t1f,r0\\n\\
[(set (match_operand:SF 0 "fp_arith_reg_operand" "")
(mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
(match_operand:SF 2 "fp_arith_reg_operand" "")))]
- "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+ "TARGET_SH2E || TARGET_SH3 || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH4)
expand_sf_binop (&gen_mulsf3_i4, operands);
else if (TARGET_SH2E)
emit_insn (gen_mulsf3_ie (operands[0], operands[1], operands[2]));
+ else if (TARGET_SH3)
+ {
+ expand_sfunc_binop (SFmode, &gen_mulsf3_i3, \"__mulsf3_\", MULT,
+ operands);
+ DONE;
+ }
if (! TARGET_SHMEDIA)
DONE;
}")
@@ -8429,6 +8616,22 @@ mov.l\\t1f,r0\\n\\
"fmul %2,%0"
[(set_attr "type" "fp")])
+(define_insn "mulsf3_i3"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=z")
+ (mult:SF (reg:SF R4_REG) (reg:SF R5_REG)))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH3 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
(define_insn "*mac_media"
[(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
(plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f")
@@ -8589,6 +8792,95 @@ mov.l\\t1f,r0\\n\\
"ftrc %1,%0"
[(set_attr "type" "fp")])
+(define_insn "cmpnesf_i1"
+ [(set (match_operand:CC_FP_NE 0 "register_operand" "=z")
+ (compare:CC_FP_NE (reg:SF R4_REG) (reg:SF R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "cmpgtsf_i1"
+ [(set (match_operand:CC_FP_GT 0 "register_operand" "=z")
+ (compare:CC_FP_GT (reg:SF R4_REG) (reg:SF R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "cmpunltsf_i1"
+ [(set (match_operand:CC_FP_UNLT 0 "register_operand" "=z")
+ (compare:CC_FP_UNLT (reg:SF R4_REG) (reg:SF R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
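Only three comparison sfuncs (ne, gt, unlt) exist; the remaining IEEE
relations follow by swapping operands and / or inverting the T bit.
A sketch in C, with ne_ / gt_ / unlt_ as hypothetical stand-ins for the
library calls:

    /* Sketch: deriving the other relations from NE, GT and UNLT,
       where unlt (a, b) means "unordered or a < b".  */
    int eq (float a, float b)   { return ! ne_ (a, b); }
    int lt (float a, float b)   { return gt_ (b, a); }      /* swap */
    int ge (float a, float b)   { return ! unlt_ (a, b); }  /* invert */
    int le (float a, float b)   { return ! unlt_ (b, a); }  /* swap + invert */
    int ungt (float a, float b) { return unlt_ (b, a); }    /* swap */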
+(define_insn "cmpeqsf_i1_finite"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:SF 0 "arith_reg_operand" "r,r,r")
+ (match_operand:SF 1 "arith_reg_operand" "r,r,r")))
+ (clobber (match_scratch:SI 2 "=0,1,?r"))]
+ "TARGET_SH1 && ! TARGET_SH2E && flag_finite_math_only"
+ "*
+{
+ if (which_alternative == 0)
+ output_asm_insn (\"cmp/eq\t%0,%1\;or\t%1,%2\;bt\t0f\", operands);
+ else if (which_alternative == 1)
+ output_asm_insn (\"cmp/eq\t%0,%1\;or\t%0,%2\;bt\t0f\", operands);
+ else
+ output_asm_insn (\"cmp/eq\t%0,%1\;mov\t%0,%2\;bt\t0f\;or\t%1,%2\",
+ operands);
+ return \"add\t%2,%2\;tst\t%2,%2\\n0:\";
+}"
+ [(set_attr "length" "10,10,12")])
+
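The template above uses a bit trick; my reading of it in C, operating on
raw bit patterns (not code from the patch):

    /* Sketch: with -ffinite-math-only there are no NaNs, so two SFmode
       values are equal iff their bit patterns match, or both are zeros
       (only the sign bits may differ).  */
    int sf_eq_finite (unsigned int a, unsigned int b)
    {
      if (a == b)
        return 1;
      return ((a | b) << 1) == 0;  /* shift out the combined sign bit */
    }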
+(define_insn "cmplesf_i1_finite"
+ [(set (reg:SI T_REG)
+ (le:SI (match_operand:SF 0 "arith_reg_operand" "r,r,r")
+ (match_operand:SF 1 "arith_reg_operand" "r,r,r")))
+ (clobber (match_scratch:SI 2 "=0,1,r"))]
+ "TARGET_SH1 && ! TARGET_SH2E && flag_finite_math_only"
+ "*
+{
+ output_asm_insn (\"cmp/pz\t%0\", operands);
+ if (which_alternative == 2)
+ output_asm_insn (\"mov\t%0,%2\", operands);
+ if (TARGET_SH2)
+ output_asm_insn (\"bf/s\t0f\;cmp/hs\t%1,%0\;cmp/ge\t%0,%1\", operands);
+ else
+ output_asm_insn (\"bt\t1f\;bra\t0f\;cmp/hs\t%1,%0\\n1:\tcmp/ge\t%0,%1\",
+ operands);
+ if (which_alternative == 1)
+ output_asm_insn (\"or\t%0,%2\", operands);
+ else
+ output_asm_insn (\"or\t%1,%2\", operands);
+ return \"bt\t0f\;add\t%2,%2\;tst\t%2,%2\\n0:\";
+}"
+ [(set_attr "length" "18,18,20")])
+
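The le template exploits the fact that finite IEEE values order like
sign-magnitude integers. Roughly, in C (again a sketch over raw bit
patterns, matching my reading of the output template):

    /* Sketch: a <= b without NaNs.  cmp/pz branches on the sign of a;
       cmp/ge does a signed compare for the nonnegative case, cmp/hs a
       reversed unsigned compare for the negative case, and the tail
       fixes up -0.0 <= +0.0.  */
    int sf_le_finite (int a, int b)
    {
      int t = (a >= 0)
              ? b >= a                                 /* cmp/ge %0,%1 */
              : (unsigned int) a >= (unsigned int) b;  /* cmp/hs %1,%0 */
      if (! t)
        t = ((a | b) << 1) == 0;  /* both operands are +-0 */
      return t;
    }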
+(define_insn "cmpunsf_i1"
+ [(set (reg:SI T_REG)
+ (unordered:SI (match_operand:SF 0 "arith_reg_operand" "r,r")
+ (match_operand:SF 1 "arith_reg_operand" "r,r")))
+ (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
+ (clobber (match_scratch:SI 3 "=0,&r"))]
+ "TARGET_SH1 && ! TARGET_SH2E"
+ "not\t%0,%3\;tst\t%2,%3\;not\t%1,%3\;bt\t0f\;tst\t%2,%3\;0:"
+ [(set_attr "length" "10")])
+
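Operand 2 here is presumably the preloaded NaN mask constant (exponent
all ones plus the top fraction bit, 0x7fc00000 for SFmode); a value then
tests as NaN when all mask bits are set in it. In C:

    /* Sketch: unordered (a, b) on raw bit patterns.  (~x & mask) == 0
       holds exactly when every mask bit is set in x.  */
    int sf_unordered (unsigned int a, unsigned int b, unsigned int nan_mask)
    {
      return (~a & nan_mask) == 0 || (~b & nan_mask) == 0;
    }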
(define_insn "cmpgtsf_t"
[(set (reg:SI T_REG)
(gt:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
@@ -8684,7 +8976,7 @@ mov.l\\t1f,r0\\n\\
[(set (reg:SI T_REG)
(compare (match_operand:SF 0 "arith_operand" "")
(match_operand:SF 1 "arith_operand" "")))]
- "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+ "TARGET_SH1 || TARGET_SHMEDIA_FPU"
"
{
sh_compare_op0 = operands[0];
@@ -8779,11 +9071,44 @@ mov.l\\t1f,r0\\n\\
[(set_attr "type" "fmove")
(set_attr "fp_mode" "single")])
+(define_expand "abssc2"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+ (abs:SF (match_operand:SC 1 "fp_arith_reg_operand" "")))]
+ "TARGET_SH3 && ! TARGET_SH2E"
+ "
+{
+ if (TARGET_SH3)
+ {
+ expand_sfunc_unop (SCmode, &gen_abssc2_i3, \"__hypotf\", ABS,
+ operands);
+ DONE;
+ }
+ FAIL;
+}")
+
+(define_insn "abssc2_i3"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=z")
+ (abs:SF (reg:SC R4_REG)))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH3 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
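Mapping abssc2 to __hypotf is just the definition of a complex absolute
value; for illustration (standard libm call, not from the patch):

    /* Sketch: |re + im*i| is the euclidean norm of the two parts.  */
    #include <math.h>
    float cabsf_sketch (float re, float im)
    {
      return hypotf (re, im);
    }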
(define_expand "adddf3"
[(set (match_operand:DF 0 "fp_arith_reg_operand" "")
(plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
(match_operand:DF 2 "fp_arith_reg_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "TARGET_SH4 || TARGET_SHMEDIA_FPU || TARGET_SH3"
"
{
if (TARGET_SH4)
@@ -8791,6 +9116,12 @@ mov.l\\t1f,r0\\n\\
expand_df_binop (&gen_adddf3_i, operands);
DONE;
}
+ else if (TARGET_SH3)
+ {
+ expand_sfunc_binop (DFmode, &gen_adddf3_i3_wrap, \"__adddf3_\", PLUS,
+ operands);
+ DONE;
+ }
}")
(define_insn "*adddf3_media"
@@ -8811,6 +9142,30 @@ mov.l\\t1f,r0\\n\\
[(set_attr "type" "dfp_arith")
(set_attr "fp_mode" "double")])
+(define_expand "adddf3_i3_wrap"
+ [(match_operand:DF 0 "" "") (match_operand:SI 1 "" "")]
+ "TARGET_SH3"
+ "
+{
+ emit_insn (gen_adddf3_i3 (operands[1]));
+ emit_move_insn (operands[0], gen_rtx_REG (DFmode, R0_REG));
+ DONE;
+}")
+
+(define_insn "adddf3_i3"
+ [(set (reg:DF R0_REG)
+ (plus:DF (reg:DF R4_REG) (reg:DF R6_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:DI R2_REG))
+ (clobber (reg:DF R4_REG))
+ (clobber (reg:DF R6_REG))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))]
+ "TARGET_SH3"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
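For DFmode the result register pair is fixed, which is what forces the
_wrap expander: the call itself can only deliver into r0/r1, and a
separate move transfers the value to the real destination. The implied
contract, sketched as C prototypes (inferred, not library code):

    /* Sketch: DFmode sfunc convention.  Arguments in r4/r5 and r6/r7,
       result in r0/r1; r2/r3, the argument registers, T and PR (plus
       MACH/MACL for the multiply) are clobbered.  */
    double __adddf3_ (double a /* r4/r5 */, double b /* r6/r7 */);  /* r0/r1 */
    double __muldf3_ (double a /* r4/r5 */, double b /* r6/r7 */);  /* r0/r1 */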
(define_expand "subdf3"
[(set (match_operand:DF 0 "fp_arith_reg_operand" "")
(minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
@@ -8847,7 +9202,7 @@ mov.l\\t1f,r0\\n\\
[(set (match_operand:DF 0 "fp_arith_reg_operand" "")
(mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
(match_operand:DF 2 "fp_arith_reg_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "TARGET_SH4 || TARGET_SHMEDIA_FPU || TARGET_SH3"
"
{
if (TARGET_SH4)
@@ -8855,6 +9210,12 @@ mov.l\\t1f,r0\\n\\
expand_df_binop (&gen_muldf3_i, operands);
DONE;
}
+ else if (TARGET_SH3)
+ {
+ expand_sfunc_binop (DFmode, &gen_muldf3_i3_wrap, \"__muldf3_\", MULT,
+ operands);
+ DONE;
+ }
}")
(define_insn "*muldf3_media"
@@ -8875,6 +9236,32 @@ mov.l\\t1f,r0\\n\\
[(set_attr "type" "dfp_arith")
(set_attr "fp_mode" "double")])
+(define_expand "muldf3_i3_wrap"
+ [(match_operand:DF 0 "" "") (match_operand:SI 1 "" "")]
+ "TARGET_SH3"
+ "
+{
+ emit_insn (gen_muldf3_i3 (operands[1]));
+ emit_move_insn (operands[0], gen_rtx_REG (DFmode, R0_REG));
+ DONE;
+}")
+
+(define_insn "muldf3_i3"
+ [(set (reg:DF R0_REG)
+ (mult:DF (reg:DF R4_REG) (reg:DF R6_REG)))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:DI R2_REG))
+ (clobber (reg:DF R4_REG))
+ (clobber (reg:DF R6_REG))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))]
+ "TARGET_SH3"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
(define_expand "divdf3"
[(set (match_operand:DF 0 "fp_arith_reg_operand" "")
(div:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
@@ -9004,6 +9391,61 @@ mov.l\\t1f,r0\\n\\
;; (use (match_dup 2))])
;; (set (match_dup 0) (reg:SI FPUL_REG))])
+(define_insn "cmpnedf_i1"
+ [(set (match_operand:CC_FP_NE 0 "register_operand" "=z")
+ (compare:CC_FP_NE (reg:DF R4_REG) (reg:DF R6_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "cmpgtdf_i1"
+ [(set (match_operand:CC_FP_GT 0 "register_operand" "=z")
+ (compare:CC_FP_GT (reg:DF R4_REG) (reg:DF R6_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH4"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "cmpunltdf_i1"
+ [(set (match_operand:CC_FP_UNLT 0 "register_operand" "=z")
+ (compare:CC_FP_UNLT (reg:DF R4_REG) (reg:DF R6_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH4"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "cmpeqdf_i1_finite"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:DF 0 "arith_reg_operand" "r")
+ (match_operand:DF 1 "arith_reg_operand" "r")))
+ (clobber (match_scratch:SI 2 "=&r"))]
+ "TARGET_SH1 && ! TARGET_SH4 && flag_finite_math_only"
+ "cmp/eq\t%R0,%R1\;mov\t%S0,%2\;bf\t0f\;cmp/eq\t%S0,%S1\;bt\t0f\;or\t%S1,%2\;add\t%2,%2\;or\t%R0,%2\;tst\t%2,%2\\n0:"
+ [(set_attr "length" "18")])
+
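The DFmode finite-equality template is the two-word analogue of the
SFmode one. In C (a sketch of my reading of the nine-insn sequence):

    /* Sketch: 64-bit equality under -ffinite-math-only.  Equal iff all
       bits match, or both values are zeros of either sign (low words
       zero, high words at most the sign bit).  */
    int df_eq_finite (unsigned int hi_a, unsigned int lo_a,
                      unsigned int hi_b, unsigned int lo_b)
    {
      if (lo_a != lo_b)
        return 0;                  /* bf 0f with T == 0 */
      if (hi_a == hi_b)
        return 1;                  /* bt 0f with T == 1 */
      return (((hi_a | hi_b) << 1) | lo_a) == 0;  /* +-0 fixup */
    }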
+(define_insn "cmpundf_i1"
+ [(set (reg:SI T_REG)
+ (unordered:SI (match_operand:DF 0 "arith_reg_operand" "r,r")
+ (match_operand:DF 1 "arith_reg_operand" "r,r")))
+ (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
+ (clobber (match_scratch:SI 3 "=0,&r"))]
+ "TARGET_SH1 && ! TARGET_SH2E"
+ "not\t%S0,%3\;tst\t%2,%3\;not\t%S1,%3\;bt\t0f\;tst\t%2,%3\;0:"
+ [(set_attr "length" "10")])
+
(define_insn "cmpgtdf_t"
[(set (reg:SI T_REG)
(gt:SI (match_operand:DF 0 "arith_reg_operand" "f")
@@ -9071,7 +9513,7 @@ mov.l\\t1f,r0\\n\\
[(set (reg:SI T_REG)
(compare (match_operand:DF 0 "arith_operand" "")
(match_operand:DF 1 "arith_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "TARGET_SH1 || TARGET_SHMEDIA_FPU"
"
{
sh_compare_op0 = operands[0];
@@ -9169,7 +9611,7 @@ mov.l\\t1f,r0\\n\\
(define_expand "extendsfdf2"
[(set (match_operand:DF 0 "fp_arith_reg_operand" "")
(float_extend:DF (match_operand:SF 1 "fpul_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "TARGET_SH1 || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH4)
@@ -9178,6 +9620,12 @@ mov.l\\t1f,r0\\n\\
get_fpscr_rtx ()));
DONE;
}
+ else if (TARGET_SH1)
+ {
+ expand_sfunc_unop (SFmode, &gen_extendsfdf2_i1, \"__extendsfdf2_\",
+ FLOAT_EXTEND, operands);
+ DONE;
+ }
}")
(define_insn "*extendsfdf2_media"
@@ -9196,10 +9644,43 @@ mov.l\\t1f,r0\\n\\
[(set_attr "type" "fp")
(set_attr "fp_mode" "double")])
+;; ??? In order to use this efficiently, we'd have to have an extra
+;; register class for r0 and r1 - and that would cause repercussions in
+;; register allocation elsewhere. So just say we clobber r0 / r1, and
+;; that we can use an arbitrary target.
+(define_insn_and_split "extendsfdf2_i1"
+ [(set (match_operand:DF 0 "arith_reg_operand" "=r")
+ (float_extend:DF (reg:SF R4_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && !TARGET_SH4"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (reg:DF R0_REG))]
+ "emit_insn (gen_extendsfdf2_i1_r0 (operands[1]));"
+ [(set_attr "type" "sfunc")])
+
+(define_insn "extendsfdf2_i1_r0"
+ [(set (reg:DF R0_REG) (float_extend:DF (reg:SF R4_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && !TARGET_SH4"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
(define_expand "truncdfsf2"
[(set (match_operand:SF 0 "fpul_operand" "")
(float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "")))]
- "TARGET_SH4 || TARGET_SHMEDIA_FPU"
+ "TARGET_SH1 || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH4)
@@ -9208,6 +9689,12 @@ mov.l\\t1f,r0\\n\\
get_fpscr_rtx ()));
DONE;
}
+ else if (TARGET_SH1)
+ {
+ expand_sfunc_unop (DFmode, &gen_truncdfsf2_i1, \"__truncdfsf2_\",
+ FLOAT_TRUNCATE, operands);
+ DONE;
+ }
}")
(define_insn "*truncdfsf2_media"
@@ -9225,6 +9712,21 @@ mov.l\\t1f,r0\\n\\
"fcnvds %1,%0"
[(set_attr "type" "fp")
(set_attr "fp_mode" "double")])
+
+(define_insn "truncdfsf2_i1"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=z")
+ (float_truncate:SF (reg:DF R4_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH4"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
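The two conversion helpers round out the picture; their contracts as
the patterns imply them (C sketch, prototypes inferred):

    /* Sketch: DF result in r0/r1, SF result in r0 ("=z").  */
    double __extendsfdf2_ (float a /* r4 */);     /* result: r0/r1 */
    float __truncdfsf2_ (double a /* r4/r5 */);   /* result: r0 */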
;; Bit field extract patterns. These give better code for packed bitfields,
;; because they allow auto-increment addresses to be generated.
Index: t-sh
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/t-sh,v
retrieving revision 1.16.30.1
diff -p -u -r1.16.30.1 t-sh
--- t-sh 16 Jun 2004 19:58:35 -0000 1.16.30.1
+++ t-sh 30 Sep 2004 18:59:42 -0000
@@ -1,6 +1,8 @@
LIB1ASMSRC = sh/lib1funcs.asm
LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movstr \
_movstr_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
+ _nesf2 _nedf2 _gtsf2t _gtdf2t _gesf2f _gedf2f _extendsfdf2 _truncdfsf2 \
+ _add_sub_sf3 _mulsf3 _hypotf _muldf3 _add_sub_df3 \
$(LIB1ASMFUNCS_CACHE)
# We want fine grained libraries, so use the new code to build the