This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.
- From: Joern Rennecke <joern dot rennecke at superh dot com>
- To: kumar107 at rediffmail dot com
- Cc: kkojima at rr dot iij4u dot or dot jp, joern dot rennecke at superh dot com, gcc at gcc dot gnu dot org, aoliva at redhat dot com, amylaar at spamcop dot net
- Date: Wed, 4 Aug 2004 13:11:23 +0100 (BST)
- Subject: Re: RFC: Handling of libgcc symbols in SH shared libraries
> > ess registers
> > (and are faster, obviously).
>
> We, here at HCL, have implemented a hand-coded floating point library
> for single precision and double precision floating point arithmetic
> functions for SH architecture. The implementation conforms to IEEE-754
> standards and aims towards replacing the functions in [fd]p-bit.c.
> It could be seen as following of Pete
Are you saying you have implemented different rounding modes and
all the flags (inexact, overflow, invalid, underflow, ...)?
I have mostly kept to the current fp-bit functionality in terms of
features; however, I have made sure there is no double rounding for
denormals - IIRC fp-bit.c had some trouble with this.
Keeping within this feature set, and allowing for PIC code generation,
my priorities were to make the SH4 code fast, use fewer registers, and make
it compact, in that order.
> r's initiative for ARM. It will work for SH[1-4]. SH5 is not in scope.
>
> We are already through with the implementation and tested with release
> version on paranoia. It is working fine. The only thing remaining is
> benchmarking. It would be on the list in a few days.
>
> We don't want effort to be duplicated. Please make sure we are not
> working on same lines.
>
> Thanks and Best Regards,
> Rakesh Kumar
>
> Rakesh
I have appended what I have so far. I did some regression testing on most
of the code, but the hypotf implementation is still completely untested.
Its near_one code path is also unnecessary because of the rounding,
so another five instructions and four bytes of data can be saved.
Its 'tab' table could also be shared with single and double precision sqrt
by using one of three techniques:
- Put the table in a separate module, and load the address pc-relative
(static libraries only).
- Amalgamate the related functions into a single module (that makes most
sense in a dynamic library).
- Use separate sections and a linker script to group the related functions
together, and the table at the end (see the C sketch below).
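For illustration, the third variant could be sketched at the C level as
follows (hypothetical names; the real functions live in lib1funcs.asm and
would use .section directives instead, but the grouping idea is the same):

    #include <math.h>

    /* Code and table go into matching sections; a linker script rule like
         .text : { *(.text.fp_sqrt) *(.rodata.fp_sqrt) }
       then keeps the shared table within pc-relative reach of its users.  */
    __attribute__ ((section (".rodata.fp_sqrt")))
    static const signed char fp_sqrt_tab[96] = { -113, -84, -57 /* ... */ };

    __attribute__ ((section (".text.fp_sqrt")))
    float my_hypotf (float x, float y)
    {
      return sqrtf (x * x + y * y);   /* stand-in body */
    }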
Note that we need not only functions that implement the arithmetic, but
also expanders in sh.md / sh.c to take advantage of the lower register
usage and to enable some extra optimizations that work only on explicit
rtl operations, not on ordinary function calls.
The way fp-bit does comparisons is also encumbered with backwards
compatibility baggage from the time that there was only one comparison
function, and no proper NaN handling.
Let's see your code too so that we can see how we can integrate our work,
and what pieces are still useful to add or enhance.
Index: lib1funcs.asm
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/lib1funcs.asm,v
retrieving revision 1.36
diff -p -r1.36 lib1funcs.asm
*** lib1funcs.asm 12 Aug 2003 01:25:07 -0000 1.36
--- lib1funcs.asm 2 Aug 2004 03:57:36 -0000
*************** Boston, MA 02111-1307, USA. */
*** 37,42 ****
--- 37,44 ----
ELF local label prefixes by J"orn Rennecke
amylaar@cygnus.com */
+ #include "insn-constants.h"
+
#ifdef __ELF__
#define LOCAL(X) .L_##X
#define FUNC(X) .type X,@function
*************** Boston, MA 02111-1307, USA. */
*** 56,61 ****
--- 58,91 ----
#define FMOVD_WORKS
#endif
+ #ifdef __sh1__
+ #define SL(branch, dest, in_slot, in_slot_arg2) \
+ in_slot, in_slot_arg2; branch dest
+ #define SL_(branch, dest, in_slot) \
+ in_slot; branch dest
+ #define SLC(branch, dest, in_slot, in_slot_arg2) \
+ branch dest; in_slot, in_slot_arg2
+ #define SLI(in_slot, in_slot_arg2) in_slot, in_slot_arg2
+ #define SLCMP(branch, cmp1, cmp1arg2, cmp2, cmp2arg2) \
+ branch .+6; bra .+6; cmp2, cmp2arg2; cmp1, cmp1arg2
+ #else
+ #define SL(branch, dest, in_slot, in_slot_arg2) \
+ branch##/s dest; in_slot, in_slot_arg2
+ #define SL_(branch, dest, in_slot) \
+ branch##/s dest; in_slot
+ #define SLC(branch, dest, in_slot, in_slot_arg2) \
+ branch##/s dest; in_slot, in_slot_arg2
+ #define SLI(in_slot, in_slot_arg)
+ #define SLCMP(branch, cmp1, cmp1arg2, cmp2, cmp2arg2) \
+ branch##/s .+6; cmp1, cmp1arg2; cmp2, cmp2arg2
+ #endif
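+
+ /* For illustration: SL(bt, LOCAL(l), add #-8,r2) expands to
+ "add #-8,r2; bt LOCAL(l)" on SH1, which has no delay slots, and to
+ "bt/s LOCAL(l); add #-8,r2" on the later CPUs, where the insn in the
+ slot executes during the delayed branch. */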
+
+ #if defined (__sh1__) || defined (__sh2__) || defined (__SH2E__)
+ /* don't #define DYN_SHIFT */
+ #else
+ #define DYN_SHIFT 1
+ #endif
+
#if ! __SH5__
#ifdef L_ashiftrt
.global GLOBAL(ashiftrt_r4_0)
*************** GLOBAL(GCC_pop_shmedia_regs_nofpu):
*** 2873,2875 ****
--- 2903,4543 ----
ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
#endif /* __SH5__ == 32 */
#endif /* L_push_pop_shmedia_regs */
+
+ /* Floating-point emulation. We handle NANs, +-infinity, and +-zero.
+ However, we assume that for NANs, the topmost bit of the fraction is set. */
+ #ifdef L_nesf2
+ /* -ffinite-math-only inline version, T := r4:SF == r5:SF
+ cmp/eq r4,r5
+ mov r4,r0
+ bt 0f
+ or r5,r0
+ add r0,r0
+ tst r0,r0
+ 0: */
+ .global GLOBAL(nesf2_)
+ FUNC(GLOBAL(nesf2_))
+ GLOBAL(nesf2_):
+ /* If the raw values are unequal, the result is unequal, unless
+ both values are +-zero.
+ If the raw values are equal, the result is equal, unless
+ the values are nan. */
+ cmp/eq r4,r5
+ mov.l LOCAL(c_SF_NAN_MASK),r1
+ not r4,r0
+ bt LOCAL(check_nan)
+ mov r4,r0
+ or r5,r0
+ rts
+ add r0,r0
+ LOCAL(check_nan):
+ tst r1,r0
+ rts
+ movt r0
+ .balign 4
+ LOCAL(c_SF_NAN_MASK):
+ .long SF_NAN_MASK
+ ENDFUNC(GLOBAL(nesf2_))
+ #endif /* L_nesf2 */
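+
+ /* A C model of the sequence above (a sketch; a and b are the raw SFmode
+ bit patterns, and SF_NAN_MASK is the constant defined in sh.md). The
+ first return is nonzero unless both operands are +-0.0; the second
+ detects NaN through the quiet bit convention:
+
+ int nesf2_ (unsigned a, unsigned b)
+ {
+ if (a != b)
+ return (a | b) << 1;
+ return (~a & SF_NAN_MASK) == 0;
+ }
+ */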
+
+ #ifdef __LITTLE_ENDIAN__
+ #define DBL0L r4
+ #define DBL0H r5
+ #define DBL1L r6
+ #define DBL1H r7
+ #define DBLRL r0
+ #define DBLRH r1
+ #else
+ #define DBL0L r5
+ #define DBL0H r4
+ #define DBL1L r7
+ #define DBL1H r6
+ #define DBLRL r1
+ #define DBLRH r0
+ #endif
+
+ #ifdef L_nedf2
+ /* -ffinite-math-only -mb inline version, T := r4:DF == r6:DF
+ cmp/eq r5,r7
+ mov r4,r0
+ bf 0f
+ cmp/eq r4,r6
+ bt 0f
+ or r6,r0
+ add r0,r0
+ or r5,r0
+ tst r0,r0
+ 0: */
+ .global GLOBAL(nedf2_)
+ FUNC(GLOBAL(nedf2_))
+ GLOBAL(nedf2_):
+ cmp/eq DBL0L,DBL1L
+ mov.l LOCAL(c_DF_NAN_MASK),r1
+ bf LOCAL(ne)
+ cmp/eq DBL0H,DBL1H
+ not DBL0H,r0
+ bt LOCAL(check_nan)
+ mov DBL0H,r0
+ or DBL1H,r0
+ add r0,r0
+ rts
+ or DBL0L,r0
+ LOCAL(check_nan):
+ tst r1,r0
+ rts
+ movt r0
+ LOCAL(ne):
+ rts
+ mov #1,r0
+ .balign 4
+ LOCAL(c_DF_NAN_MASK):
+ .long DF_NAN_MASK
+ ENDFUNC(GLOBAL(nedf2_))
+ #endif /* L_nedf2 */
+
+ #ifdef L_unordsf2
+ .global GLOBAL(unordsf2_)
+ FUNC(GLOBAL(unordsf2_))
+ GLOBAL(unordsf2_):
+ mov.l LOCAL(c_SF_NAN_MASK),r1
+ not r4,r0
+ tst r1,r0
+ not r5,r0
+ bt LOCAL(unord)
+ tst r1,r0
+ LOCAL(unord):
+ rts
+ movt r0
+ .balign 4
+ LOCAL(c_SF_NAN_MASK):
+ .long SF_NAN_MASK
+ ENDFUNC(GLOBAL(unordsf2_))
+ #endif /* L_unordsf2 */
+
+ #ifdef L_unorddf2
+ .global GLOBAL(unorddf2_)
+ FUNC(GLOBAL(unorddf2_))
+ GLOBAL(unorddf2_):
+ mov.l LOCAL(c_DF_NAN_MASK),r1
+ not r4,r0
+ tst r1,r0
+ not r6,r0
+ bt LOCAL(unord)
+ tst r1,r0
+ LOCAL(unord):
+ rts
+ movt r0
+ .balign 4
+ LOCAL(c_DF_NAN_MASK):
+ .long DF_NAN_MASK
+ ENDFUNC(GLOBAL(unorddf2_))
+ #endif /* L_unorddf2 */
+
+ #if defined(L_gtsf2t) || defined(L_gtsf2t_trap)
+ /* -ffinite-math-only inline version, T := r4:SF > r5:SF ? 0 : 1
+ cmp/pz r4
+ mov r4,r0
+ bf/s 0f
+ cmp/hs r5,r4
+ cmp/ge r4,r5
+ or r5,r0
+ bt 0f
+ add r0,r0
+ tst r0,r0
+ 0: */
+ #ifdef L_gtsf2t
+ #define fun_label GLOBAL(gtsf2t)
+ #else
+ #define fun_label GLOBAL(gtsf2t_trap)
+ #endif
+ .global fun_label
+ FUNC(fun_label)
+ fun_label:
+ /* If the raw values compare greater, the result is true, unless
+ any of them is a nan (but infinity is fine), or both values are
+ +- zero. Otherwise, the result is false. */
+ mov.l LOCAL(c_SF_NAN_MASK),r1
+ cmp/pz r4
+ not r5,r0
+ SLC(bf, LOCAL(neg),
+ tst r1,r0)
+ mov r4,r0
+ bt LOCAL(nan)
+ cmp/gt r5,r4
+ SLC(bf, LOCAL(check_nan),
+ cmp/gt r4,r1)
+ bf LOCAL(nan)
+ or r5,r0
+ rts
+ add r0,r0
+ LOCAL(neg):
+ SLI(tst r1,r0)
+ bt LOCAL(nan)
+ not r4,r0
+ tst r1,r0
+ bt LOCAL(nan)
+ cmp/hi r4,r5
+ #if defined(L_gtsf2t) && defined(DELAYED_BRANCHES)
+ LOCAL(check_nan):
+ #endif /* DELAYED_BRANCHES */
+ rts
+ movt r0
+ #ifdef L_gtsf2t
+ LOCAL(check_nan):
+ LOCAL(nan):
+ rts
+ mov #0,r0
+ #else /* ! L_gtsf2t */
+ LOCAL(check_nan):
+ SLI(cmp/gt r4,r1)
+ bf LOCAL(nan)
+ rts
+ movt r0
+ LOCAL(nan):
+ mov #0,r0
+ trapa #0
+ #endif /* ! L_gtsf2t */
+ .balign 4
+ LOCAL(c_SF_NAN_MASK):
+ .long SF_NAN_MASK
+ ENDFUNC(fun_label)
+ #endif /* L_gtsf2t */
+
+ #if defined(L_gtdf2t) || defined(L_gtdf2t_trap)
+ #ifdef L_gtdf2t
+ #define fun_label GLOBAL(gtdf2t)
+ #else
+ #define fun_label GLOBAL(gtdf2t_trap)
+ #endif
+ .global fun_label
+ FUNC(fun_label)
+ fun_label:
+ /* If the raw values compare greater, the result is true, unless
+ any of them is a nan (but infinity is fine), or both values are
+ +- zero. Otherwise, the result is false. */
+ mov.l LOCAL(c_DF_NAN_MASK),r1
+ cmp/pz DBL0H
+ not DBL1H,r0
+ SLC(bf, LOCAL(neg),
+ tst r1,r0)
+ mov DBL0H,r0
+ bt LOCAL(nan) /* return zero if DBL1 is NAN. */
+ cmp/eq DBL1H,DBL0H
+ bt LOCAL(cmp_low)
+ cmp/gt DBL1H,DBL0H
+ or DBL1H,r0
+ SLC(bf, LOCAL(check_nan),
+ cmp/gt DBL0H,r1)
+ add r0,r0
+ bf LOCAL(nan) /* return zero if DBL0 is NAN. */
+ or DBL0L,r0
+ rts
+ or DBL1L,r0 /* non-zero unless both DBL0 and DBL1 are +-zero. */
+ LOCAL(cmp_low):
+ cmp/hi DBL1L,DBL0L
+ rts
+ movt r0
+ LOCAL(neg):
+ SLI(tst r1,r0)
+ bt LOCAL(nan) /* return zero if DBL1 is NAN. */
+ cmp/eq DBL1H,DBL0H
+ SLC(bt, LOCAL(neg_cmp_low),
+ cmp/hi DBL0L,DBL1L)
+ not r4,r0
+ tst r1,r0
+ bt LOCAL(nan) /* return zero if DBL0 is NAN. */
+ cmp/hi DBL0H,DBL1H
+ SLI(rts !,)
+ SLI(movt r0 !,)
+ LOCAL(neg_cmp_low):
+ SLI(cmp/hi DBL0L,DBL1L)
+ rts
+ movt r0
+ LOCAL(check_nan):
+ #ifdef L_gtdf2t
+ LOCAL(nan):
+ rts
+ mov #0,r0
+ #else
+ SLI(cmp/gt DBL0H,r1)
+ bf LOCAL(nan) /* return zero if DBL0 is NAN. */
+ rts
+ mov #0,r0
+ LOCAL(nan):
+ mov #0,r0
+ trapa #0
+ #endif
+ .balign 4
+ LOCAL(c_DF_NAN_MASK):
+ .long DF_NAN_MASK
+ ENDFUNC(fun_label)
+ #endif /* defined(L_gtdf2t) || defined(L_gtdf2t_trap) */
+
+ #if defined(L_gesf2f) || defined(L_gesf2f_trap)
+ /* -ffinite-math-only inline version, T := r4:SF >= r5:SF
+ cmp/pz r5
+ mov r4,r0
+ bf/s 0f
+ cmp/hs r4,r5
+ cmp/ge r5,r4
+ or r5,r0
+ bt 0f
+ add r0,r0
+ tst r0,r0
+ 0: */
+ #ifdef L_gesf2f
+ #define fun_label GLOBAL(gesf2f)
+ #else
+ #define fun_label GLOBAL(gesf2f_trap)
+ #endif
+ .global fun_label
+ FUNC(fun_label)
+ fun_label:
+ /* If the raw values compare greater or equal, the result is
+ true, unless any of them is a nan. If both are +-zero, the
+ result is true; otherwise, it is false.
+ We use 0 as true and nonzero as false for this function. */
+ mov.l LOCAL(c_SF_NAN_MASK),r1
+ cmp/pz r5
+ not r4,r0
+ SLC(bf, LOCAL(neg),
+ tst r1,r0)
+ mov r4,r0
+ bt LOCAL(nan)
+ cmp/gt r4,r5
+ SLC(bf, LOCAL(check_nan),
+ cmp/ge r1,r5)
+ bt LOCAL(nan)
+ or r5,r0
+ rts
+ add r0,r0
+ LOCAL(neg):
+ SLI(tst r1,r0)
+ bt LOCAL(nan)
+ not r5,r0
+ tst r1,r0
+ bt LOCAL(nan)
+ cmp/hi r5,r4
+ #if defined(L_gesf2f) && defined(DELAYED_BRANCHES)
+ LOCAL(nan): LOCAL(check_nan):
+ #endif
+ rts
+ movt r0
+ #if defined(L_gesf2f) && ! defined(DELAYED_BRANCHES)
+ LOCAL(check_nan):
+ cmp/ge r1,r5
+ LOCAL(nan):
+ rts
+ movt r0
+ #endif /* ! DELAYED_BRANCHES */
+ #ifdef L_gesf2f_trap
+ LOCAL(check_nan):
+ SLI(cmp/ge r1,r5)
+ bt LOCAL(nan)
+ rts
+ LOCAL(nan):
+ movt r0
+ trapa #0
+ #endif /* L_gesf2f_trap */
+ .balign 4
+ LOCAL(c_SF_NAN_MASK):
+ .long SF_NAN_MASK
+ ENDFUNC(fun_label)
+ #endif /* defined(L_gesf2f) || defined(L_gesf2f_trap) */
+
+ #ifdef L_gedf2f
+ .global GLOBAL(gedf2f)
+ FUNC(GLOBAL(gedf2f))
+ GLOBAL(gedf2f):
+ /* If the raw values compare greater or equal, the result is
+ true, unless any of them is a nan, or both are the
+ same infinity. If both are +-zero, the result is true;
+ otherwise, it is false.
+ We use 0 as true and nonzero as false for this function. */
+ mov.l LOCAL(c_DF_NAN_MASK),r1
+ cmp/pz DBL1H
+ not DBL0H,r0
+ SLC(bf, LOCAL(neg),
+ tst r1,r0)
+ mov DBL0H,r0
+ bt LOCAL(nan)
+ cmp/eq DBL0H,DBL1H
+ bt LOCAL(cmp_low)
+ cmp/gt DBL0H,DBL1H
+ or DBL1H,r0
+ SLC(bf, LOCAL(check_nan),
+ cmp/ge r1,DBL1H)
+ add r0,r0
+ bt LOCAL(nan)
+ or DBL0L,r0
+ rts
+ or DBL1L,r0
+ LOCAL(cmp_low):
+ cmp/hi DBL0L,DBL1L
+ #if defined(L_gedf2f) && defined(DELAYED_BRANCHES)
+ LOCAL(nan): LOCAL(check_nan):
+ #endif
+ rts
+ movt r0
+ #if defined(L_gedf2f) && ! defined(DELAYED_BRANCHES)
+ LOCAL(check_nan):
+ SLI(cmp/ge r1,DBL1H)
+ LOCAL(nan):
+ rts
+ movt r0
+ #elif defined(L_gedf2f_trap)
+ LOCAL(check_nan):
+ SLI(cmp/ge r1,DBL1H)
+ bt LOCAL(nan)
+ rts
+ LOCAL(nan):
+ movt r0
+ trapa #0
+ #endif /* L_gedf2f_trap */
+ LOCAL(neg):
+ SLI(tst r1,r0)
+ bt LOCAL(nan)
+ cmp/eq DBL0H,DBL1H
+ not DBL1H,r0
+ SLC(bt, LOCAL(neg_cmp_low),
+ cmp/hi DBL1L,DBL0L)
+ tst r1,r0
+ bt LOCAL(nan)
+ cmp/hi DBL1H,DBL0H
+ SLI(rts !,)
+ SLI(movt r0 !,)
+ LOCAL(neg_cmp_low):
+ SLI(cmp/hi DBL1L,DBL0L)
+ rts
+ movt r0
+ .balign 4
+ LOCAL(c_DF_NAN_MASK):
+ .long DF_NAN_MASK
+ ENDFUNC(GLOBAL(gedf2f))
+ #endif /* L_gedf2f */
+
+ #ifndef DYN_SHIFT /* Basic conversions for SH1 / SH2 */
+ #ifdef L_extendsfdf2
+ .global GLOBAL(extendsfdf2_)
+ FUNC(GLOBAL(extendsfdf2_))
+ GLOBAL(extendsfdf2_):
+ mov.l LOCAL(x7f800000),r3
+ mov r4,DBLRL
+ tst r3,r4
+ bt LOCAL(zero_denorm)
+ mov.l LOCAL(xe0000000),r2
+ rotr DBLRL
+ rotr DBLRL
+ rotr DBLRL
+ and r2,DBLRL
+ mov r4,DBLRH
+ not r4,r2
+ shll DBLRH
+ shlr2 DBLRH
+ shlr2 DBLRH
+ add DBLRH,DBLRH
+ rotcr DBLRH
+ tst r3,r2
+ bt LOCAL(inf_nan)
+ mov.l LOCAL(x38000000),r2
+ rts
+ add r2,DBLRH
+ LOCAL(inf_nan):
+ mov.l LOCAL(x70000000),r2
+ rts
+ add r2,DBLRH
+ LOCAL(zero_denorm):
+ mov.l r4,@-r15
+ add r4,r4
+ tst r4,r4
+ bt LOCAL(zero)
+ add r3,r3 /* 0xff000000 */
+ mov.l LOCAL(xb8000009),r2
+ LOCAL(shift_byte):
+ tst r3,r4
+ shll8 r4
+ SL(bt, LOCAL(shift_byte),
+ add #-8,r2)
+ LOCAL(shift_bit):
+ shll r4
+ SL(bf, LOCAL(shift_bit),
+ add #-1,r2)
+ mov r4,DBLRH
+ mov.l @r15+,r4
+ shlr8 DBLRH
+ shlr2 DBLRH
+ shlr DBLRH
+ rotcr DBLRL
+ cmp/pz r4
+ rotcr DBLRH
+ rotcr DBLRL
+ rts
+ add r2,DBLRH
+ LOCAL(zero):
+ mov.l @r15+,DBLRH
+ rts
+ mov #0,DBLRL
+ .balign 4
+ LOCAL(x7f800000):
+ .long 0x7f800000
+ LOCAL(x38000000):
+ .long 0x38000000
+ LOCAL(xe0000000):
+ .long 0xe0000000
+ LOCAL(x70000000):
+ .long 0x70000000
+ LOCAL(xb8000009):
+ /* Flip sign back, do exponent adjustment, and compensate for -8 / -1
+ adjustments in first shift loop iterations. */
+ .long 0x80000000 + 0x38000000 + 9
+ ENDFUNC(GLOBAL(extendsfdf2_))
+ #endif /* L_extendsfdf2 */
+
+ #ifdef L_truncdfsf2
+ .global GLOBAL(truncdfsf2_)
+ FUNC(GLOBAL(truncdfsf2_))
+ GLOBAL(truncdfsf2_):
+ mov.l LOCAL(x38000000),r3 ! exponent adjustment DF -> SF
+ mov DBL0H,r1
+ mov.l LOCAL(x70000000),r2 ! mask for out-of-range exponent bits
+ mov DBL0H,r0
+ mov.l DBL0L,@-r15
+ sub r3,r1
+ tst r2,r1
+ shll8 r0 !
+ shll2 r0 ! Isolate highpart fraction.
+ shll2 r0 !
+ bf LOCAL(ill_exp)
+ shll2 r1
+ mov.l LOCAL(x2fffffff),r2 /* Fraction lsb | lower guard bits. */
+ shll2 r1
+ mov.l LOCAL(xff000000),r3
+ shlr8 r0
+ tst r2,DBL0L /* Check if msb guard bit wants rounding up. */
+ shlr16 DBL0L
+ shlr8 DBL0L
+ shlr2 DBL0L
+ SL_(bt, LOCAL(add_frac),
+ shlr2 DBL0L)
+ add #1,DBL0L
+ LOCAL(add_frac):
+ add DBL0L,r0
+ mov.l LOCAL(x01000000),r2
+ and r3,r1
+ mov.l @r15+,DBL0L
+ add r1,r0
+ tst r3,r0
+ bt LOCAL(inf_denorm0)
+ cmp/hs r3,r0
+ LOCAL(denorm_noup_sh1):
+ bt LOCAL(inf)
+ div0s DBL0H,r2 /* copy orig. sign into T. */
+ rts
+ rotcr r0
+ LOCAL(inf_denorm0): ! We might need to undo previous rounding.
+ mov.l LOCAL(x2fffffff),r3 /* Old fraction lsb | lower guard bits. */
+ tst r1,r1
+ bf LOCAL(inf)
+ add #-1,r0
+ tst r3,DBL0L /* Check if msb guard bit was rounded up. */
+ mov.l LOCAL(x5fffffff),r3 /* Fraction lsb | lower guard bits. */
+ addc r2,r0
+ shlr r0
+ tst r3,DBL0L /* Check if msb guard bit wants rounding up. */
+ #ifdef DELAYED_BRANCHES
+ bt/s LOCAL(denorm_noup)
+ #else
+ bt LOCAL(denorm_noup_sh1)
+ #endif
+ div0s DBL0H,r2 /* copy orig. sign into T. */
+ add #1,r0
+ LOCAL(denorm_noup):
+ rts
+ rotcr r0
+ LOCAL(ill_exp):
+ div0s DBL0H,r1
+ mov.l LOCAL(x7ff80000),r2
+ add r1,r1
+ bf LOCAL(inf_nan)
+ mov.w LOCAL(m32),r3 /* Handle denormal or zero. */
+ shlr16 r1
+ exts.w r1,r1
+ shll2 r1
+ add r1,r1
+ shlr8 r1
+ exts.w r1,r1
+ add #-8,r1 /* Go from 9 to 1 guard bit in MSW. */
+ cmp/gt r3,r1
+ mov.l @r15+,r3 /* DBL0L */
+ bf LOCAL(zero)
+ mov.l DBL0L, @-r15
+ shll8 DBL0L
+ rotcr r0 /* Insert leading 1. */
+ shlr16 r3
+ shll2 r3
+ add r3,r3
+ shlr8 r3
+ cmp/pl DBL0L /* Check lower 23 guard bits if guard bit 23 is 0. */
+ addc r3,r0 /* Assemble fraction with compressed guard bits. */
+ mov.l @r15+,DBL0L
+ mov #0,r2
+ neg r1,r1
+ LOCAL(denorm_loop):
+ shlr r0
+ rotcl r2
+ dt r1
+ bf LOCAL(denorm_loop)
+ tst #2,r0
+ rotcl r0
+ tst r2,r2
+ rotcl r0
+ xor #3,r0
+ add #3,r0 /* Even overflow gives the correct result. */
+ shlr2 r0
+ div0s r0,DBL0H
+ rts
+ rotcr r0
+ LOCAL(zero):
+ mov #0,r0
+ div0s r0,DBL0H
+ rts
+ rotcr r0
+ LOCAL(inf_nan):
+ not DBL0H,r0
+ tst r2,r0
+ mov.l @r15+,DBL0L
+ bf LOCAL(inf)
+ rts
+ mov #-1,r0 /* NAN */
+ LOCAL(inf): /* r2 must be positive here. */
+ mov.l LOCAL(xffe00000),r0
+ div0s r2,DBL0H
+ rts
+ rotcr r0
+ LOCAL(m32):
+ .word -32
+ .balign 4
+ LOCAL(x38000000):
+ .long 0x38000000
+ LOCAL(x70000000):
+ .long 0x70000000
+ LOCAL(x2fffffff):
+ .long 0x2fffffff
+ LOCAL(x01000000):
+ .long 0x01000000
+ LOCAL(xff000000):
+ .long 0xff000000
+ LOCAL(x5fffffff):
+ .long 0x5fffffff
+ LOCAL(x7ff80000):
+ .long 0x7ff80000
+ LOCAL(xffe00000):
+ .long 0xffe00000
+ ENDFUNC(GLOBAL(truncdfsf2_))
+ #endif /* L_truncdfsf2 */
+ #endif /* ! DYN_SHIFT */
+
+ /* The actual arithmetic uses dynamic shift. Supporting SH1 / SH2 here would
+ make this code too hard to maintain, so if you want to add SH1 / SH2
+ support, do it in a separate copy. */
+ #ifdef DYN_SHIFT
+ #ifdef L_extendsfdf2
+ .global GLOBAL(extendsfdf2_)
+ FUNC(GLOBAL(extendsfdf2_))
+ GLOBAL(extendsfdf2_):
+ mov.l LOCAL(x7f800000),r2
+ mov #29,r3
+ mov r4,DBLRL
+ not r4,DBLRH
+ tst r2,r4
+ shld r3,DBLRL
+ bt LOCAL(zero_denorm)
+ mov #-3,r3
+ tst r2,DBLRH
+ mov r4,DBLRH
+ bt/s LOCAL(inf_nan)
+ shll DBLRH
+ shld r3,DBLRH
+ mov.l LOCAL(x38000000),r2
+ rotcr DBLRH
+ rts
+ add r2,DBLRH
+ .balign 4
+ LOCAL(inf_nan):
+ shld r2,DBLRH
+ mov.l LOCAL(x70000000),r2
+ rotcr DBLRH
+ rts
+ add r2,DBLRH
+ LOCAL(zero_denorm):
+ mov.l r4,@-r15
+ add r4,r4
+ tst r4,r4
+ extu.w r4,r2
+ bt LOCAL(zero)
+ cmp/eq r4,r2
+ extu.b r4,r1
+ bf/s LOCAL(three_bytes)
+ mov.l LOCAL(c__clz_tab),r0
+ cmp/eq r4,r1
+ mov #22,DBLRH
+ bt LOCAL(one_byte)
+ shlr8 r2
+ mov #14,DBLRH
+ LOCAL(one_byte):
+ #ifdef __pic__
+ add r0,r2
+ mova LOCAL(c__clz_tab),r0
+ #endif
+ mov.b @(r0,r2),r2
+ mov #21,r3
+ mov.w LOCAL(x0),DBLRL
+ sub r2,DBLRH
+ LOCAL(norm_shift):
+ shld DBLRH,r4
+ mov.l @r15+,r2
+ shld r3,DBLRH
+ mov.l LOCAL(xb7ffffff),r3
+ add r4,DBLRH
+ cmp/pz r2
+ mov r2,r4
+ rotcr DBLRH
+ rts
+ add r3,DBLRH
+ LOCAL(three_bytes):
+ mov r4,r2
+ shlr16 r2
+ #ifdef __pic__
+ add r0,r2
+ mova LOCAL(c__clz_tab),r0
+ #endif
+ mov.b @(r0,r2),r2
+ mov #21,r3
+ mov #6-32,DBLRH
+ sub r2,DBLRH
+ mov r4,DBLRL
+ shld DBLRH,DBLRL
+ bra LOCAL(norm_shift)
+ add #32,DBLRH
+ LOCAL(zero):
+ rts /* DBLRL has already been zeroed above. */
+ mov.l @r15+,DBLRH
+ LOCAL(x0):
+ .word 0
+ .balign 4
+ LOCAL(x7f800000):
+ .long 0x7f800000
+ LOCAL(x38000000):
+ .long 0x38000000
+ LOCAL(x70000000):
+ .long 0x70000000
+ LOCAL(xb7ffffff):
+ /* Flip sign back, do exponent adjustment, and remove leading one. */
+ .long 0x80000000 + 0x38000000 - 1
+ LOCAL(c__clz_tab):
+ #ifdef __pic__
+ .long GLOBAL(clz_tab) - .
+ #else
+ .long GLOBAL(clz_tab)
+ #endif
+ ENDFUNC(GLOBAL(extendsfdf2_))
+ #endif /* L_extendsfdf2 */
+
+ #ifdef L_truncdfsf2
+ .global GLOBAL(truncdfsf2_)
+ FUNC(GLOBAL(truncdfsf2_))
+ GLOBAL(truncdfsf2_):
+ mov.l LOCAL(x38000000),r3
+ mov DBL0H,r1
+ mov.l LOCAL(x70000000),r2
+ mov DBL0H,r0
+ sub r3,r1
+ mov.l DBL0L,@-r15
+ tst r2,r1
+ mov #12,r3
+ shld r3,r0 ! Isolate highpart fraction.
+ bf LOCAL(ill_exp)
+ shll2 r1
+ mov.l LOCAL(x2fffffff),r2 /* Fraction lsb | lower guard bits. */
+ shll2 r1
+ mov.l LOCAL(xff000000),r3
+ shlr8 r0
+ tst r2,DBL0L /* Check if msb guard bit wants rounding up. */
+ mov #-28,r2
+ bt/s LOCAL(add_frac)
+ shld r2,DBL0L
+ add #1,DBL0L
+ LOCAL(add_frac):
+ add DBL0L,r0
+ mov.l LOCAL(x01000000),r2
+ and r3,r1
+ mov.l @r15+,DBL0L
+ add r1,r0
+ tst r3,r0
+ bt LOCAL(inf_denorm0)
+ #if 0 // No point checking overflow -> infinity if we don't raise a signal.
+ cmp/hs r3,r0
+ bt LOCAL(inf)
+ #endif
+ div0s DBL0H,r2 /* copy orig. sign into T. */
+ rts
+ rotcr r0
+ LOCAL(inf_denorm0): ! We might need to undo previous rounding.
+ mov.l LOCAL(x2fffffff),r3 /* Old fraction lsb | lower guard bits. */
+ tst r1,r1
+ bf LOCAL(inf)
+ add #-1,r0
+ tst r3,DBL0L /* Check if msb guard bit was rounded up. */
+ mov.l LOCAL(x5fffffff),r3 /* Fraction lsb | lower guard bits. */
+ addc r2,r0
+ shlr r0
+ tst r3,DBL0L /* Check if msb guard bit wants rounding up. */
+ bt/s LOCAL(denorm_noup)
+ div0s DBL0H,r2 /* copy orig. sign into T. */
+ add #1,r0
+ LOCAL(denorm_noup):
+ rts
+ rotcr r0
+ LOCAL(ill_exp):
+ div0s DBL0H,r1
+ mov.l LOCAL(x7ff80000),r2
+ add r1,r1
+ bf LOCAL(inf_nan)
+ mov.w LOCAL(m32),r3 /* Handle denormal or zero. */
+ mov #-21,r2
+ shad r2,r1
+ add #-8,r1 /* Go from 9 to 1 guard bit in MSW. */
+ cmp/gt r3,r1
+ mov.l @r15+,r3 /* DBL0L */
+ bf LOCAL(zero)
+ mov.l DBL0L, @-r15
+ shll8 DBL0L
+ rotcr r0 /* Insert leading 1. */
+ shld r2,r3
+ cmp/pl DBL0L /* Check lower 23 guard bits if guard bit 23 is 0. */
+ addc r3,r0 /* Assemble fraction with compressed guard bits. */
+ mov r0,r2
+ shld r1,r0
+ mov.l @r15+,DBL0L
+ add #32,r1
+ shld r1,r2
+ tst #2,r0
+ rotcl r0
+ tst r2,r2
+ rotcl r0
+ xor #3,r0
+ add #3,r0 /* Even overflow gives the correct result. */
+ shlr2 r0
+ div0s r0,DBL0H
+ rts
+ rotcr r0
+ LOCAL(zero):
+ mov #0,r0
+ div0s r0,DBL0H
+ rts
+ rotcr r0
+ LOCAL(inf_nan):
+ not DBL0H,r0
+ tst r2,r0
+ mov.l @r15+,DBL0L
+ bf LOCAL(inf)
+ rts
+ mov #-1,r0 /* NAN */
+ LOCAL(inf): /* r2 must be positive here. */
+ mov.l LOCAL(xffe00000),r0
+ div0s r2,DBL0H
+ rts
+ rotcr r0
+ LOCAL(m32):
+ .word -32
+ .balign 4
+ LOCAL(x38000000):
+ .long 0x38000000
+ LOCAL(x70000000):
+ .long 0x70000000
+ LOCAL(x2fffffff):
+ .long 0x2fffffff
+ LOCAL(x01000000):
+ .long 0x01000000
+ LOCAL(xff000000):
+ .long 0xff000000
+ LOCAL(x5fffffff):
+ .long 0x5fffffff
+ LOCAL(x7ff80000):
+ .long 0x7ff80000
+ LOCAL(xffe00000):
+ .long 0xffe00000
+ ENDFUNC(GLOBAL(truncdfsf2_))
+ #endif /* L_truncdfsf2 */
+
+ #ifdef L_add_sub_sf3
+ .global GLOBAL(subsf3_)
+ FUNC(GLOBAL(subsf3_))
+ .global GLOBAL(addsf3_)
+ FUNC(GLOBAL(addsf3_))
+ GLOBAL(subsf3_):
+ cmp/pz r5
+ add r5,r5
+ rotcr r5
+ GLOBAL(addsf3_):
+ mov.l LOCAL(x7f800000),r3
+ mov r4,r6
+ add r6,r6
+ mov r5,r7
+ add r7,r7
+ mov r4,r0
+ or r3,r0
+ cmp/hi r6,r7
+ mov r5,r1
+ bf/s LOCAL(r4_hs)
+ or r3,r1
+ cmp/eq r5,r1
+ bt LOCAL(ret_r5) /* sole Inf or NaN, return unchanged. */
+ shll8 r0
+ tst r6,r6
+ shll8 r1
+ mov #-24,r2
+ bt LOCAL(denorm_r4)
+ LOCAL(denorm_r4_done):
+ mov r6,r3
+ shld r2,r3
+ mov r7,r6
+ shld r2,r6
+ sub r6,r3
+ mov r0,r7
+ shld r3,r0 /* Get 31 upper bits. */
+ mov.l LOCAL(xff000000),r2
+ add #31,r3
+ mov.l r5,@-r15 ! push result sign.
+ cmp/pl r3
+ shld r3,r7
+ bf LOCAL(ret_stack)
+ div0s r4,r5
+ bf/s LOCAL(add)
+ cmp/pl r7 /* Is LSB in r0 clear, but any lower guard bit set? */
+ subc r0,r1
+ mov.l LOCAL(c__clz_tab),r7
+ tst r2,r1
+ mov #-24,r3
+ bf/s LOCAL(norm_r0)
+ mov r1,r0
+ extu.w r1,r1
+ bra LOCAL(norm_check2)
+ cmp/eq r0,r1
+ LOCAL(ret_r5):
+ rts
+ mov r5,r0
+ LOCAL(ret_stack):
+ rts
+ mov.l @r15+,r0
+
+ /* We leave the numbers denormalized, but we change the bit position to be
+ consistent with normalized numbers. This also removes the spurious
+ leading one that was inserted before. */
+ LOCAL(denorm_r4):
+ tst r7,r7
+ add r0,r0
+ bf LOCAL(denorm_r4_done)
+ bra LOCAL(denorm_r4_done)
+ add r1,r1
+ LOCAL(denorm_r5):
+ tst r6,r6
+ add r1,r1
+ bf LOCAL(denorm_r5_done)
+ clrt
+ bra LOCAL(denorm_r5_done)
+ add r0,r0
+
+ /* If the exponent differs by two or more, normalization is minimal, and
+ few guard bits are needed for an exact final result, so sticky guard
+ bit compression before subtraction (or add) works fine.
+ If the exponent differs by one, only one extra guard bit is generated,
+ and effectively no guard bit compression takes place. */
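+
+ /* In C terms, the sticky compression amounts to (a sketch; frac is the
+ unsigned fraction and n > 1 the alignment shift count):
+
+ lost = frac & (((unsigned) 1 << n) - 1);
+ frac = (frac >> n) | (lost != 0);
+
+ i.e. every shifted-out bit collapses into a single sticky lsb. */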
+
+ LOCAL(r4_hs):
+ cmp/eq r4,r0
+ shll8 r0
+ bt LOCAL(inf_nan_arg0)
+ shll8 r1
+ mov #-24,r2
+ tst r7,r7
+ shld r2,r7
+ bt LOCAL(denorm_r5)
+ LOCAL(denorm_r5_done):
+ mov r1,r3
+ shld r2,r6
+ subc r6,r7
+ mov.l LOCAL(xff000000),r2
+ bf LOCAL(same_exp)
+ shld r7,r1 /* Get 31 upper bits. */
+ add #31,r7
+ mov.l r4,@-r15 ! push result sign.
+ cmp/pl r7
+ shld r7,r3
+ bf LOCAL(ret_stack)
+ div0s r4,r5
+ bf/s LOCAL(add)
+ cmp/pl r3 /* Is LSB in r1 clear, but any lower guard bit set? */
+ subc r1,r0
+ mov.l LOCAL(c__clz_tab),r7
+ LOCAL(norm_check):
+ tst r2,r0
+ mov #-24,r3
+ bf LOCAL(norm_r0)
+ extu.w r0,r1
+ cmp/eq r0,r1
+ LOCAL(norm_check2):
+ mov #-8,r3
+ bt LOCAL(norm_r0)
+ mov #-16,r3
+ LOCAL(norm_r0):
+ mov r0,r1
+ shld r3,r0
+ #ifdef __pic__
+ add r0,r7
+ mova LOCAL(c__clz_tab),r0
+ #endif
+ mov.b @(r0,r7),r7
+ add #25,r3
+ add #-9+1,r6
+ mov r1,r0
+ sub r7,r3
+ mov.l LOCAL(xbfffffff),r7
+ sub r3,r6 /* generate exp-1 */
+ mov.w LOCAL(d24),r2
+ cmp/pz r6 /* check exp > 0 */
+ shld r3,r0 /* Leading 1 becomes +1 exp adjustment. */
+ bf LOCAL(zero_denorm)
+ LOCAL(denorm_done):
+ add #30,r3
+ shld r3,r1
+ mov.w LOCAL(m1),r3
+ tst r7,r1 ! clear T if rounding up
+ shld r2,r6
+ subc r3,r0 ! round - overflow will boost exp adjustment to 2.
+ mov.l @r15+,r2
+ add r6,r0 ! overflow will generate inf
+ cmp/ge r2,r3 ! get sign into T
+ rts
+ rotcr r0
+ LOCAL(ret_r4):
+ rts
+ mov r4,r0
+
+ /* At worst, we are shifting the number back in place where an incoming
+ denormal was. Thus, the shifts won't get out of range. They still
+ might generate a zero fraction, but that's OK, that makes it 0. */
+ LOCAL(zero_denorm):
+ add r6,r3
+ mov r1,r0
+ mov #0,r6 /* leading one will become free (except for rounding) */
+ bra LOCAL(denorm_done)
+ shld r3,r0
+
+ /* Handle abs(r4) >= abs(r5), same exponents specially so we don't need
+ check for a zero fraction in the main path. */
+ LOCAL(same_exp):
+ div0s r4,r5
+ mov.l r4,@-r15
+ bf LOCAL(add)
+ cmp/eq r1,r0
+ mov.l LOCAL(c__clz_tab),r7
+ bf/s LOCAL(norm_check)
+ sub r1,r0
+ mov.l @r15+,r1
+ cmp/gt r4,r0 ! copy sign
+ rts
+ rotcr r0
+
+ /* r2: 0xff000000 */
+ LOCAL(add):
+ addc r1,r0
+ mov.w LOCAL(x2ff),r7
+ shll8 r6
+ bf/s LOCAL(no_carry)
+ shll16 r6
+ tst r7,r0
+ shlr8 r0
+ mov.l @r15+,r3 ! discard saved sign
+ subc r2,r0
+ sett
+ addc r6,r0
+ cmp/hs r2,r0
+ bt/s LOCAL(inf)
+ div0s r7,r4 /* Copy sign. */
+ rts
+ rotcr r0
+ LOCAL(inf):
+ mov r6,r0
+ rts
+ rotcr r0
+ LOCAL(no_carry):
+ mov.w LOCAL(m1),r3
+ shll r0
+ bf LOCAL(denorm_add)
+ tst r7,r0
+ shlr8 r0
+ mov.l @r15+,r1 ! discard saved sign
+ subc r3,r0 ! round ; overflow -> exp++
+ cmp/ge r4,r3 /* Copy sign. */
+ add r6,r0 ! overflow -> inf
+ rts
+ rotcr r0
+
+ LOCAL(denorm_add):
+ shlr r0
+ cmp/ge r4,r3 /* Copy sign. */
+ shlr8 r0
+ mov.l @r15+,r1 ! discard saved sign
+ rts
+ rotcr r0
+
+ LOCAL(inf_nan_arg0):
+ cmp/eq r5,r1
+ bf LOCAL(ret_r4)
+ div0s r4,r5 /* Both are inf or NaN, check signs. */
+ bt LOCAL(ret_nan) /* inf - inf, or NaN. */
+ mov r4,r0 ! same sign; return NaN if either is NaN.
+ rts
+ or r5,r0
+ LOCAL(ret_nan):
+ rts
+ mov #-1,r0
+
+ LOCAL(d24):
+ .word 24
+ LOCAL(x2ff):
+ .word 0x2ff
+ LOCAL(m1):
+ .word -1
+ .balign 4
+ LOCAL(x7f800000):
+ .long 0x7f800000
+ LOCAL(xbfffffff):
+ .long 0xbfffffff
+ LOCAL(xff000000):
+ .long 0xff000000
+ LOCAL(xfe000000):
+ .long 0xfe000000
+ LOCAL(c__clz_tab):
+ #ifdef __pic__
+ .long GLOBAL(clz_tab) - .
+ #else
+ .long GLOBAL(clz_tab)
+ #endif
+
+ ENDFUNC(GLOBAL(addsf3_))
+ ENDFUNC(GLOBAL(subsf3_))
+ #endif /* L_add_sub_sf3 */
+
+ #ifdef L_mulsf3
+ .global GLOBAL(mulsf3_)
+ FUNC(GLOBAL(mulsf3_))
+ GLOBAL(mulsf3_):
+ mov.l LOCAL(x7f800000),r1
+ not r4,r2
+ mov r4,r3
+ not r5,r0
+ tst r1,r2
+ or r1,r3
+ bt/s LOCAL(inf_nan_arg0)
+ tst r1,r0
+ bt LOCAL(inf_nan_arg1)
+ tst r1,r5
+ mov r1,r2
+ shll8 r3
+ or r5,r1
+ bt/s LOCAL(zero_denorm_arg1)
+ shll8 r1
+ tst r2,r4
+ bt LOCAL(zero_denorm_arg0)
+ dmulu.l r3,r1
+ mov r4,r0
+ and r2,r0
+ LOCAL(arg_norm):
+ and r5,r2
+ mov.l LOCAL(x3f800000),r3
+ sts mach,r1
+ sub r3,r0
+ sts macl,r3
+ add r2,r0
+ cmp/pz r1
+ mov.w LOCAL(x100),r2
+ bf/s LOCAL(norm_frac)
+ tst r3,r3
+ shll2 r1 /* Shift one up, replace leading 1 with 0. */
+ shlr r1
+ tst r3,r3
+ LOCAL(norm_frac):
+ mov.w LOCAL(mx80),r3
+ bf LOCAL(round_frac)
+ tst r2,r1
+ LOCAL(round_frac):
+ mov.l LOCAL(xff000000),r2
+ subc r3,r1 /* Even overflow gives right result: exp++, frac=0. */
+ shlr8 r1
+ add r1,r0
+ shll r0
+ bt LOCAL(ill_exp)
+ tst r2,r0
+ bt LOCAL(denorm0)
+ cmp/hs r2,r0
+ bt LOCAL(inf)
+ LOCAL(insert_sign):
+ div0s r4,r5
+ rts
+ rotcr r0
+ LOCAL(denorm0):
+ sub r2,r0
+ bra LOCAL(insert_sign)
+ shlr r0
+ LOCAL(zero_denorm_arg1):
+ mov.l LOCAL(x60000000),r2 /* Check exp0 >= -64 */
+ add r1,r1
+ tst r1,r1 /* arg1 == 0 ? */
+ mov #0,r0
+ bt LOCAL(insert_sign) /* argument 1 is zero ==> return 0 */
+ tst r4,r2
+ bt LOCAL(insert_sign) /* exp0 < -64 ==> return 0 */
+ mov.l LOCAL(c__clz_tab),r0
+ mov r3,r2
+ mov r1,r3
+ bra LOCAL(arg_normalize)
+ mov r2,r1
+ LOCAL(zero_denorm_arg0):
+ mov.l LOCAL(x60000000),r2 /* Check exp1 >= -64 */
+ add r3,r3
+ tst r3,r3 /* arg0 == 0 ? */
+ mov #0,r0
+ bt LOCAL(insert_sign) /* argument 0 is zero ==> return 0 */
+ tst r5,r2
+ bt LOCAL(insert_sign) /* exp1 < -64 ==> return 0 */
+ mov.l LOCAL(c__clz_tab),r0
+ LOCAL(arg_normalize):
+ mov.l r7,@-r15
+ extu.w r3,r7
+ cmp/eq r3,r7
+ mov.l LOCAL(xff000000),r7
+ mov #-8,r2
+ bt 0f
+ tst r7,r3
+ mov #-16,r2
+ bt 0f
+ mov #-24,r2
+ 0:
+ mov r3,r7
+ shld r2,r7
+ #ifdef __pic__
+ add r0,r7
+ mova LOCAL(c__clz_tab),r0
+ #endif
+ mov.b @(r0,r7),r0
+ add #32,r2
+ mov r2,r7
+ mov #23,r2
+ sub r0,r7
+ mov.l LOCAL(x7f800000),r0
+ shld r7,r3
+ shld r2,r7
+ mov r0,r2
+ and r4,r0
+ sub r7,r0
+ mov.l @r15+,r7
+ bra LOCAL(arg_norm)
+ dmulu.l r3,r1
+ #if 0 /* This is slightly slower, but could be used if table lookup causes
+ cache thrashing. */
+ bt LOCAL(insert_sign) /* exp1 < -64 ==> return 0 */
+ mov.l LOCAL(xff000000),r2
+ mov r4,r0
+ LOCAL(arg_normalize):
+ tst r2,r3
+ bf LOCAL(arg_bit_norm)
+ LOCAL(arg_byte_loop):
+ tst r2,r3
+ add r2,r0
+ shll8 r3
+ bt LOCAL(arg_byte_loop)
+ add r4,r0
+ LOCAL(arg_bit_norm):
+ mov.l LOCAL(x7f800000),r2
+ rotl r3
+ LOCAL(arg_bit_loop):
+ add r2,r0
+ bf/s LOCAL(arg_bit_loop)
+ rotl r3
+ rotr r3
+ rotr r3
+ sub r2,r0
+ bra LOCAL(arg_norm)
+ dmulu.l r3,r1
+ #endif /* 0 */
+ LOCAL(inf):
+ bra LOCAL(insert_sign)
+ mov r2,r0
+ LOCAL(inf_nan_arg0):
+ bt LOCAL(inf_nan_both)
+ add r0,r0
+ cmp/eq #-1,r0 /* arg1 zero? -> NAN */
+ bt LOCAL(insert_sign)
+ mov r4,r0
+ LOCAL(inf_insert_sign):
+ bra LOCAL(insert_sign)
+ add r0,r0
+ LOCAL(inf_nan_both):
+ mov r4,r0
+ bra LOCAL(inf_insert_sign)
+ or r5,r0
+ LOCAL(inf_nan_arg1):
+ mov r2,r0
+ add r0,r0
+ cmp/eq #-1,r0 /* arg0 zero? */
+ bt LOCAL(insert_sign)
+ bra LOCAL(inf_insert_sign)
+ mov r5,r0
+ LOCAL(ill_exp):
+ cmp/pz r0
+ mov #-24,r3
+ bt LOCAL(inf)
+ add r1,r1
+ mov r0,r2
+ sub r1,r2 ! remove fraction to get back pre-rounding exponent.
+ sts mach,r0
+ sts macl,r1
+ shad r3,r2
+ mov r0,r3
+ shld r2,r0
+ add #32,r2
+ cmp/pz r2
+ shld r2,r3
+ bf LOCAL(zero)
+ or r1,r3
+ mov #-1,r1
+ tst r3,r3
+ mov.w LOCAL(x100),r3
+ bf/s LOCAL(denorm_round_up)
+ mov #-0x80,r1
+ tst r3,r0
+ LOCAL(denorm_round_up):
+ mov #-7,r3
+ subc r1,r0
+ bra LOCAL(insert_sign)
+ shld r3,r0
+ LOCAL(zero):
+ bra LOCAL(insert_sign)
+ mov #0,r0
+ LOCAL(x100):
+ .word 0x100
+ LOCAL(mx80):
+ .word -0x80
+ .balign 4
+ LOCAL(x7f800000):
+ .long 0x7f800000
+ LOCAL(x3f800000):
+ .long 0x3f800000
+ LOCAL(xff000000):
+ .long 0xff000000
+ LOCAL(x60000000):
+ .long 0x60000000
+ LOCAL(c__clz_tab):
+ #ifdef __pic__
+ .long GLOBAL(clz_tab) - .
+ #else
+ .long GLOBAL(clz_tab)
+ #endif
+ ENDFUNC(GLOBAL(mulsf3_))
+ #endif /* L_mulsf3 */
+
+ #ifdef L_hypotf
+ .global GLOBAL(hypotf)
+ FUNC(GLOBAL(hypotf))
+ GLOBAL(hypotf):
+ /* This integer implementation takes 71 to 72 cycles in the main path.
+ This is a bit slower than the SH4 can do the same computation in double
+ precision hardware floating point: 57 cycles, or 69 with mode switches. */
+ /* First, calculate x (r4) as the sum of the square of the fractions -
+ the exponent is calculated separately in r3.
+ Then, calculate sqrt(x) for the fraction by reciproot iteration.
+ We get a 7.5 bit initial value using linear approximation with two slopes
+ that are powers of two.
+ x (- [1. .. 2.) y0 := 1.25 - x/4 - tab(x) y (- (0.8 .. 1.0)
+ x (- [2. .. 4.) y0 := 1. - x/8 - tab(x) y (- (0.5 .. 0.8)
+ x is represented with two bits before the point,
+ y with 0 bits before the binary point.
+ Thus, to calculate y0 := 1. - x/8 - tab(x), all you have to do is to shift x
+ right by 1, negate it, and subtract tab(x). */
+
+ /* y1 := 1.5*y0 - 0.5 * (x * y0) * (y0 * y0)
+ z0 := x * y1
+ z1 := z0 + 0.5 * (y1 - (y1*y1) * z0) */
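+
+ /* A double precision C model of this refinement (a sketch; y0 stands for
+ the 7.5 bit initial approximation of 1/sqrt(x) described above):
+
+ double sqrt_model (double x, double y0)
+ {
+ double y1 = 1.5 * y0 - 0.5 * (x * y0) * (y0 * y0);
+ double z0 = x * y1;
+ return z0 + 0.5 * (y1 - (y1 * y1) * z0);
+ }
+
+ The y1 step is a Newton-Raphson iteration for 1/sqrt(x), roughly doubling
+ the number of good bits; the z1 correction doubles it again, which takes
+ the result past the 24 bit SFmode fraction. */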
+
+ mov.l LOCAL(xff000000),r1
+ add r4,r4
+ mov r4,r0
+ add r5,r5
+ cmp/hs r5,r4
+ sub r5,r0
+ mov #-24,r2
+ bf/s LOCAL(r5_large)
+ shad r2,r0
+ mov r4,r3
+ shll8 r4
+ rotcr r4
+ tst #0xe0,r0
+ neg r0,r0
+ bt LOCAL(ret_abs_r3)
+ tst r1,r5
+ shll8 r5
+ bt/s LOCAL(denorm_r5)
+ cmp/hi r3,r1
+ dmulu.l r4,r4
+ bf LOCAL(inf_nan)
+ rotcr r5
+ shld r0,r5
+ LOCAL(denorm_r5_done):
+ sts mach,r4
+ dmulu.l r5,r5
+ mov.l r6,@-r15
+ mov #20,r6
+
+ sts mach,r5
+ LOCAL(add_frac):
+ mova LOCAL(tab)-32,r0
+ mov.l r7,@-r15
+ mov.w LOCAL(x1380),r7
+ and r1,r3
+ addc r5,r4
+ mov.w LOCAL(m25),r2 ! -25
+ bf LOCAL(frac_ok)
+ sub r1,r3
+ rotcr r4
+ cmp/eq r1,r3 ! did we generate infinity ?
+ bt LOCAL(inf_nan)
+ shlr r4
+ mov r4,r1
+ shld r2,r1
+ mov.b @(r0,r1),r0
+ mov r4,r1
+ shld r6,r1
+ bra LOCAL(frac_low2)
+ sub r1,r7
+
+ LOCAL(frac_ok):
+ mov r4,r1
+ shld r2,r1
+ mov.b @(r0,r1),r1
+ cmp/pz r4
+ mov r4,r0
+ bt/s LOCAL(frac_low)
+ shld r6,r0
+ mov.w LOCAL(xf80),r7
+ shlr r0
+ LOCAL(frac_low):
+ sub r0,r7
+ LOCAL(frac_low2):
+ mov.l LOCAL(x40000080),r0 ! avoid denorm results near 1. << r3
+ sub r1,r7 ! {0.12}
+ mov.l LOCAL(xfffe0000),r5 ! avoid rounding overflow near 4. << r3
+ swap.w r7,r1 ! {0.28}
+ dmulu.l r1,r4 /* two issue cycles */
+ mulu.w r7,r7 /* two issue cycles */
+ sts mach,r2 ! {0.26}
+ mov r1,r7
+ shlr r1
+ sts macl,r6 ! {0.24}
+ cmp/hi r0,r4
+ shlr2 r2
+ bf LOCAL(near_one)
+ shlr r2 ! {0.23} systemic error of linear approximation keeps y1 < 1
+ dmulu.l r2,r6
+ cmp/hs r5,r4
+ add r7,r1 ! {1.28}
+ bt LOCAL(near_four)
+ shlr2 r1 ! {1.26}
+ sts mach,r0 ! {0.15} x*y0^3 == {0.16} 0.5*x*y0^3
+ shlr2 r1 ! {1.24}
+ shlr8 r1 ! {1.16}
+ sett ! compensate for truncation of subtrahend, keep y1 < 1
+ subc r0,r1 ! {0.16} y1; max error about 3.5 ulp
+ swap.w r1,r0
+ dmulu.l r0,r4 ! { 1.30 }
+ mulu.w r1,r1
+ sts mach,r2
+ shlr2 r0
+ sts macl,r1
+ add r2,r0
+ mov.l LOCAL(xff000000),r6
+ add r2,r0
+ dmulu.l r1,r2
+ add #127,r0
+ add r6,r3 ! precompensation for adding leading 1
+ sts mach,r1
+ shlr r3
+ mov.l @r15+,r7
+ sub r1,r0 ! {0.31} max error about 50 ulp (+127)
+ mov.l @r15+,r6
+ shlr8 r0 ! {0.23} max error about 0.7 ulp
+ rts
+ add r3,r0
+
+ LOCAL(r5_large):
+ mov r5,r3
+ mov #-31,r2
+ cmp/ge r2,r0
+ shll8 r5
+ bf LOCAL(ret_abs_r3)
+ rotcr r5
+ tst r1,r4
+ shll8 r4
+ bt/s LOCAL(denorm_r4)
+ cmp/hi r3,r1
+ dmulu.l r5,r5
+ bf LOCAL(inf_nan)
+ rotcr r4
+ LOCAL(denorm_r4_done):
+ shld r0,r4
+ sts mach,r5
+ dmulu.l r4,r4
+ mov.l r6,@-r15
+ mov #20,r6
+ bra LOCAL(add_frac)
+ sts mach,r4
+
+ LOCAL(near_one):
+ bra LOCAL(assemble_sqrt)
+ mov #0,r0
+ LOCAL(near_four):
+ ! exact round-to-nearest would add 255. We add 256 for speed & compactness.
+ mov r4,r0
+ shlr8 r0
+ add #1,r0
+ tst r0,r0
+ addc r0,r3 ! might generate infinity.
+ LOCAL(assemble_sqrt):
+ mov.l @r15+,r7
+ shlr r3
+ mov.l @r15+,r6
+ rts
+ add r3,r0
+ LOCAL(inf_nan):
+ LOCAL(ret_abs_r3):
+ mov r3,r0
+ rts
+ shlr r0
+ LOCAL(denorm_r5):
+ bf LOCAL(inf_nan)
+ tst r1,r4
+ bt LOCAL(denorm_both)
+ dmulu.l r4,r4
+ bra LOCAL(denorm_r5_done)
+ shld r0,r5
+ LOCAL(denorm_r4):
+ bf LOCAL(inf_nan)
+ tst r1,r5
+ dmulu.l r5,r5
+ bf LOCAL(denorm_r4_done)
+ LOCAL(denorm_both): ! normalize according to r3.
+ extu.w r3,r2
+ mov.l LOCAL(c__clz_tab),r0
+ cmp/eq r3,r2
+ mov #-8,r2
+ bt 0f
+ tst r1,r3
+ mov #-16,r2
+ bt 0f
+ mov #-24,r2
+ 0:
+ shld r2,r3
+ mov.l r7,@-r15
+ #ifdef __pic__
+ add r0,r3
+ mova LOCAL(c__clz_tab),r0
+ #endif
+ mov.b @(r0,r3),r0
+ add #32,r2
+ sub r0,r2
+ shld r2,r4
+ mov r2,r7
+ dmulu.l r4,r4
+ sts.l pr,@-r15
+ mov #1,r3
+ bsr LOCAL(denorm_r5_done)
+ shld r2,r5
+ mov.l LOCAL(x01000000),r1
+ neg r7,r2
+ lds.l @r15+,pr
+ tst r1,r0
+ mov.l @r15+,r7
+ bt 0f
+ add #1,r2
+ sub r1,r0
+ 0:
+ rts
+ shld r2,r0
+
+ LOCAL(m25):
+ .word -25
+ LOCAL(x1380):
+ .word 0x1380
+ LOCAL(xf80):
+ .word 0xf80
+ .balign 4
+ LOCAL(xff000000):
+ .long 0xff000000
+ LOCAL(x40000080):
+ .long 0x40000080
+ LOCAL(xfffe0000):
+ .long 0xfffe0000
+ LOCAL(x01000000):
+ .long 0x01000000
+ LOCAL(c__clz_tab):
+ #ifdef __pic__
+ .long GLOBAL(clz_tab) - .
+ #else
+ .long GLOBAL(clz_tab)
+ #endif
+
+ /*
+ #include <math.h>
+ #include <stdio.h>
+
+ double err(double x)
+ {
+ return (x < 2. ? 1.25 - x/4. : 1. - x/8.) - 1./sqrt(x);
+ }
+
+ int
+ main ()
+ {
+ int i = 0;
+ double x, s, v;
+ double lx, hx;
+
+ s = 1./32.;
+ for (x = 1.; x < 4; x += s, i++)
+ {
+ lx = x;
+ hx = x + s - 1. / (1 << 30);
+ v = 0.5 * (err (lx) + err (hx));
+ printf ("%s% 4d%c",
+ (i & 7) == 0 ? "\t.byte\t" : "",
+ (int)(v * 4096 + 0.5) - 128,
+ (i & 7) == 7 ? '\n' : ',');
+ }
+ return 0;
+ } */
+
+ .balign 4
+ LOCAL(tab):
+ .byte -113, -84, -57, -33, -11, 8, 26, 41
+ .byte 55, 67, 78, 87, 94, 101, 106, 110
+ .byte 113, 115, 115, 115, 114, 112, 109, 106
+ .byte 101, 96, 91, 84, 77, 69, 61, 52
+ .byte 51, 57, 63, 68, 72, 77, 80, 84
+ .byte 87, 89, 91, 93, 95, 96, 97, 97
+ .byte 97, 97, 97, 96, 95, 94, 93, 91
+ .byte 89, 87, 84, 82, 79, 76, 72, 69
+ .byte 65, 61, 57, 53, 49, 44, 39, 34
+ .byte 29, 24, 19, 13, 8, 2, -4, -10
+ .byte -17, -23, -29, -36, -43, -50, -57, -64
+ .byte -71, -78, -85, -93,-101,-108,-116,-124
+ ENDFUNC(GLOBAL(hypotf))
+ #endif /* L_hypotf */
+ #endif /* DYN_SHIFT */
Index: sh-modes.def
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/sh-modes.def,v
retrieving revision 1.1
diff -p -r1.1 sh-modes.def
*** sh-modes.def 13 Oct 2003 21:16:32 -0000 1.1
--- sh-modes.def 2 Aug 2004 03:57:36 -0000
***************
*** 1,5 ****
! /* Alpha extra machine modes.
! Copyright (C) 2003 Free Software Foundation, Inc.
This file is part of GCC.
--- 1,5 ----
! /* SH extra machine modes.
! Copyright (C) 2004 Free Software Foundation, Inc.
This file is part of GCC.
*************** Boston, MA 02111-1307, USA. */
*** 21,23 ****
--- 21,27 ----
/* The SH uses a partial integer mode to represent the FPSCR register. */
PARTIAL_INT_MODE (SI);
+ /* For software floating point comparisons. */
+ CC_MODE (CC_FP_NE);
+ CC_MODE (CC_FP_GT);
+ CC_MODE (CC_FP_UNLT);
Index: sh-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/sh-protos.h,v
retrieving revision 1.55
diff -p -r1.55 sh-protos.h
*** sh-protos.h 10 May 2004 23:25:13 -0000 1.55
--- sh-protos.h 2 Aug 2004 03:57:36 -0000
*************** extern void expand_sf_binop (rtx (*)(rtx
*** 93,98 ****
--- 93,102 ----
extern void expand_df_unop (rtx (*)(rtx, rtx, rtx), rtx *);
extern void expand_df_binop (rtx (*)(rtx, rtx, rtx, rtx), rtx *);
extern void expand_fp_branch (rtx (*)(void), rtx (*)(void));
+ extern void expand_sfunc_unop (enum machine_mode, rtx (*) (rtx, rtx),
+ const char *, enum rtx_code code, rtx *);
+ extern void expand_sfunc_binop (enum machine_mode, rtx (*) (rtx, rtx),
+ const char *, enum rtx_code code, rtx *);
extern int sh_insn_length_adjustment (rtx);
extern int sh_can_redirect_branch (rtx, rtx);
extern void sh_expand_unop_v2sf (enum rtx_code, rtx, rtx);
Index: sh.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/sh.c,v
retrieving revision 1.270.2.3
diff -p -r1.270.2.3 sh.c
*** sh.c 29 Jun 2004 17:33:57 -0000 1.270.2.3
--- sh.c 2 Aug 2004 03:57:38 -0000
*************** prepare_scc_operands (enum rtx_code code
*** 1061,1066 ****
--- 1061,1128 ----
return t_reg;
}
+ static rtx
+ sh_soft_fp_cmp (int code, enum machine_mode op_mode)
+ {
+ const char *name;
+ rtx (*fun) (rtx, rtx), addr, tmp, first, last, equiv;
+ int df = op_mode == DFmode;
+ enum machine_mode mode;
+
+ if (flag_finite_math_only && ! df)
+ switch (code)
+ {
+ case EQ:
+ return gen_cmpeqsf_i1_finite (sh_compare_op0, sh_compare_op1);
+ case LE:
+ case UNLE:
+ return gen_cmplesf_i1_finite (sh_compare_op0, sh_compare_op1);
+ case GE:
+ case UNGE:
+ return gen_cmplesf_i1_finite (sh_compare_op1, sh_compare_op0);
+ default:
+ break;
+ }
+ if (flag_finite_math_only && df && code == EQ)
+ return gen_cmpeqdf_i1_finite (sh_compare_op0, sh_compare_op1);
+
+ switch (code)
+ {
+ case EQ:
+ name = df ? "__nedf2_" : "__nesf2_";
+ fun = df ? gen_cmpnedf_i1 : gen_cmpnesf_i1;
+ mode = CC_FP_NEmode;
+ break;
+ case UNLE:
+ name = df ? "__gtdf2t" : "__gtsf2t";
+ fun = df ? gen_cmpgtdf_i1 : gen_cmpgtsf_i1;
+ mode = CC_FP_GTmode;
+ break;
+ case GE:
+ name = df ? "__gedf2f" : "__gesf2f";
+ fun = df ? gen_cmpunltdf_i1 : gen_cmpunltsf_i1;
+ mode = CC_FP_UNLTmode;
+ break;
+ default: abort ();
+ }
+ tmp = gen_reg_rtx (mode);
+ addr = force_reg (Pmode, function_symbol (name));
+ first = emit_move_insn (gen_rtx_REG (op_mode, R4_REG), sh_compare_op0);
+ emit_move_insn (gen_rtx_REG (op_mode, R5_REG + df), sh_compare_op1);
+ last = emit_insn (fun (tmp, addr));
+ equiv = gen_rtx_fmt_ee (COMPARE, mode, sh_compare_op0, sh_compare_op1);
+ REG_NOTES (last) = gen_rtx_EXPR_LIST (REG_EQUAL, equiv, REG_NOTES (last));
+ /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop
+ invariant code motion can move it. */
+ REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first));
+ REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
+ /* Use fpcmp_i1 rather than cmpeqsi_t, so that the optimizers can grok
+ the computation. */
+ return gen_rtx_SET (VOIDmode,
+ gen_rtx_REG (SImode, T_REG),
+ gen_rtx_fmt_ee (code, SImode, tmp, CONST0_RTX (mode)));
+ }
+
/* Called from the md file, set up the operands of a compare instruction. */
void
*************** from_compare (rtx *operands, int code)
*** 1081,1091 ****
|| (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
sh_compare_op1 = force_reg (mode, sh_compare_op1);
}
! if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
{
from_compare (operands, GT);
insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
}
else
insn = gen_rtx_SET (VOIDmode,
gen_rtx_REG (SImode, T_REG),
--- 1143,1158 ----
|| (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
sh_compare_op1 = force_reg (mode, sh_compare_op1);
}
! if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_SH2E && code == GE
! && (mode == SFmode || TARGET_SH4))
{
from_compare (operands, GT);
insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
}
+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT
+ && ! TARGET_SH4 && TARGET_SH1
+ && (mode == DFmode || ! TARGET_SH2E))
+ insn = sh_soft_fp_cmp (code, mode);
else
insn = gen_rtx_SET (VOIDmode,
gen_rtx_REG (SImode, T_REG),
*************** equality_comparison_operator (rtx op, en
*** 7582,7588 ****
int
greater_comparison_operator (rtx op, enum machine_mode mode)
{
! if (mode != VOIDmode && GET_MODE (op) == mode)
return 0;
switch (GET_CODE (op))
{
--- 7649,7655 ----
int
greater_comparison_operator (rtx op, enum machine_mode mode)
{
! if (mode != VOIDmode && GET_MODE (op) != mode)
return 0;
switch (GET_CODE (op))
{
*************** greater_comparison_operator (rtx op, enu
*** 7599,7605 ****
int
less_comparison_operator (rtx op, enum machine_mode mode)
{
! if (mode != VOIDmode && GET_MODE (op) == mode)
return 0;
switch (GET_CODE (op))
{
--- 7666,7672 ----
int
less_comparison_operator (rtx op, enum machine_mode mode)
{
! if (mode != VOIDmode && GET_MODE (op) != mode)
return 0;
switch (GET_CODE (op))
{
*************** less_comparison_operator (rtx op, enum m
*** 7613,7618 ****
--- 7680,7716 ----
}
}
+ int
+ soft_fp_comparison_operator (rtx op, enum machine_mode mode)
+ {
+ if (mode != VOIDmode && GET_MODE (op) != mode)
+ return 0;
+ switch (GET_CODE (op))
+ {
+ default:
+ return 0;
+ case EQ: mode = CC_FP_NEmode; break;
+ case UNLE: mode = CC_FP_GTmode; break;
+ case GE: mode = CC_FP_UNLTmode; break;
+ }
+ return register_operand (XEXP (op, 0), mode);
+ }
+
+ int
+ soft_fp_comparison_operand (rtx op, enum machine_mode mode)
+ {
+ switch (GET_MODE (op))
+ {
+ default:
+ return 0;
+ case CC_FP_NEmode: case CC_FP_GTmode: case CC_FP_UNLTmode:
+ break;
+ }
+ if (mode == SFmode && TARGET_SH2E)
+ return 0;
+ return register_operand (op, mode);
+ }
+
/* Accept pseudos and branch target registers. */
int
target_reg_operand (rtx op, enum machine_mode mode)
*************** expand_df_binop (rtx (*fun) (rtx, rtx, r
*** 7946,7951 ****
--- 8044,8097 ----
emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
get_fpscr_rtx ()));
}
+
+ /* Expand an sfunc operation taking NARGS MODE arguments, using generator
+ function FUN, which needs symbol NAME loaded into a register first.
+ Add a REG_EQUAL note using EQUIV. */
+ static void
+ expand_sfunc_op (int nargs, enum machine_mode mode, rtx (*fun) (rtx, rtx),
+ const char *name, rtx equiv, rtx *operands)
+ {
+ int next_reg = FIRST_PARM_REG, i;
+ rtx addr, first = NULL_RTX, last, insn;
+
+ addr = force_reg (Pmode, function_symbol (name));
+ for ( i = 1; i <= nargs; i++)
+ {
+ insn = emit_move_insn (gen_rtx_REG (mode, next_reg), operands[i]);
+ if (!first)
+ first = insn;
+ next_reg += GET_MODE_SIZE (mode) / UNITS_PER_WORD;
+ }
+ last = emit_insn ((*fun) (operands[0], addr));
+ REG_NOTES (last) = gen_rtx_EXPR_LIST (REG_EQUAL, equiv, REG_NOTES (last));
+ /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop
+ invariant code motion can move it. */
+ REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first));
+ REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
+ }
+
+ /* Expand an sfunc unary operation taking one MODE argument, using generator
+ function FUN, which needs symbol NAME loaded into a register first.
+ Add a REG_EQUAL note using CODE. */
+ void
+ expand_sfunc_unop (enum machine_mode mode, rtx (*fun) (rtx, rtx),
+ const char *name, enum rtx_code code, rtx *operands)
+ {
+ rtx equiv = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
+ expand_sfunc_op (1, mode, fun, name, equiv, operands);
+ }
+
+ /* Expand an sfunc binary operation in MODE, using generator function FUN,
+ which needs symbol NAME loaded into a register first.
+ Add a REG_EQUAL note using CODE. */
+ void
+ expand_sfunc_binop (enum machine_mode mode, rtx (*fun) (rtx, rtx),
+ const char *name, enum rtx_code code, rtx *operands)
+ {
+ rtx equiv = gen_rtx_fmt_ee (code, mode, operands[1], operands[2]);
+ expand_sfunc_op (2, mode, fun, name, equiv, operands);
+ }
/* ??? gcc does flow analysis strictly after common subexpression
elimination. As a result, common subexpression elimination fails
Index: sh.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/sh.h,v
retrieving revision 1.240.2.4
diff -p -r1.240.2.4 sh.h
*** sh.h 29 Jun 2004 17:33:57 -0000 1.240.2.4
--- sh.h 2 Aug 2004 03:57:40 -0000
*************** extern int rtx_equal_function_value_matt
*** 3295,3300 ****
--- 3295,3302 ----
{"noncommutative_float_operator", {MINUS, DIV}}, \
{"shmedia_6bit_operand", {SUBREG, REG, CONST_INT}}, \
{"sh_register_operand", {REG, SUBREG, CONST_INT}}, \
+ {"soft_fp_comparison_operand", {SUBREG, REG}}, \
+ {"soft_fp_comparison_operator", {EQ, UNLE, GE}}, \
{"target_reg_operand", {SUBREG, REG}}, \
{"target_operand", {SUBREG, REG, LABEL_REF, SYMBOL_REF, CONST, UNSPEC}},\
{"trunc_hi_operand", {SUBREG, REG, TRUNCATE}}, \
*************** extern int rtx_equal_function_value_matt
*** 3308,3313 ****
--- 3310,3316 ----
#define SPECIAL_MODE_PREDICATES \
"any_register_operand", \
"int_gpr_dest", \
+ "soft_fp_comparison_operand" \
"trunc_hi_operand", \
/* This line intentionally left blank. */
Index: sh.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/sh.md,v
retrieving revision 1.172.2.1
diff -p -r1.172.2.1 sh.md
*** sh.md 8 Jun 2004 16:55:33 -0000 1.172.2.1
--- sh.md 2 Aug 2004 03:57:41 -0000
***************
*** 152,157 ****
--- 152,167 ----
(UNSPECV_CONST8 6)
(UNSPECV_WINDOW_END 10)
(UNSPECV_CONST_END 11)
+
+ ;; NaN handling for software floating point:
+ ;; We require one precision-specific bit to be set in all NaNs,
+ ;; so that we can test them with a not / tst sequence.
+ ;; ??? Ironically, this is the quiet bit for now, because that is the
+ ;; only bit set by __builtin_nan ("").
+ ;; ??? Should really use one bit lower and force it set by using
+ ;; a custom encoding function.
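+ ;; For example (a sketch): with r1 holding SF_NAN_MASK, the sequence
+ ;;   not r4,r0
+ ;;   tst r1,r0
+ ;; sets T exactly when r4 holds a NaN under this convention.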
+ (SF_NAN_MASK 0x7fc00000)
+ (DF_NAN_MASK 0x7ff80000)
])
;; -------------------------------------------------------------------------
***************
*** 660,665 ****
--- 670,683 ----
cmp/eq %1,%0"
[(set_attr "type" "mt_group")])
+ (define_insn "fpcmp_i1"
+ [(set (reg:SI T_REG)
+ (match_operator:SI 1 "soft_fp_comparison_operator"
+ [(match_operand 0 "soft_fp_comparison_operand" "r") (const_int 0)]))]
+ "TARGET_SH1 && !TARGET_SH4"
+ "tst %0,%0"
+ [(set_attr "type" "mt_group")])
+
(define_insn "cmpgtsi_t"
[(set (reg:SI T_REG)
(gt:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
***************
*** 5272,5277 ****
--- 5290,5303 ----
DONE;
}
+ if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT
+ && TARGET_SH1 && !TARGET_SH4
+ && (!TARGET_SH2E || GET_MODE (sh_compare_op0) == DFmode))
+ {
+ from_compare (operands, UNLE);
+ emit_jump_insn (gen_branch_false (operands[0]));
+ DONE;
+ }
from_compare (operands, GT);
}")
***************
*** 5308,5317 ****
rtx tmp = sh_compare_op0;
sh_compare_op0 = sh_compare_op1;
sh_compare_op1 = tmp;
! emit_insn (gen_bgt (operands[0]));
! DONE;
}
! from_compare (operands, GE);
}")
(define_expand "ble"
--- 5334,5348 ----
rtx tmp = sh_compare_op0;
sh_compare_op0 = sh_compare_op1;
sh_compare_op1 = tmp;
! if (TARGET_SH4 || (TARGET_SH2E && GET_MODE (sh_compare_op0) == SFmode))
! {
! emit_insn (gen_bgt (operands[0]));
! DONE;
! }
! from_compare (operands, UNLE);
}
! else
! from_compare (operands, GE);
}")
(define_expand "ble"
***************
*** 5342,5350 ****
DONE;
}
! if (TARGET_SH2E
! && TARGET_IEEE
! && GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
{
rtx tmp = sh_compare_op0;
sh_compare_op0 = sh_compare_op1;
--- 5373,5381 ----
DONE;
}
! if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT
! && (!TARGET_SH2E || TARGET_IEEE
! || (!TARGET_SH4 && GET_MODE (sh_compare_op0) == DFmode)))
{
rtx tmp = sh_compare_op0;
sh_compare_op0 = sh_compare_op1;
***************
*** 5383,5391 ****
DONE;
}
! if (TARGET_SH2E
! && ! TARGET_IEEE
! && GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
{
rtx tmp = sh_compare_op0;
sh_compare_op0 = sh_compare_op1;
--- 5414,5422 ----
DONE;
}
! if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT
! && TARGET_SH2E && !TARGET_IEEE
! && (TARGET_SH4 || GET_MODE (sh_compare_op0) == SFmode))
{
rtx tmp = sh_compare_op0;
sh_compare_op0 = sh_compare_op1;
***************
*** 5484,5502 ****
from_compare (operands, GTU);
}")
(define_expand "bunordered"
[(set (match_dup 1) (unordered:DI (match_dup 2) (match_dup 3)))
(set (pc)
(if_then_else (ne (match_dup 1) (const_int 0))
(label_ref:DI (match_operand 0 "" ""))
(pc)))]
! "TARGET_SHMEDIA"
"
{
- operands[1] = gen_reg_rtx (DImode);
operands[2] = force_reg (GET_MODE (sh_compare_op0), sh_compare_op0);
operands[3] = force_reg (GET_MODE (sh_compare_op1), sh_compare_op1);
}")
;; ------------------------------------------------------------------------
;; Jump and linkage insns
--- 5515,5596 ----
from_compare (operands, GTU);
}")
+ ;; ??? Can't use DFmode bcc patterns for SH2E when there is no SFmode
+ ;; equivalent: the insn predicate has to be evaluable at compiler startup,
+ ;; and FAIL in bcc patterns causes crashes.
(define_expand "bunordered"
[(set (match_dup 1) (unordered:DI (match_dup 2) (match_dup 3)))
(set (pc)
(if_then_else (ne (match_dup 1) (const_int 0))
(label_ref:DI (match_operand 0 "" ""))
(pc)))]
! "(TARGET_SH1 && !TARGET_SH2E) || TARGET_SHMEDIA"
"
{
operands[2] = force_reg (GET_MODE (sh_compare_op0), sh_compare_op0);
operands[3] = force_reg (GET_MODE (sh_compare_op1), sh_compare_op1);
+ if (TARGET_SH1)
+ {
+ HOST_WIDE_INT mask;
+ switch (GET_MODE (operands[2]))
+ {
+ case SFmode:
+ mask = SF_NAN_MASK;
+ break;
+ case DFmode:
+ mask = DF_NAN_MASK;
+ break;
+ default:
+ FAIL;
+ }
+ emit_insn (gen_cmpunsf_i1 (operands[2], operands[3],
+ force_reg (SImode, GEN_INT (mask))));
+ emit_jump_insn (gen_branch_true (operands[0]));
+ DONE;
+ }
+ operands[1] = gen_reg_rtx (DImode);
}")
+
+ (define_expand "bunle"
+ [(set (pc)
+ (if_then_else (ne (reg:SI T_REG) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "(TARGET_SH1 && !TARGET_SH2E) || TARGET_SHMEDIA_FPU"
+ "
+ {
+ if (TARGET_SHMEDIA_FPU)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+
+ emit_insn (gen_sgt (tmp));
+ emit_jump_insn (gen_beq_media (operands[0], tmp, const0_rtx));
+ DONE;
+ }
+
+ from_compare (operands, UNLE);
+ }")
+
+ (define_expand "bunlt"
+ [(set (pc)
+ (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "(TARGET_SH1 && !TARGET_SH2E) || TARGET_SHMEDIA_FPU"
+ "
+ {
+ if (TARGET_SHMEDIA_FPU)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+
+ emit_insn (gen_sge (tmp));
+ emit_jump_insn (gen_beq_media (operands[0], tmp, const0_rtx));
+ DONE;
+ }
+
+ from_compare (operands, GE);
+ }")
+
;; ------------------------------------------------------------------------
;; Jump and linkage insns
*************** mov.l\\t1f,r0\\n\\
*** 7495,7500 ****
--- 7589,7601 ----
DONE;
if (! rtx_equal_function_value_matters)
FAIL;
+ if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT
+ && !TARGET_SH4 && (!TARGET_SH2E || GET_MODE (sh_compare_op0) == DFmode))
+ {
+ from_compare (operands, EQ);
+ emit_insn (gen_movt (operands[0]));
+ DONE;
+ }
operands[1] = prepare_scc_operands (EQ);
}")
*************** mov.l\\t1f,r0\\n\\
*** 7543,7548 ****
--- 7644,7652 ----
}
if (! rtx_equal_function_value_matters)
FAIL;
+ if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT && !TARGET_SH4
+ && (!TARGET_SH2E || GET_MODE (sh_compare_op0) == DFmode))
+ FAIL;
operands[1] = prepare_scc_operands (LT);
}")
*************** mov.l\\t1f,r0\\n\\
*** 7647,7652 ****
--- 7751,7759 ----
}
if (! rtx_equal_function_value_matters)
FAIL;
+ if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT && !TARGET_SH4
+ && (!TARGET_SH2E || GET_MODE (sh_compare_op0) == DFmode))
+ FAIL;
operands[1] = prepare_scc_operands (GT);
}")
*************** mov.l\\t1f,r0\\n\\
*** 7703,7709 ****
FAIL;
if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
{
! if (TARGET_IEEE)
{
rtx lab = gen_label_rtx ();
prepare_scc_operands (EQ);
--- 7810,7822 ----
FAIL;
if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
{
! if (!TARGET_SH4
! && (!TARGET_SH2E || GET_MODE (sh_compare_op0) == DFmode))
! {
! from_compare (operands, GE);
! emit_insn (gen_movt (operands[0]));
! }
! else if (TARGET_IEEE)
{
rtx lab = gen_label_rtx ();
prepare_scc_operands (EQ);
*************** mov.l\\t1f,r0\\n\\
*** 7834,7839 ****
--- 7947,7967 ----
operands[1] = prepare_scc_operands (GEU);
}")
+ (define_expand "sunle"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (match_dup 1))]
+ "TARGET_SH1 && !TARGET_SH4"
+ "
+ {
+ if (TARGET_SH2E && GET_MODE (sh_compare_op0) == SFmode)
+ FAIL;
+ if (! rtx_equal_function_value_matters)
+ FAIL;
+ from_compare (operands, UNLE);
+ emit_insn (gen_movt (operands[0]));
+ DONE;
+ }")
+
;; sne moves the complement of the T reg to DEST like this:
;; cmp/eq ...
;; mov #-1,temp
*************** mov.l\\t1f,r0\\n\\
*** 7882,7888 ****
DONE;
if (! rtx_equal_function_value_matters)
FAIL;
! operands[1] = prepare_scc_operands (EQ);
operands[2] = gen_reg_rtx (SImode);
}")
--- 8010,8024 ----
DONE;
if (! rtx_equal_function_value_matters)
FAIL;
! if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT
! && !TARGET_SH4
! && (!TARGET_SH2E || GET_MODE (sh_compare_op0) == DFmode))
! {
! from_compare (operands, EQ);
! operands[1] = gen_rtx_REG (SImode, T_REG);
! }
! else
! operands[1] = prepare_scc_operands (EQ);
operands[2] = gen_reg_rtx (SImode);
}")
*************** mov.l\\t1f,r0\\n\\
*** 8257,8263 ****
[(set (match_operand:SF 0 "arith_reg_operand" "")
(plus:SF (match_operand:SF 1 "arith_reg_operand" "")
(match_operand:SF 2 "arith_reg_operand" "")))]
! "TARGET_SH2E || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH2E)
--- 8393,8399 ----
[(set (match_operand:SF 0 "arith_reg_operand" "")
(plus:SF (match_operand:SF 1 "arith_reg_operand" "")
(match_operand:SF 2 "arith_reg_operand" "")))]
! "TARGET_SH2E || TARGET_SH3 || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH2E)
*************** mov.l\\t1f,r0\\n\\
*** 8265,8270 ****
--- 8401,8412 ----
expand_sf_binop (&gen_addsf3_i, operands);
DONE;
}
+ else if (TARGET_SH3)
+ {
+ expand_sfunc_binop (SFmode, &gen_addsf3_i3, \"__addsf3_\", PLUS,
+ operands);
+ DONE;
+ }
}")
(define_insn "*addsf3_media"
*************** mov.l\\t1f,r0\\n\\
*** 8341,8346 ****
--- 8483,8504 ----
}"
[(set_attr "type" "fparith_media")])
+ (define_insn "addsf3_i3"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=z")
+ (plus:SF (reg:SF R4_REG) (reg:SF R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (clobber (reg:SI R6_REG))
+ (clobber (reg:SI R7_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH3 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
(define_insn "addsf3_i"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(plus:SF (match_operand:SF 1 "arith_reg_operand" "%0")
*************** mov.l\\t1f,r0\\n\\
*** 8355,8361 ****
[(set (match_operand:SF 0 "fp_arith_reg_operand" "")
(minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
(match_operand:SF 2 "fp_arith_reg_operand" "")))]
! "TARGET_SH2E || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH2E)
--- 8513,8519 ----
[(set (match_operand:SF 0 "fp_arith_reg_operand" "")
(minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
(match_operand:SF 2 "fp_arith_reg_operand" "")))]
! "TARGET_SH2E || TARGET_SH3 || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH2E)
*************** mov.l\\t1f,r0\\n\\
*** 8363,8368 ****
--- 8521,8532 ----
expand_sf_binop (&gen_subsf3_i, operands);
DONE;
}
+ else if (TARGET_SH3)
+ {
+ expand_sfunc_binop (SFmode, &gen_subsf3_i3, \"__subsf3_\", MINUS,
+ operands);
+ DONE;
+ }
}")
(define_insn "*subsf3_media"
*************** mov.l\\t1f,r0\\n\\
*** 8373,8378 ****
--- 8537,8559 ----
"fsub.s %1, %2, %0"
[(set_attr "type" "fparith_media")])
+ (define_insn "subsf3_i3"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=z")
+ (minus:SF (reg:SF R4_REG) (reg:SF R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (clobber (reg:SI R5_REG))
+ (clobber (reg:SI R6_REG))
+ (clobber (reg:SI R7_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH3 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
(define_insn "subsf3_i"
[(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
(minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "0")
*************** mov.l\\t1f,r0\\n\\
*** 8392,8404 ****
[(set (match_operand:SF 0 "fp_arith_reg_operand" "")
(mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
(match_operand:SF 2 "fp_arith_reg_operand" "")))]
! "TARGET_SH2E || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH4)
expand_sf_binop (&gen_mulsf3_i4, operands);
else if (TARGET_SH2E)
emit_insn (gen_mulsf3_ie (operands[0], operands[1], operands[2]));
if (! TARGET_SHMEDIA)
DONE;
}")
--- 8573,8591 ----
[(set (match_operand:SF 0 "fp_arith_reg_operand" "")
(mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
(match_operand:SF 2 "fp_arith_reg_operand" "")))]
! "TARGET_SH2E || TARGET_SH3 || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH4)
expand_sf_binop (&gen_mulsf3_i4, operands);
else if (TARGET_SH2E)
emit_insn (gen_mulsf3_ie (operands[0], operands[1], operands[2]));
+ else if (TARGET_SH3)
+ {
+ expand_sfunc_binop (SFmode, &gen_mulsf3_i3, \"__mulsf3_\", MULT,
+ operands);
+ DONE;
+ }
if (! TARGET_SHMEDIA)
DONE;
}")
*************** mov.l\\t1f,r0\\n\\
*** 8429,8434 ****
--- 8616,8637 ----
"fmul %2,%0"
[(set_attr "type" "fp")])
+ (define_insn "mulsf3_i3"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=z")
+ (mult:SF (reg:SF R4_REG) (reg:SF R5_REG)))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH3 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
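A note on the _i3 call patterns above: they spell out the special
calling convention of the assembly helpers instead of using a plain
call.  The operands arrive in r4 / r5, the result comes back in r0
(the "=z" constraint), the helper address is passed in an arbitrary
register and invoked with jsr, and only the registers the helper
actually destroys are listed as clobbered.  Keeping those clobber
lists short is what lets the register allocator keep values live
across the call.  Roughly, expand_sfunc_binop performs an expansion
like the following (a sketch only; the real helper lives in the sh.c
half of the patch, which is not included in this mail):

/* Hedged sketch of the SFmode expansion.  function_symbol () is the
   existing sh.c helper that wraps a function name in a SYMBOL_REF.  */
static void
sketch_expand_addsf3 (rtx *operands)
{
  rtx addr = force_reg (SImode, function_symbol ("__addsf3_"));

  emit_move_insn (gen_rtx_REG (SFmode, 4), operands[1]); /* r4 = op1 */
  emit_move_insn (gen_rtx_REG (SFmode, 5), operands[2]); /* r5 = op2 */
  emit_insn (gen_addsf3_i3 (operands[0], addr));         /* jsr @addr */
}
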
(define_insn "*mac_media"
[(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
(plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f")
*************** mov.l\\t1f,r0\\n\\
*** 8589,8594 ****
--- 8792,8886 ----
"ftrc %1,%0"
[(set_attr "type" "fp")])
+ (define_insn "cmpnesf_i1"
+ [(set (match_operand:CC_FP_NE 0 "register_operand" "=z")
+ (compare:CC_FP_NE (reg:SF R4_REG) (reg:SF R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+ (define_insn "cmpgtsf_i1"
+ [(set (match_operand:CC_FP_GT 0 "register_operand" "=z")
+ (compare:CC_FP_GT (reg:SF R4_REG) (reg:SF R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+ (define_insn "cmpunltsf_i1"
+ [(set (match_operand:CC_FP_UNLT 0 "register_operand" "=z")
+ (compare:CC_FP_UNLT (reg:SF R4_REG) (reg:SF R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+ (define_insn "cmpeqsf_i1_finite"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:SF 0 "arith_reg_operand" "r,r,r")
+ (match_operand:SF 1 "arith_reg_operand" "r,r,r")))
+ (clobber (match_scratch:SI 2 "=0,1,?r"))]
+ "TARGET_SH1 && ! TARGET_SH2E && flag_finite_math_only"
+ "*
+ {
+ if (which_alternative == 0)
+ output_asm_insn (\"cmp/eq\t%0,%1\;or\t%1,%2\;bt\t0f\", operands);
+ else if (which_alternative == 1)
+ output_asm_insn (\"cmp/eq\t%0,%1\;or\t%0,%2\;bt\t0f\", operands);
+ else
+ output_asm_insn (\"cmp/eq\t%0,%1\;mov\t%0,%2\;bt\t0f\;or\t%1,%2\",
+ operands);
+ return \"add\t%2,%2\;tst\t%2,%2\\n0:\";
+ }"
+ [(set_attr "length" "10,10,12")])
+
+ (define_insn "cmplesf_i1_finite"
+ [(set (reg:SI T_REG)
+ (le:SI (match_operand:SF 0 "arith_reg_operand" "r,r,r")
+ (match_operand:SF 1 "arith_reg_operand" "r,r,r")))
+ (clobber (match_scratch:SI 2 "=0,1,r"))]
+ "TARGET_SH1 && ! TARGET_SH2E && flag_finite_math_only"
+ "*
+ {
+ output_asm_insn (\"cmp/pz\t%0\", operands);
+ if (which_alternative == 2)
+ output_asm_insn (\"mov\t%0,%2\", operands);
+ if (TARGET_SH2)
+ output_asm_insn (\"bf/s\t0f\;cmp/hs\t%1,%0\;cmp/ge\t%0,%1\", operands);
+ else
+ output_asm_insn (\"bt\t1f\;bra\t0f\;cmp/hs\t%1,%0\\n1:\tcmp/ge\t%0,%1\",
+ operands);
+ if (which_alternative == 1)
+ output_asm_insn (\"or\t%0,%2\", operands);
+ else
+ output_asm_insn (\"or\t%1,%2\", operands);
+ return \"bt\t0f\;add\t%2,%2\;tst\t%2,%2\\n0:\";
+ }"
+ [(set_attr "length" "18,18,20")])
+
+ (define_insn "cmpunsf_i1"
+ [(set (reg:SI T_REG)
+ (unordered:SI (match_operand:SF 0 "arith_reg_operand" "r,r")
+ (match_operand:SF 1 "arith_reg_operand" "r,r")))
+ (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
+ (clobber (match_scratch:SI 3 "=0,&r"))]
+ "TARGET_SH1 && ! TARGET_SH2E"
+ "not\t%0,%3\;tst\t%2,%3\;not\t%1,%3\;bt\t0f\;tst\t%2,%3\;0:"
+ [(set_attr "length" "10")])
+
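A word on the comparison patterns above: three relations - NE, GT and
UNLT - cover what the branch and scc expanders need, since the T bit
can be inverted and operands can be swapped (EQ is !NE, UNLE is !GT,
GE is !UNLT, and LT / UNGT come from swapping).  Each relation gets
its own CC_FP_* mode so the rest of the compiler knows exactly which
relation a helper call computed.  The _finite variants avoid the call
altogether under -ffinite-math-only by using a bit trick: two floats
are equal iff their bit patterns match, or both are zeros of either
sign.  In C, on the SFmode bit patterns (for illustration):

/* Finite-math float equality on raw bit patterns: if the patterns
   differ, the operands can only be equal when both are +0 or -0,
   which is exactly when ((a | b) << 1) == 0.  */
int feq_finite (unsigned int a, unsigned int b)
{
  if (a == b)
    return 1;
  return ((a | b) << 1) == 0;
}
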
(define_insn "cmpgtsf_t"
[(set (reg:SI T_REG)
(gt:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
*************** mov.l\\t1f,r0\\n\\
*** 8684,8690 ****
[(set (reg:SI T_REG)
(compare (match_operand:SF 0 "arith_operand" "")
(match_operand:SF 1 "arith_operand" "")))]
! "TARGET_SH2E || TARGET_SHMEDIA_FPU"
"
{
sh_compare_op0 = operands[0];
--- 8976,8982 ----
[(set (reg:SI T_REG)
(compare (match_operand:SF 0 "arith_operand" "")
(match_operand:SF 1 "arith_operand" "")))]
! "TARGET_SH1 || TARGET_SHMEDIA_FPU"
"
{
sh_compare_op0 = operands[0];
*************** mov.l\\t1f,r0\\n\\
*** 8779,8784 ****
--- 9071,9109 ----
[(set_attr "type" "fmove")
(set_attr "fp_mode" "single")])
+ (define_expand "abssc2"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+ (abs:SF (match_operand:SC 1 "fp_arith_reg_operand" "")))]
+ "TARGET_SH3 && ! TARGET_SH2E"
+ "
+ {
+ if (TARGET_SH3)
+ {
+ expand_sfunc_unop (SCmode, &gen_abssc2_i3, \"__hypotf\", ABS,
+ operands);
+ DONE;
+ }
+ FAIL;
+ }")
+
+ (define_insn "abssc2_i3"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=z")
+ (abs:SF (reg:SC R4_REG)))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH3 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
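The abssc2 expander above maps the magnitude of a complex float onto
the hypotf helper: |a+bi| is hypotf (a, b), with the real and
imaginary parts arriving in r4 / r5 (an SCmode value in R4_REG spans
both registers).
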
(define_expand "adddf3"
[(set (match_operand:DF 0 "fp_arith_reg_operand" "")
(plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
*************** mov.l\\t1f,r0\\n\\
*** 9004,9009 ****
--- 9329,9389 ----
;; (use (match_dup 2))])
;; (set (match_dup 0) (reg:SI FPUL_REG))])
+ (define_insn "cmpnedf_i1"
+ [(set (match_operand:CC_FP_NE 0 "register_operand" "=z")
+ (compare:CC_FP_NE (reg:DF R4_REG) (reg:DF R6_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH2E"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+ (define_insn "cmpgtdf_i1"
+ [(set (match_operand:CC_FP_GT 0 "register_operand" "=z")
+ (compare:CC_FP_GT (reg:DF R4_REG) (reg:DF R6_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH4"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+ (define_insn "cmpunltdf_i1"
+ [(set (match_operand:CC_FP_UNLT 0 "register_operand" "=z")
+ (compare:CC_FP_UNLT (reg:DF R4_REG) (reg:DF R6_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH4"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+ (define_insn "cmpeqdf_i1_finite"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:DF 0 "arith_reg_operand" "r")
+ (match_operand:DF 1 "arith_reg_operand" "r")))
+ (clobber (match_scratch:SI 2 "=&r"))]
+ "TARGET_SH1 && ! TARGET_SH4 && flag_finite_math_only"
+ "cmp/eq\t%R0,%R1\;mov\t%S0,%2\;bf\t0f\;cmp/eq\t%S0,%S1\;bt\t0f\;or\t%S1,%2\;add\t%2,%2\;or\t%R0,%2\;tst\t%2,%2\\n0:"
+ [(set_attr "length" "18")])
+
+ (define_insn "cmpundf_i1"
+ [(set (reg:SI T_REG)
+ (unordered:SI (match_operand:DF 0 "arith_reg_operand" "r,r")
+ (match_operand:DF 1 "arith_reg_operand" "r,r")))
+ (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
+ (clobber (match_scratch:SI 3 "=0,&r"))]
+ "TARGET_SH1 && ! TARGET_SH2E"
+ "not\t%S0,%3\;tst\t%2,%3\;not\t%S1,%3\;bt\t0f\;tst\t%2,%3\;0:"
+ [(set_attr "length" "10")])
+
(define_insn "cmpgtdf_t"
[(set (reg:SI T_REG)
(gt:SI (match_operand:DF 0 "arith_reg_operand" "f")
*************** mov.l\\t1f,r0\\n\\
*** 9071,9077 ****
[(set (reg:SI T_REG)
(compare (match_operand:DF 0 "arith_operand" "")
(match_operand:DF 1 "arith_operand" "")))]
! "TARGET_SH4 || TARGET_SHMEDIA_FPU"
"
{
sh_compare_op0 = operands[0];
--- 9451,9457 ----
[(set (reg:SI T_REG)
(compare (match_operand:DF 0 "arith_operand" "")
(match_operand:DF 1 "arith_operand" "")))]
! "TARGET_SH1 || TARGET_SHMEDIA_FPU"
"
{
sh_compare_op0 = operands[0];
*************** mov.l\\t1f,r0\\n\\
*** 9169,9175 ****
(define_expand "extendsfdf2"
[(set (match_operand:DF 0 "fp_arith_reg_operand" "")
(float_extend:DF (match_operand:SF 1 "fpul_operand" "")))]
! "TARGET_SH4 || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH4)
--- 9549,9555 ----
(define_expand "extendsfdf2"
[(set (match_operand:DF 0 "fp_arith_reg_operand" "")
(float_extend:DF (match_operand:SF 1 "fpul_operand" "")))]
! "TARGET_SH1 || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH4)
*************** mov.l\\t1f,r0\\n\\
*** 9178,9183 ****
--- 9558,9569 ----
get_fpscr_rtx ()));
DONE;
}
+ else if (TARGET_SH1)
+ {
+ expand_sfunc_unop (SFmode, &gen_extendsfdf2_i1, \"__extendsfdf2_\",
+ FLOAT_EXTEND, operands);
+ DONE;
+ }
}")
(define_insn "*extendsfdf2_media"
*************** mov.l\\t1f,r0\\n\\
*** 9196,9205 ****
[(set_attr "type" "fp")
(set_attr "fp_mode" "double")])
(define_expand "truncdfsf2"
[(set (match_operand:SF 0 "fpul_operand" "")
(float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "")))]
! "TARGET_SH4 || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH4)
--- 9582,9624 ----
[(set_attr "type" "fp")
(set_attr "fp_mode" "double")])
+ ;; ??? In order to use this efficiently, we'd have to have an extra
+ ;; register class for r0 and r1 - and that would cause repercussions in
+ ;; register allocation elsewhere. So just say we clobber r0 / r1, and
+ ;; that we can use an arbitrary target.
+ (define_insn_and_split "extendsfdf2_i1"
+ [(set (match_operand:DF 0 "arith_reg_operand" "=r")
+ (float_extend:DF (reg:SF R4_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && !TARGET_SH4"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (reg:DF R0_REG))]
+ "emit_insn (gen_extendsfdf2_i1_r0 (operands[1]));"
+ [(set_attr "type" "sfunc")])
+
+ (define_insn "extendsfdf2_i1_r0"
+ [(set (reg:DF R0_REG) (float_extend:DF (reg:SF R4_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && !TARGET_SH4"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
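This insn_and_split is the workaround the ??? comment describes:
before reload the call pretends it can deliver the DFmode result in
an arbitrary register pair while clobbering r0 / r1; after reload it
splits into the real call (extendsfdf2_i1_r0, with the result
hardwired to r0 / r1) followed by a plain move into whatever
destination got allocated.
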
(define_expand "truncdfsf2"
[(set (match_operand:SF 0 "fpul_operand" "")
(float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "")))]
! "TARGET_SH1 || TARGET_SHMEDIA_FPU"
"
{
if (TARGET_SH4)
*************** mov.l\\t1f,r0\\n\\
*** 9208,9213 ****
--- 9627,9638 ----
get_fpscr_rtx ()));
DONE;
}
+ else if (TARGET_SH1)
+ {
+ expand_sfunc_unop (DFmode, &gen_truncdfsf2_i1, \"__truncdfsf2_\",
+ FLOAT_TRUNCATE, operands);
+ DONE;
+ }
}")
(define_insn "*truncdfsf2_media"
*************** mov.l\\t1f,r0\\n\\
*** 9225,9230 ****
--- 9650,9670 ----
"fcnvds %1,%0"
[(set_attr "type" "fp")
(set_attr "fp_mode" "double")])
+
+ (define_insn "truncdfsf2_i1"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=z")
+ (float_truncate:SF (reg:DF R4_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH4"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
;; Bit field extract patterns. These give better code for packed bitfields,
;; because they allow auto-increment addresses to be generated.
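
The pattern of the sh.md changes above is uniform: expanders that used
to require hardware floating point (TARGET_SH2E / TARGET_SH4) are
relaxed to TARGET_SH1 / TARGET_SH3, and the new non-FPU paths are
routed through the sfunc call patterns rather than falling back to the
generic libcall mechanism.
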
Index: t-sh
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/t-sh,v
retrieving revision 1.16.30.1
diff -p -r1.16.30.1 t-sh
*** t-sh 16 Jun 2004 19:58:35 -0000 1.16.30.1
--- t-sh 2 Aug 2004 03:57:41 -0000
***************
*** 1,6 ****
--- 1,8 ----
LIB1ASMSRC = sh/lib1funcs.asm
LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movstr \
_movstr_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
+ _nesf2 _nedf2 _gtsf2t _gtdf2t _gesf2f _gedf2f _extendsfdf2 _truncdfsf2 \
+ _add_sub_sf3 _mulsf3 _hypotf \
$(LIB1ASMFUNCS_CACHE)
# We want fine grained libraries, so use the new code to build the
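
Each name added to LIB1ASMFUNCS becomes a separate member of libgcc.a,
assembled from lib1funcs.asm with the corresponding L_<name> macro
defined (e.g. -DL_mulsf3 selects the code under #ifdef L_mulsf3), so
programs link in only the helpers they actually use.  _add_sub_sf3
builds addition and subtraction into a single member, and _hypotf
provides the hypotf implementation.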