Optimize SSE floatuns sequence
Jan Hubicka
jh@suse.cz
Wed Feb 5 21:31:00 GMT 2003
Hi,
the default way optabs emulates floatuns for Pmode is to convert into XFmode.
That results in an x87->SSE transfer, which is rather expensive. The alternate
sequence used by Alpha and Sparc avoids this and, for some reason, even results
in shorter code on average.
Bootstrapped/regtested x86-64. OK for mainline?
Honza
Wed Feb 5 22:27:51 CET 2003 Jan Hubicka <jh@suse.cz>
* i386-protos.h (x86_emit_floatuns): Declare.
* i386.c (x86_emit_floatuns): New global function.
* i386.md (floatunssisf2, floatunsdisf2,
floatunsdidf2): New patterns.
Index: i386-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386-protos.h,v
retrieving revision 1.91
diff -c -3 -p -r1.91 i386-protos.h
*** i386-protos.h 28 Jan 2003 18:08:50 -0000 1.91
--- i386-protos.h 4 Feb 2003 23:43:14 -0000
*************** extern void emit_i387_cw_initialization
*** 189,194 ****
--- 189,195 ----
extern bool ix86_fp_jump_nontrivial_p PARAMS ((enum rtx_code));
extern void x86_order_regs_for_local_alloc PARAMS ((void));
extern void x86_function_profiler PARAMS ((FILE *, int));
+ extern void x86_emit_floatuns PARAMS ((rtx [2]));
#ifdef TREE_CODE
Index: i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.525
diff -c -3 -p -r1.525 i386.c
*** i386.c 4 Feb 2003 18:01:20 -0000 1.525
--- i386.c 4 Feb 2003 23:43:16 -0000
*************** x86_extended_reg_mentioned_p (insn)
*** 15337,15340 ****
--- 15337,15375 ----
return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}
+ /* Generate an unsigned Pmode integer to FP conversion.  This is the same
+    code optabs would emit if we didn't have TFmode patterns.
+
+    OPERANDS[0] is the floating-point destination; its mode is used as the
+    result mode.  OPERANDS[1] is the integer source.  The source is handled
+    in Pmode throughout, so the register it is forced into agrees with the
+    mode used for the compare and the integer arithmetic below (the
+    expanders calling this pass SImode when !TARGET_64BIT and DImode when
+    TARGET_64BIT).  */
+
+ void
+ x86_emit_floatuns (operands)
+      rtx operands[2];
+ {
+   rtx neglab, donelab, i0, i1, f0, in, out;
+   enum machine_mode mode;
+
+   out = operands[0];
+   in = force_reg (Pmode, operands[1]);
+   mode = GET_MODE (out);
+   neglab = gen_label_rtx ();
+   donelab = gen_label_rtx ();
+   f0 = gen_reg_rtx (mode);
+
+   /* Branch to NEGLAB when the input is negative as a signed value, i.e.
+      its top bit is set.  */
+   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
+
+   /* Top bit clear: the signed conversion gives the wanted result.  */
+   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
+   emit_jump_insn (gen_jump (donelab));
+   emit_barrier ();
+
+   emit_label (neglab);
+
+   /* Top bit set: halve the input with a logical shift, OR the bit that was
+      shifted out back into bit 0 so it still takes part in rounding,
+      convert the now non-negative value, and double the result.  */
+   i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
+   i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
+   i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
+   expand_float (f0, i0, 0);
+   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
+
+   emit_label (donelab);
+ }
+
+
#include "gt-i386.h"
Index: i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.423
diff -c -3 -p -r1.423 i386.md
*** i386.md 4 Feb 2003 20:47:46 -0000 1.423
--- i386.md 4 Feb 2003 23:43:17 -0000
***************
*** 4925,4930 ****
--- 4925,4949 ----
ix86_free_from_memory (GET_MODE (operands[1]));
DONE;
})
+
+ ;; Unsigned SImode -> SFmode conversion.  The whole sequence is emitted by
+ ;; x86_emit_floatuns; operand 0 is the SFmode destination register and
+ ;; operand 1 the SImode source register.  Enabled only for SSE math on
+ ;; non-64-bit targets.
+ (define_expand "floatunssisf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SI 1 "register_operand" ""))]
+ "TARGET_SSE && TARGET_SSE_MATH && !TARGET_64BIT"
+ "x86_emit_floatuns (operands); DONE;")
+
+ ;; Unsigned DImode -> SFmode conversion.  The whole sequence is emitted by
+ ;; x86_emit_floatuns; operand 0 is the SFmode destination register and
+ ;; operand 1 the DImode source register.  Enabled only for SSE math on
+ ;; 64-bit targets.
+ (define_expand "floatunsdisf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:DI 1 "register_operand" ""))]
+ "TARGET_SSE && TARGET_SSE_MATH && TARGET_64BIT"
+ "x86_emit_floatuns (operands); DONE;")
+
+ ;; Unsigned DImode -> DFmode conversion.  The whole sequence is emitted by
+ ;; x86_emit_floatuns; operand 0 is the DFmode destination register and
+ ;; operand 1 the DImode source register.  Requires SSE2 (unlike the SFmode
+ ;; variants, which test TARGET_SSE) with SSE math on 64-bit targets.
+ (define_expand "floatunsdidf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DI 1 "register_operand" ""))]
+ "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_64BIT"
+ "x86_emit_floatuns (operands); DONE;")
;; Add instructions
More information about the Gcc-patches
mailing list