This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, take 2] New x87 fscale pattern and exp?f2 insn patterns
- From: Uros Bizjak <uros at kss-loka dot si>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Roger Sayle <roger at eyesopen dot com>
- Date: Mon, 26 Apr 2004 07:58:31 +0200
- Subject: [PATCH, take 2] New x87 fscale pattern and exp?f2 insn patterns
Hello!
This patch implements _only_ the new x87 fscale pattern and the exp?f2 insn
patterns. I stripped out the REG_DEAD handling for unrelated UNSPEC_*
patterns in reg-stack.c (this will be a separate patch). Also stripped out
is the unrelated move of the (atansf2, atandf2, atanxf2) patterns to the
atan2?f3 expanders.
2004-04-26 Uros Bizjak <uros@kss-loka.si>
* reg-stack.c (swap_to_top): New function.
(subst_stack_regs_pat): UNSPEC_FPATAN, UNSPEC_FYL2X: Use
swap_to_top().
(subst_stack_regs_pat): UNSPEC_FSCALE: Remove.
(subst_stack_regs_pat): Handle UNSPEC_FSCALE_FRACT and
UNSPEC_FSCALE_EXP.
* config/i386/i386.md (UNSPEC_FSCALE): Remove.
(*fscale_sfxf3, *fscale_dfxf3, *fscale_xf3): Remove insn pattern.
(UNSPEC_FSCALE_FRACT, UNSPEC_FSCALE_EXP): New unspecs to represent
x87's fscale insn.
(*fscalexf4): Define new insn pattern to implement x87 fscale insn.
(exp?f2, exp10?f2, exp2?f2): Use *fscalexf4 and float_truncate
patterns.
The following patch was bootstrapped on i686-pc-linux-gnu and tested
with the relevant builtin tests; the Whetstone benchmark was successfully
built and its run produced the expected results.
Uros.
? config.cache
? config.log
? maybedep.tmp
? serdep.tmp
Index: gcc/reg-stack.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/reg-stack.c,v
retrieving revision 1.148
diff -u -p -r1.148 reg-stack.c
--- gcc/reg-stack.c 15 Apr 2004 02:43:44 -0000 1.148
+++ gcc/reg-stack.c 26 Apr 2004 05:43:15 -0000
@@ -249,6 +249,7 @@ static void remove_regno_note (rtx, enum
static int get_hard_regnum (stack, rtx);
static rtx emit_pop_insn (rtx, stack, rtx, enum emit_where);
static void emit_swap_insn (rtx, stack, rtx);
+static void swap_to_top(rtx, stack, rtx, rtx);
static bool move_for_stack_reg (rtx, stack, rtx);
static int swap_rtx_condition_1 (rtx);
static int swap_rtx_condition (rtx);
@@ -1034,6 +1035,54 @@ emit_swap_insn (rtx insn, stack regstack
emit_insn_before (swap_rtx, insn);
}
+/* Emit insns before INSN to swap virtual register SRC1 with
+ the top of stack and virtual register SRC2 with the second stack
+ slot. REGSTACK is the stack state before the swaps, and
+ is updated to reflect the swaps. A swap insn is represented as a
+ PARALLEL of two patterns: each pattern moves one reg to the other.
+
+ If SRC1 and/or SRC2 are already at the right place, the
+ corresponding swap is skipped. */
+
+static void
+swap_to_top (rtx insn, stack regstack, rtx src1, rtx src2)
+{
+ struct stack_def temp_stack;
+ int regno, j, k, temp;
+
+ temp_stack = *regstack; /* Rearrange a copy; change_stack below gets both layouts. */
+
+ /* Place operand 1 at the top of stack. */
+ regno = get_hard_regnum (&temp_stack, src1);
+ if (regno < 0)
+ abort (); /* get_hard_regnum returned a negative value for SRC1. */
+ if (regno != FIRST_STACK_REG)
+ {
+ k = temp_stack.top - (regno - FIRST_STACK_REG); /* Slot derived from SRC1's hard regnum. */
+ j = temp_stack.top; /* Top slot. */
+
+ temp = temp_stack.reg[k];
+ temp_stack.reg[k] = temp_stack.reg[j];
+ temp_stack.reg[j] = temp;
+ }
+
+ /* Place operand 2 next on the stack. SRC2 is looked up again in
+ TEMP_STACK, after the operand 1 exchange above. */
+ regno = get_hard_regnum (&temp_stack, src2);
+ if (regno < 0)
+ abort (); /* get_hard_regnum returned a negative value for SRC2. */
+ if (regno != FIRST_STACK_REG + 1)
+ {
+ k = temp_stack.top - (regno - FIRST_STACK_REG); /* Slot derived from SRC2's hard regnum. */
+ j = temp_stack.top - 1; /* Slot just below the top. */
+
+ temp = temp_stack.reg[k];
+ temp_stack.reg[k] = temp_stack.reg[j];
+ temp_stack.reg[j] = temp;
+ }
+ change_stack (insn, regstack, &temp_stack, EMIT_BEFORE);
+}
+
/* Handle a move to or from a stack register in PAT, which is in INSN.
REGSTACK is the current stack. Return whether a control flow insn
was deleted in the process. */
@@ -1701,7 +1750,6 @@ subst_stack_regs_pat (rtx insn, stack re
case UNSPEC_FPATAN:
case UNSPEC_FYL2X:
- case UNSPEC_FSCALE:
/* These insns operate on the top two stack slots. */
src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
@@ -1710,42 +1758,7 @@ subst_stack_regs_pat (rtx insn, stack re
src1_note = find_regno_note (insn, REG_DEAD, REGNO (*src1));
src2_note = find_regno_note (insn, REG_DEAD, REGNO (*src2));
- {
- struct stack_def temp_stack;
- int regno, j, k, temp;
-
- temp_stack = *regstack;
-
- /* Place operand 1 at the top of stack. */
- regno = get_hard_regnum (&temp_stack, *src1);
- if (regno < 0)
- abort ();
- if (regno != FIRST_STACK_REG)
- {
- k = temp_stack.top - (regno - FIRST_STACK_REG);
- j = temp_stack.top;
-
- temp = temp_stack.reg[k];
- temp_stack.reg[k] = temp_stack.reg[j];
- temp_stack.reg[j] = temp;
- }
-
- /* Place operand 2 next on the stack. */
- regno = get_hard_regnum (&temp_stack, *src2);
- if (regno < 0)
- abort ();
- if (regno != FIRST_STACK_REG + 1)
- {
- k = temp_stack.top - (regno - FIRST_STACK_REG);
- j = temp_stack.top - 1;
-
- temp = temp_stack.reg[k];
- temp_stack.reg[k] = temp_stack.reg[j];
- temp_stack.reg[j] = temp;
- }
-
- change_stack (insn, regstack, &temp_stack, EMIT_BEFORE);
- }
+ swap_to_top (insn, regstack, *src1, *src2);
replace_reg (src1, FIRST_STACK_REG);
replace_reg (src2, FIRST_STACK_REG + 1);
@@ -1766,6 +1779,64 @@ subst_stack_regs_pat (rtx insn, stack re
regstack->reg[++regstack->top] = REGNO (*dest);
SET_HARD_REG_BIT (regstack->reg_set, REGNO (*dest));
replace_reg (dest, FIRST_STACK_REG);
+ break;
+
+ case UNSPEC_FSCALE_FRACT:
+ /* These insns operate on the top two stack slots. This is the
+ first part of a double input, double output insn. */
+
+ src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
+ src2 = get_true_reg (&XVECEXP (pat_src, 0, 1));
+
+ src1_note = find_regno_note (insn, REG_DEAD, REGNO (*src1));
+ src2_note = find_regno_note (insn, REG_DEAD, REGNO (*src2));
+
+ /* Inputs should never die, they are
+ replaced with outputs. */
+ if ((src1_note) || (src2_note))
+ abort();
+
+ swap_to_top (insn, regstack, *src1, *src2);
+
+ /* Push the result back onto stack. Empty stack slot
+ will be filled in second part of insn. */
+ if (STACK_REG_P (*dest)) {
+ regstack->reg[regstack->top] = REGNO (*dest);
+ SET_HARD_REG_BIT (regstack->reg_set, REGNO (*dest));
+ replace_reg (dest, FIRST_STACK_REG);
+ }
+
+ replace_reg (src1, FIRST_STACK_REG);
+ replace_reg (src2, FIRST_STACK_REG + 1);
+ break;
+
+ case UNSPEC_FSCALE_EXP:
+ /* These insns operate on the top two stack slots. This is the
+ second part of a double input, double output insn. */
+
+ src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
+ src2 = get_true_reg (&XVECEXP (pat_src, 0, 1));
+
+ src1_note = find_regno_note (insn, REG_DEAD, REGNO (*src1));
+ src2_note = find_regno_note (insn, REG_DEAD, REGNO (*src2));
+
+ /* Inputs should never die, they are
+ replaced with outputs. */
+ if ((src1_note) || (src2_note))
+ abort();
+
+ swap_to_top (insn, regstack, *src1, *src2);
+
+ /* Push the result back onto stack. Fill empty slot from
+ first part of insn and fix top of stack pointer. */
+ if (STACK_REG_P (*dest)) {
+ regstack->reg[regstack->top - 1] = REGNO (*dest);
+ SET_HARD_REG_BIT (regstack->reg_set, REGNO (*dest));
+ replace_reg (dest, FIRST_STACK_REG + 1);
+ }
+
+ replace_reg (src1, FIRST_STACK_REG);
+ replace_reg (src2, FIRST_STACK_REG + 1);
break;
case UNSPEC_SINCOS_COS:
Index: gcc/config/i386/i386.md
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.529
diff -u -p -r1.529 i386.md
--- gcc/config/i386/i386.md 20 Apr 2004 19:40:27 -0000 1.529
+++ gcc/config/i386/i386.md 26 Apr 2004 05:43:19 -0000
@@ -117,7 +117,6 @@
; x87 Floating point
(UNSPEC_FPATAN 65)
(UNSPEC_FYL2X 66)
- (UNSPEC_FSCALE 67)
(UNSPEC_FRNDINT 68)
(UNSPEC_F2XM1 69)
@@ -128,6 +127,8 @@
(UNSPEC_TAN_TAN 83)
(UNSPEC_XTRACT_FRACT 84)
(UNSPEC_XTRACT_EXP 85)
+ (UNSPEC_FSCALE_FRACT 86)
+ (UNSPEC_FSCALE_EXP 87)
; REP instruction
(UNSPEC_REP 75)
@@ -15669,42 +15670,6 @@
operands[3] = gen_reg_rtx (XFmode);
})
-(define_insn "*fscale_sfxf3"
- [(set (match_operand:SF 0 "register_operand" "=f")
- (unspec:SF [(match_operand:XF 2 "register_operand" "0")
- (match_operand:XF 1 "register_operand" "u")]
- UNSPEC_FSCALE))
- (clobber (match_scratch:SF 3 "=1"))]
- "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
- && flag_unsafe_math_optimizations"
- "fscale\;fstp\t%y1"
- [(set_attr "type" "fpspc")
- (set_attr "mode" "SF")])
-
-(define_insn "*fscale_dfxf3"
- [(set (match_operand:DF 0 "register_operand" "=f")
- (unspec:DF [(match_operand:XF 2 "register_operand" "0")
- (match_operand:XF 1 "register_operand" "u")]
- UNSPEC_FSCALE))
- (clobber (match_scratch:DF 3 "=1"))]
- "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
- && flag_unsafe_math_optimizations"
- "fscale\;fstp\t%y1"
- [(set_attr "type" "fpspc")
- (set_attr "mode" "DF")])
-
-(define_insn "*fscale_xf3"
- [(set (match_operand:XF 0 "register_operand" "=f")
- (unspec:XF [(match_operand:XF 2 "register_operand" "0")
- (match_operand:XF 1 "register_operand" "u")]
- UNSPEC_FSCALE))
- (clobber (match_scratch:XF 3 "=1"))]
- "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
- && flag_unsafe_math_optimizations"
- "fscale\;fstp\t%y1"
- [(set_attr "type" "fpspc")
- (set_attr "mode" "XF")])
-
(define_insn "*frndintxf2"
[(set (match_operand:XF 0 "register_operand" "=f")
(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
@@ -15725,6 +15690,20 @@
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")])
+(define_insn "*fscalexf4" ; x87 fscale insn: two XFmode inputs, two XFmode outputs
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 3 "register_operand" "1")]
+ UNSPEC_FSCALE_FRACT)) ; first output; input 2 is tied to it
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2) (match_dup 3)]
+ UNSPEC_FSCALE_EXP))] ; second output; input 3 is tied to it
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+ "fscale"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")]) ; every operand is XFmode, matching *frndintxf2
+
(define_expand "expsf2"
[(set (match_dup 2)
(float_extend:XF (match_operand:SF 1 "register_operand" "")))
@@ -15733,16 +15712,21 @@
(set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
(set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
(set (match_dup 9) (plus:XF (match_dup 7) (match_dup 8)))
- (parallel [(set (match_operand:SF 0 "register_operand" "")
- (unspec:SF [(match_dup 9) (match_dup 5)] UNSPEC_FSCALE))
- (clobber (match_scratch:SF 5 ""))])]
+ (parallel [(set (match_dup 10)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 11)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 10)))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
{
rtx temp;
int i;
- for (i=2; i<10; i++)
+ for (i=2; i<12; i++)
operands[i] = gen_reg_rtx (XFmode);
temp = standard_80387_constant_rtx (5); /* fldl2e */
emit_move_insn (operands[3], temp);
@@ -15755,19 +15739,23 @@
(set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
(set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
(set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
-
(set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
(set (match_dup 9) (plus:XF (match_dup 7) (match_dup 8)))
- (parallel [(set (match_operand:DF 0 "register_operand" "")
- (unspec:DF [(match_dup 9) (match_dup 5)] UNSPEC_FSCALE))
- (clobber (match_scratch:DF 5 ""))])]
+ (parallel [(set (match_dup 10)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 11)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 10)))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
{
rtx temp;
int i;
- for (i=2; i<10; i++)
+ for (i=2; i<12; i++)
operands[i] = gen_reg_rtx (XFmode);
temp = standard_80387_constant_rtx (5); /* fldl2e */
emit_move_insn (operands[3], temp);
@@ -15782,15 +15770,18 @@
(set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
(set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
(parallel [(set (match_operand:XF 0 "register_operand" "")
- (unspec:XF [(match_dup 8) (match_dup 4)] UNSPEC_FSCALE))
- (clobber (match_scratch:XF 5 ""))])]
+ (unspec:XF [(match_dup 8) (match_dup 4)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 9)
+ (unspec:XF [(match_dup 8) (match_dup 4)]
+ UNSPEC_FSCALE_EXP))])]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
{
rtx temp;
int i;
- for (i=2; i<9; i++)
+ for (i=2; i<10; i++)
operands[i] = gen_reg_rtx (XFmode);
temp = standard_80387_constant_rtx (5); /* fldl2e */
emit_move_insn (operands[2], temp);
@@ -15805,16 +15796,21 @@
(set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
(set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
(set (match_dup 9) (plus:XF (match_dup 7) (match_dup 8)))
- (parallel [(set (match_operand:SF 0 "register_operand" "")
- (unspec:SF [(match_dup 9) (match_dup 5)] UNSPEC_FSCALE))
- (clobber (match_scratch:SF 5 ""))])]
+ (parallel [(set (match_dup 10)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 11)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 10)))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
{
rtx temp;
int i;
- for (i=2; i<10; i++)
+ for (i=2; i<12; i++)
operands[i] = gen_reg_rtx (XFmode);
temp = standard_80387_constant_rtx (6); /* fldl2t */
emit_move_insn (operands[3], temp);
@@ -15829,16 +15825,21 @@
(set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
(set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
(set (match_dup 9) (plus:XF (match_dup 7) (match_dup 8)))
- (parallel [(set (match_operand:DF 0 "register_operand" "")
- (unspec:DF [(match_dup 9) (match_dup 5)] UNSPEC_FSCALE))
- (clobber (match_scratch:DF 5 ""))])]
+ (parallel [(set (match_dup 10)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 11)
+ (unspec:XF [(match_dup 9) (match_dup 5)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 10)))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
{
rtx temp;
int i;
- for (i=2; i<10; i++)
+ for (i=2; i<12; i++)
operands[i] = gen_reg_rtx (XFmode);
temp = standard_80387_constant_rtx (6); /* fldl2t */
emit_move_insn (operands[3], temp);
@@ -15853,15 +15854,18 @@
(set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
(set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
(parallel [(set (match_operand:XF 0 "register_operand" "")
- (unspec:XF [(match_dup 8) (match_dup 4)] UNSPEC_FSCALE))
- (clobber (match_scratch:XF 5 ""))])]
+ (unspec:XF [(match_dup 8) (match_dup 4)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 9)
+ (unspec:XF [(match_dup 8) (match_dup 4)]
+ UNSPEC_FSCALE_EXP))])]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
{
rtx temp;
int i;
- for (i=2; i<9; i++)
+ for (i=2; i<10; i++)
operands[i] = gen_reg_rtx (XFmode);
temp = standard_80387_constant_rtx (6); /* fldl2t */
emit_move_insn (operands[2], temp);
@@ -15875,15 +15879,20 @@
(set (match_dup 4) (minus:XF (match_dup 2) (match_dup 3)))
(set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_F2XM1))
(set (match_dup 7) (plus:XF (match_dup 5) (match_dup 6)))
- (parallel [(set (match_operand:SF 0 "register_operand" "")
- (unspec:SF [(match_dup 7) (match_dup 3)] UNSPEC_FSCALE))
- (clobber (match_scratch:SF 3 ""))])]
+ (parallel [(set (match_dup 8)
+ (unspec:XF [(match_dup 7) (match_dup 3)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 9)
+ (unspec:XF [(match_dup 7) (match_dup 3)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 8)))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
{
int i;
- for (i=2; i<8; i++)
+ for (i=2; i<10; i++)
operands[i] = gen_reg_rtx (XFmode);
emit_move_insn (operands[6], CONST1_RTX (XFmode)); /* fld1 */
})
@@ -15895,15 +15904,20 @@
(set (match_dup 4) (minus:XF (match_dup 2) (match_dup 3)))
(set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_F2XM1))
(set (match_dup 7) (plus:XF (match_dup 5) (match_dup 6)))
- (parallel [(set (match_operand:DF 0 "register_operand" "")
- (unspec:DF [(match_dup 7) (match_dup 3)] UNSPEC_FSCALE))
- (clobber (match_scratch:DF 3 ""))])]
+ (parallel [(set (match_dup 8)
+ (unspec:XF [(match_dup 7) (match_dup 3)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 9)
+ (unspec:XF [(match_dup 7) (match_dup 3)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 8)))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
{
int i;
- for (i=2; i<8; i++)
+ for (i=2; i<10; i++)
operands[i] = gen_reg_rtx (XFmode);
emit_move_insn (operands[6], CONST1_RTX (XFmode)); /* fld1 */
})
@@ -15915,14 +15929,17 @@
(set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_F2XM1))
(set (match_dup 7) (plus:XF (match_dup 5) (match_dup 6)))
(parallel [(set (match_operand:XF 0 "register_operand" "")
- (unspec:XF [(match_dup 7) (match_dup 3)] UNSPEC_FSCALE))
- (clobber (match_scratch:XF 3 ""))])]
+ (unspec:XF [(match_dup 7) (match_dup 3)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 8)
+ (unspec:XF [(match_dup 7) (match_dup 3)]
+ UNSPEC_FSCALE_EXP))])]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
{
int i;
- for (i=2; i<8; i++)
+ for (i=2; i<9; i++)
operands[i] = gen_reg_rtx (XFmode);
emit_move_insn (operands[6], CONST1_RTX (XFmode)); /* fld1 */
})