This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[RFC PATCH, i386] Unify TARGET_SSE_MATH for trunc* patterns
- From: Uros Bizjak <uros at kss-loka dot si>
- To: Richard Henderson <rth at redhat dot com>
- Cc: gcc-patches at gcc dot gnu dot org, jh at suse dot cz
- Date: Mon, 20 Dec 2004 12:21:15 +0100
- Subject: [RFC PATCH, i386] Unify TARGET_SSE_MATH for trunc* patterns
- References: <41C2B76C.1050000@kss-loka.si> <20041217230642.GA13887@redhat.com>
Richard Henderson wrote:
Missed truncdfsf2_1? You're not ordering these as we discussed,
and you had been doing for the other patches. Which makes me
doubt we'll select the proper pattern.
I'm a bit confused about the choices here in the trunc patterns
in general. We have choices:
(1) -ffast-math.
(2) allow 80387 reg-reg, but require a scratch memory,
(3) disallow 80387 reg-reg, no scratch memory.
However, the patterns are not consistent in their selection of
2 or 3, and don't seem to allow for all alternatives allowed by
-mfpmath.
Perhaps merging TARGET_80387 and TARGET_MIX_SSE_I387 was not a good
idea... I have split them into separate patterns and ordered them as
they were ordered in previous cleanups. Also, the names are a lot more
descriptive, and the split/unsplit patterns are grouped together in a
logical way. A couple of comments were also added.
Unfortunately, with the current state of affairs, we have to provide both
unsplit (with memory clobber) and split patterns (without memory
clobber) for the *_i387 and *_mixed patterns. The split patterns without
the memory clobber have their names suffixed with _1.
Each pattern now shadows the patterns that follow it with respect to TARGET_SSE2,
TARGET_MIX_SSE_I387 and, additionally, TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS.
BTW: I have changed "truncdfsf2" expander to:
(define_expand "truncdfsf2"
[(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "")))
(clobber (match_dup 2))])]
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
"
if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
operands[1] = force_reg (DFmode, operands[1]);
...
As an aside, I would support removing TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS,
since it's currently always zero. Honza, is this the remnant
of an experiment to see what's faster, or is this supposed to be
set for Athlon, or what?
I suggest to leave TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS as it is at the
moment. Your suggested patch is IMHO a lot more intrusive than my
rename/move/enable patch.
I have attached relevant part of i386.md to this message. The change was
bootstrapped on pentium4-pc-linux-gnu, regtest is in progress.
Uros.
;; %%% This seems bad bad news.
;; This cannot output into an f-reg because there is no way to be sure
;; of truncating in that case. Otherwise this is just like a simple move
;; insn. So we pretend we can output to a reg in order to get better
;; register preferencing, but we really use a stack slot.
;; Conversion from DFmode to SFmode.
;; Expander for DFmode -> SFmode truncation.
;;
;; Three strategies, tried in this order:
;;   1. SSE2 with SSE math: emit truncdfsf2_sse directly.
;;   2. x87 with -funsafe-math-optimizations: emit truncdfsf2_i387_noop
;;      into a register, then copy to the real destination if needed.
;;   3. Otherwise: fall through to the (set ... (clobber ...)) parallel
;;      above, with operand 2 bound to an SFmode stack slot.
;;
;; NOTE(review): on path 1 operand 0 is passed through unchanged and may
;; be a MEM, whereas truncdfsf2_sse declares a register_operand
;; destination -- confirm which pattern is meant to match in that case.
(define_expand "truncdfsf2"
  [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
                   (float_truncate:SF
                     (match_operand:DF 1 "nonimmediate_operand" "")))
              (clobber (match_dup 2))])]
  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{
  /* Never hand a memory-to-memory truncation to the insn patterns.  */
  if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
    operands[1] = force_reg (DFmode, operands[1]);

  if (TARGET_SSE2 && TARGET_SSE_MATH)
    {
      emit_insn (gen_truncdfsf2_sse (operands[0], operands[1]));
      DONE;
    }

  if (flag_unsafe_math_optimizations)
    {
      /* The noop pattern only writes registers; go through a fresh
         pseudo when the real destination is not a plain REG.  */
      rtx dst = operands[0];
      rtx tmp = REG_P (dst) ? dst : gen_reg_rtx (SFmode);

      emit_insn (gen_truncdfsf2_i387_noop (tmp, operands[1]));
      if (tmp != dst)
        emit_move_insn (dst, tmp);
      DONE;
    }

  /* Scratch stack slot referenced by the (clobber (match_dup 2)).  */
  operands[2] = assign_386_stack_local (SFmode, 0);
})
;; Mixed SSE2/x87 DFmode -> SFmode truncation with a memory scratch
;; (operand 2), used when TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS is set.
;;   alt 0:    x87 register stored straight to memory (fst/fstp).
;;   alt 1-3:  register destinations that use the memory scratch; no
;;             assembly is produced for them here (abort if reached).
;;   alt 4:    early-clobbered SSE destination; "#" leaves the insn to
;;             be split.
(define_insn "*truncdfsf2_mixed_sse2_nooverlap"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,&Y")
        (float_truncate:SF
          (match_operand:DF 1 "nonimmediate_operand" "f#Y,f#Y,f#Y,f#Y,mY#f")))
   (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
  "TARGET_SSE2 && TARGET_MIX_SSE_I387
   && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS"
{
  if (which_alternative == 4)
    return "#";

  /* Only the direct store-to-memory alternative may be output.  */
  if (which_alternative != 0)
    abort ();

  /* Pop the x87 stack when the source register dies in this insn.  */
  return (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
          ? "fstp%z0\t%y0"
          : "fst%z0\t%y0");
}
  [(set_attr "type" "fmov,multi,multi,multi,ssecvt")
   (set_attr "mode" "SF,SF,SF,SF,DF")])
;; Clobber-less variant of the mixed SSE2/x87 "nooverlap" truncation.
;;   alt 0:  early-clobbered SSE destination; "#" leaves the insn to be
;;           split.
;;   alt 1:  x87 register stored straight to memory (fst/fstp).
;; The insn condition rejects the memory-to-memory combination.
(define_insn "*truncdfsf2_mixed_sse2_nooverlap_1"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m")
        (float_truncate:SF
          (match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
  "TARGET_SSE2 && TARGET_MIX_SSE_I387
   && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS
   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
{
  if (which_alternative == 0)
    return "#";

  if (which_alternative != 1)
    abort ();

  /* Pop the x87 stack when the source register dies in this insn.  */
  return (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
          ? "fstp%z0\t%y0"
          : "fst%z0\t%y0");
}
  [(set_attr "type" "ssecvt,fmov")
   (set_attr "mode" "DF,SF")])
;; Mixed SSE2/x87 DFmode -> SFmode truncation with a memory scratch
;; (operand 2); unlike the "nooverlap" variant this one does not test
;; TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS.
;;   alt 0:    x87 register stored straight to memory (fst/fstp).
;;   alt 1-3:  register destinations that use the memory scratch; no
;;             assembly is produced for them here (abort if reached).
;;   alt 4:    SSE destination; "#" leaves the insn to be split.
(define_insn "*truncdfsf2_mixed_sse2"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m#fxr,?f#xr,?r#fx,?x#fr,Y#fr")
        (float_truncate:SF
          (match_operand:DF 1 "nonimmediate_operand" "f#Y,f#Y,f#Y,f#Y,mY#f")))
   (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
  "TARGET_SSE2 && TARGET_MIX_SSE_I387"
{
  const int alt = which_alternative;

  if (alt == 4)
    return "#";

  /* Only the direct store-to-memory alternative may be output.  */
  if (alt != 0)
    abort ();

  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    return "fstp%z0\t%y0";	/* Source dies: store and pop.  */
  return "fst%z0\t%y0";
}
  [(set_attr "type" "fmov,multi,multi,multi,ssecvt")
   (set_attr "mode" "SF,SF,SF,SF,DF")])
;; Clobber-less mixed SSE2/x87 DFmode -> SFmode truncation.
;;   alt 0, 1:  SSE destination; emits cvtsd2ss.
;;   alt 2:     x87 register stored straight to memory (fst/fstp).
;; The insn condition rejects the memory-to-memory combination.
(define_insn "*truncdfsf2_mixed_sse2_1"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=Y,Y,!m")
        (float_truncate:SF
          (match_operand:DF 1 "nonimmediate_operand" "Y,mY,f#Y")))]
  "TARGET_SSE2 && TARGET_MIX_SSE_I387
   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
{
  if (which_alternative == 0 || which_alternative == 1)
    return "cvtsd2ss\t{%1, %0|%0, %1}";

  if (which_alternative != 2)
    abort ();

  /* Pop the x87 stack when the source register dies in this insn.  */
  return (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
          ? "fstp%z0\t%y0"
          : "fst%z0\t%y0");
}
  [(set_attr "type" "ssecvt,ssecvt,fmov")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "mode" "SF,SF,SF")])
;; Mixed SSE/x87 DFmode -> SFmode truncation that does not require
;; TARGET_SSE2; the source is always an x87 register.
;;   alt 0:    stored straight to memory (fst/fstp).
;;   alt 1-3:  register destinations that use the memory scratch
;;             (operand 2); no assembly is produced for them here
;;             (abort if reached).
(define_insn "*truncdfsf2_mixed_sse1"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#rx,?r#fx,?x#rf")
        (float_truncate:SF
          (match_operand:DF 1 "register_operand" "f,f,f,f")))
   (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))]
  "TARGET_MIX_SSE_I387"
{
  /* Only the direct store-to-memory alternative may be output.  */
  if (which_alternative != 0)
    abort ();

  /* Pop the x87 stack when the source register dies in this insn.  */
  return (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
          ? "fstp%z0\t%y0"
          : "fst%z0\t%y0");
}
  [(set_attr "type" "fmov,multi,multi,multi")
   (set_attr "mode" "SF")])
;; SSE-only DFmode -> SFmode truncation, enabled when
;; TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS is set.  The destination is an
;; early-clobbered ("&") SSE register and the output template is "#", so
;; no assembly is emitted directly for this insn; it has to be split.
;; NOTE(review): presumably split by the clobber-less SSE define_split
;; further down, which clears the destination before cvtsd2ss -- confirm.
(define_insn "*truncdfsf2_sse_nooverlap"
[(set (match_operand:SF 0 "register_operand" "=&Y")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "mY")))]
"TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS"
"#"
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])
;; Named SSE2 DFmode -> SFmode truncation: a single cvtsd2ss from an SSE
;; register (alt 0) or from memory/SSE register (alt 1) into an SSE
;; register.  Being named, it provides gen_truncdfsf2_sse, which the
;; truncdfsf2 expander and the post-reload SSE split call directly.
(define_insn "truncdfsf2_sse"
[(set (match_operand:SF 0 "register_operand" "=Y,Y")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "Y,mY")))]
"TARGET_SSE2 && TARGET_SSE_MATH"
"cvtsd2ss\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "athlon_decode" "vector,double")
(set_attr "mode" "SF")])
;; x87 register-to-register DFmode -> SFmode "truncation", available only
;; with flag_unsafe_math_optimizations.  The assembly is whatever
;; output_387_reg_move (defined elsewhere) produces for the two operands.
;; NOTE(review): as the name suggests, presumably no rounding to SFmode
;; precision is actually performed -- confirm against output_387_reg_move.
(define_insn "truncdfsf2_i387_noop"
[(set (match_operand:SF 0 "register_operand" "=f")
(float_truncate:SF (match_operand:DF 1 "register_operand" "f")))]
"TARGET_80387 && flag_unsafe_math_optimizations"
{
return output_387_reg_move (insn, operands);
}
[(set_attr "type" "fmov")
(set_attr "mode" "SF")])
;; x87-only DFmode -> SFmode truncation with a memory scratch (operand 2).
;;   alt 0:    x87 register stored straight to memory (fst/fstp).
;;   alt 1-2:  register destinations that use the memory scratch; no
;;             assembly is produced for them here (abort if reached).
(define_insn "*truncdfsf2_i387"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#r,?r#f")
        (float_truncate:SF
          (match_operand:DF 1 "register_operand" "f,f,f")))
   (clobber (match_operand:SF 2 "memory_operand" "=X,m,m"))]
  "TARGET_80387"
{
  /* Only the direct store-to-memory alternative may be output.  */
  if (which_alternative != 0)
    abort ();

  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    return "fstp%z0\t%y0";	/* Source dies: store and pop.  */
  return "fst%z0\t%y0";
}
  [(set_attr "type" "fmov,multi,multi")
   (set_attr "mode" "SF")])
;; Clobber-less x87 DFmode -> SFmode truncation: an x87 register stored
;; to an SFmode memory destination.
(define_insn "*truncdfsf2_i387_1"
  [(set (match_operand:SF 0 "memory_operand" "=m")
        (float_truncate:SF
          (match_operand:DF 1 "register_operand" "f")))]
  "TARGET_80387"
{
  /* Pop the x87 stack when the source register dies in this insn.  */
  return (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
          ? "fstp%z0\t%y0"
          : "fst%z0\t%y0");
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "SF")])
;; Post-reload split of the clobber form of DFmode -> SFmode truncation
;; when the destination is an SSE register and the source is not an x87
;; stack register.  The clobbered operand 2 is dropped.
;;
;; Without TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS a plain truncdfsf2_sse
;; insn is emitted.  With it, the destination is viewed as V4SF and the
;; source as V2DF; the destination is first zeroed with sse_clrv4sf and
;; then written by cvtsd2ss.  Source and destination must not overlap
;; (abort otherwise).
; Avoid possible reformatting penalty on the destination by first
; zeroing it out
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "")))
(clobber (match_operand 2 "" ""))]
"TARGET_SSE2 && TARGET_SSE_MATH
&& reload_completed
&& SSE_REG_P (operands[0]) && !STACK_REG_P (operands[1])"
[(const_int 0)]
{
rtx src, dest;
if (!TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS)
emit_insn (gen_truncdfsf2_sse (operands[0], operands[1]));
else
{
dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
/* simplify_gen_subreg refuses to widen memory references. */
if (GET_CODE (src) == SUBREG)
alter_subreg (&src);
if (reg_overlap_mentioned_p (operands[0], operands[1]))
abort ();
emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
emit_insn (gen_cvtsd2ss (dest, dest, src));
}
DONE;
})
;; Post-reload split of the clobber-less DFmode -> SFmode truncation into
;; an SSE register, only when TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS is set.
;; The destination is viewed as V4SF and the source as V2DF; the
;; destination is zeroed with sse_clrv4sf and then written by cvtsd2ss.
;; Source and destination must not overlap (abort otherwise).
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "")))]
"TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS
&& reload_completed
&& SSE_REG_P (operands[0])"
[(const_int 0)]
{
rtx src, dest;
dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
/* simplify_gen_subreg refuses to widen memory references. */
if (GET_CODE (src) == SUBREG)
alter_subreg (&src);
if (reg_overlap_mentioned_p (operands[0], operands[1]))
abort ();
emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
emit_insn (gen_cvtsd2ss (dest, dest, src));
DONE;
})
;; Post-reload split of the clobber form with a register destination and
;; an x87 (fp_register_operand) source: store the truncated value into
;; the SFmode memory scratch (operand 2), then load the destination from
;; that scratch.
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(float_truncate:SF
(match_operand:DF 1 "fp_register_operand" "")))
(clobber (match_operand:SF 2 "memory_operand" ""))]
"TARGET_80387
&& reload_completed"
[(set (match_dup 2) (float_truncate:SF (match_dup 1)))
(set (match_dup 0) (match_dup 2))]
"")
;; Split of the clobber form with a memory destination: the scratch is
;; not needed, so the insn is rewritten as the plain clobber-less set.
;; Note that the condition does not test reload_completed.
(define_split
[(set (match_operand:SF 0 "memory_operand" "")
(float_truncate:SF
(match_operand:DF 1 "register_operand" "")))
(clobber (match_operand:SF 2 "memory_operand" ""))]
"TARGET_80387"
[(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
"")
;; Conversion from XFmode to SFmode.
;; Expander for XFmode -> SFmode truncation (x87 only).
;;
;; With -funsafe-math-optimizations, emit truncxfsf2_i387_noop into a
;; register and copy to the real destination if needed.  Otherwise fall
;; through to the (set ... (clobber ...)) parallel above, with operand 2
;; bound to an SFmode stack slot.
(define_expand "truncxfsf2"
  [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
                   (float_truncate:SF
                     (match_operand:XF 1 "register_operand" "")))
              (clobber (match_dup 2))])]
  "TARGET_80387"
{
  if (flag_unsafe_math_optimizations)
    {
      /* The noop pattern only writes registers; go through a fresh
         pseudo when the real destination is not a plain REG.  */
      rtx dst = operands[0];
      rtx tmp = REG_P (dst) ? dst : gen_reg_rtx (SFmode);

      emit_insn (gen_truncxfsf2_i387_noop (tmp, operands[1]));
      if (tmp != dst)
        emit_move_insn (dst, tmp);
      DONE;
    }

  /* Scratch stack slot referenced by the (clobber (match_dup 2)).  */
  operands[2] = assign_386_stack_local (SFmode, 0);
})
;; Mixed SSE/x87 XFmode -> SFmode truncation with a memory scratch
;; (operand 2); the source is always an x87 register.
;;   alt 0:    stored straight to memory (fst/fstp).
;;   alt 1-3:  register destinations that use the memory scratch; no
;;             assembly is produced for them here (abort if reached).
(define_insn "*truncxfsf2_mixed"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#rx,?r#fx,?x#rf")
        (float_truncate:SF
          (match_operand:XF 1 "register_operand" "f,f,f,f")))
   (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))]
  "TARGET_MIX_SSE_I387"
{
  /* Only the direct store-to-memory alternative may be output.  */
  if (which_alternative != 0)
    abort ();

  /* Pop the x87 stack when the source register dies in this insn.  */
  return (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
          ? "fstp%z0\t%y0"
          : "fst%z0\t%y0");
}
  [(set_attr "type" "fmov,multi,multi,multi")
   (set_attr "mode" "SF")])
;; x87 register-to-register XFmode -> SFmode "truncation", available only
;; with flag_unsafe_math_optimizations.  The assembly is whatever
;; output_387_reg_move (defined elsewhere) produces for the two operands.
;; NOTE(review): as the name suggests, presumably no rounding to SFmode
;; precision is actually performed -- confirm against output_387_reg_move.
(define_insn "truncxfsf2_i387_noop"
[(set (match_operand:SF 0 "register_operand" "=f")
(float_truncate:SF (match_operand:XF 1 "register_operand" "f")))]
"TARGET_80387 && flag_unsafe_math_optimizations"
{
return output_387_reg_move (insn, operands);
}
[(set_attr "type" "fmov")
(set_attr "mode" "SF")])
;; x87-only XFmode -> SFmode truncation with a memory scratch (operand 2).
;;   alt 0:    x87 register stored straight to memory (fst/fstp).
;;   alt 1-2:  register destinations that use the memory scratch; no
;;             assembly is produced for them here (abort if reached).
(define_insn "*truncxfsf2_i387"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#r,?r#f")
        (float_truncate:SF
          (match_operand:XF 1 "register_operand" "f,f,f")))
   (clobber (match_operand:SF 2 "memory_operand" "=X,m,m"))]
  "TARGET_80387"
{
  /* Only the direct store-to-memory alternative may be output.  */
  if (which_alternative != 0)
    abort ();

  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    return "fstp%z0\t%y0";	/* Source dies: store and pop.  */
  return "fst%z0\t%y0";
}
  [(set_attr "type" "fmov,multi,multi")
   (set_attr "mode" "SF")])
;; Clobber-less x87 XFmode -> SFmode truncation: an x87 register stored
;; to an SFmode memory destination.
(define_insn "*truncxfsf2_i387_1"
  [(set (match_operand:SF 0 "memory_operand" "=m")
        (float_truncate:SF
          (match_operand:XF 1 "register_operand" "f")))]
  "TARGET_80387"
{
  /* Pop the x87 stack when the source register dies in this insn.  */
  return (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
          ? "fstp%z0\t%y0"
          : "fst%z0\t%y0");
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "SF")])
;; Post-reload split of the XFmode -> SFmode clobber form with a register
;; destination: store the truncated value into the SFmode memory scratch
;; (operand 2), then load the destination from that scratch.
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(float_truncate:SF
(match_operand:XF 1 "register_operand" "")))
(clobber (match_operand:SF 2 "memory_operand" ""))]
"TARGET_80387 && reload_completed"
[(set (match_dup 2) (float_truncate:SF (match_dup 1)))
(set (match_dup 0) (match_dup 2))]
"")
;; Split of the XFmode -> SFmode clobber form with a memory destination:
;; the scratch is not needed, so the insn is rewritten as the plain
;; clobber-less set.  Note that the condition does not test
;; reload_completed.
(define_split
[(set (match_operand:SF 0 "memory_operand" "")
(float_truncate:SF
(match_operand:XF 1 "register_operand" "")))
(clobber (match_operand:SF 2 "memory_operand" ""))]
"TARGET_80387"
[(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
"")
;; Conversion from XFmode to DFmode.
;; Expander for XFmode -> DFmode truncation (x87 only).
;;
;; With -funsafe-math-optimizations, emit truncxfdf2_i387_noop into a
;; register and copy to the real destination if needed.  Otherwise fall
;; through to the (set ... (clobber ...)) parallel above, with operand 2
;; bound to a DFmode stack slot.
(define_expand "truncxfdf2"
  [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "")
                   (float_truncate:DF
                     (match_operand:XF 1 "register_operand" "")))
              (clobber (match_dup 2))])]
  "TARGET_80387"
{
  if (flag_unsafe_math_optimizations)
    {
      /* The noop pattern only writes registers; go through a fresh
         pseudo when the real destination is not a plain REG.  */
      rtx dst = operands[0];
      rtx tmp = REG_P (dst) ? dst : gen_reg_rtx (DFmode);

      emit_insn (gen_truncxfdf2_i387_noop (tmp, operands[1]));
      if (tmp != dst)
        emit_move_insn (dst, tmp);
      DONE;
    }

  /* Scratch stack slot referenced by the (clobber (match_dup 2)).  */
  operands[2] = assign_386_stack_local (DFmode, 0);
})
;; Mixed SSE2/x87 XFmode -> DFmode truncation with a memory scratch
;; (operand 2); the source is always an x87 register.
;;   alt 0:    stored straight to memory (fst/fstp).
;;   alt 1-3:  register destinations that use the memory scratch; no
;;             assembly is produced for them here (abort if reached).
(define_insn "*truncxfdf2_mixed"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f#rY,?r#fY,?Y#rf")
        (float_truncate:DF
          (match_operand:XF 1 "register_operand" "f,f,f,f")))
   (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))]
  "TARGET_SSE2 && TARGET_MIX_SSE_I387"
{
  switch (which_alternative)
    {
    case 0:
      /* Pop the x87 stack when the source register dies in this insn.  */
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        return "fstp%z0\t%y0";
      else
        return "fst%z0\t%y0";

    default:
      /* Every arm above returns, so nothing can follow the switch; the
         former trailing abort () was unreachable and has been removed.  */
      abort ();
    }
}
  [(set_attr "type" "fmov,multi,multi,multi")
   (set_attr "mode" "DF")])
;; x87 register-to-register XFmode -> DFmode "truncation", available only
;; with flag_unsafe_math_optimizations.  The assembly is whatever
;; output_387_reg_move (defined elsewhere) produces for the two operands.
;; NOTE(review): as the name suggests, presumably no rounding to DFmode
;; precision is actually performed -- confirm against output_387_reg_move.
(define_insn "truncxfdf2_i387_noop"
[(set (match_operand:DF 0 "register_operand" "=f")
(float_truncate:DF (match_operand:XF 1 "register_operand" "f")))]
"TARGET_80387 && flag_unsafe_math_optimizations"
{
return output_387_reg_move (insn, operands);
}
[(set_attr "type" "fmov")
(set_attr "mode" "DF")])
;; x87-only XFmode -> DFmode truncation with a memory scratch (operand 2).
;;   alt 0:    x87 register stored straight to memory (fst/fstp).
;;   alt 1-2:  register destinations that use the memory scratch; no
;;             assembly is produced for them here (abort if reached).
(define_insn "*truncxfdf2_i387"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f#r,?r#f")
        (float_truncate:DF
          (match_operand:XF 1 "register_operand" "f,f,f")))
   (clobber (match_operand:DF 2 "memory_operand" "=X,m,m"))]
  "TARGET_80387"
{
  /* Only the direct store-to-memory alternative may be output.  */
  if (which_alternative != 0)
    abort ();

  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    return "fstp%z0\t%y0";	/* Source dies: store and pop.  */
  return "fst%z0\t%y0";
}
  [(set_attr "type" "fmov,multi,multi")
   (set_attr "mode" "DF")])
;; Clobber-less x87 XFmode -> DFmode truncation: an x87 register stored
;; to a DFmode memory destination.
(define_insn "*truncxfdf2_i387_1"
  [(set (match_operand:DF 0 "memory_operand" "=m")
        (float_truncate:DF
          (match_operand:XF 1 "register_operand" "f")))]
  "TARGET_80387"
{
  /* Pop the x87 stack when the source register dies in this insn.  */
  return (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
          ? "fstp%z0\t%y0"
          : "fst%z0\t%y0");
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "DF")])
;; Post-reload split of the XFmode -> DFmode clobber form with a register
;; destination: store the truncated value into the DFmode memory scratch
;; (operand 2), then load the destination from that scratch.
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(float_truncate:DF
(match_operand:XF 1 "register_operand" "")))
(clobber (match_operand:DF 2 "memory_operand" ""))]
"TARGET_80387 && reload_completed"
[(set (match_dup 2) (float_truncate:DF (match_dup 1)))
(set (match_dup 0) (match_dup 2))]
"")
;; Split of the XFmode -> DFmode clobber form with a memory destination:
;; the scratch is not needed, so the insn is rewritten as the plain
;; clobber-less set.  Note that the condition does not test
;; reload_completed.
(define_split
[(set (match_operand:DF 0 "memory_operand" "")
(float_truncate:DF
(match_operand:XF 1 "register_operand" "")))
(clobber (match_operand:DF 2 "memory_operand" ""))]
"TARGET_80387"
[(set (match_dup 0) (float_truncate:DF (match_dup 1)))]
"")