[3.4-bib] SSE moves patch
Jan Hubicka
jh@suse.cz
Mon Oct 7 16:27:00 GMT 2002
> On Mon, Oct 07, 2002 at 12:06:38AM +0200, Jan Hubicka wrote:
> > * i386.c (x86_sse_partial_reg_dependency, x86_sse_partial_regs,
> > x86_sse_typeless_stores, x86_sse_load0_by_pxor): New global
> > variables.
> > * i386.h (x86_sse_partial_reg_dependency, x86_sse_partial_regs,
> > x86_sse_typeless_stores, x86_sse_load0_by_pxor): Declare.
> > (TARGET_SSE_PARTIAL_REG_DEPENDENCY, TARGET_SSE_PARTIAL_REGS,
> > TARGET_SSE_TYPELESS_STORES, TARGET_SSE_TYPELESS_LOAD0): New
> > macros.
> > * i386.md (movsf*, movdf*, movti, movv4sf, movv2df, movv16qi, movv8hi,
> > movv4si): Obey the new flags.
> > (floatsi2sf, floatdi2sf, truncatedf2sf): Emit extra load of 0 to avoid
> > reformating penalty.
> > (anddf, cmov patterns): Avoid reformating by first converting.
> > (sse_cvtsd2ss): Fix predicate.
> > (sse2_clrti): Fix mode,
> > (sse2_clrv4sf): New.
>
> Ok.
Hi,
There was a missed case of the truncdfsf pattern that caused GCC to
abort in 64-bit mode, and I also noticed that I had created a duplicate of
sse_clrv4sf, just without the unnecessary unspec.
I've installed the attached patch with those two problems fixed.
Thanks!
Sun Oct 6 22:53:18 CEST 2002 Jan Hubicka <jh@suse.cz>
* i386.c (x86_sse_partial_reg_dependency, x86_sse_partial_regs,
x86_sse_typeless_stores, x86_sse_load0_by_pxor): New global
variables.
(safe_vector_operand): Update sse_clrv4sf call.
(ix86_expand_builtin): Likewise.
* i386.h (x86_sse_partial_reg_dependency, x86_sse_partial_regs,
x86_sse_typeless_stores, x86_sse_load0_by_pxor): Declare.
(TARGET_SSE_PARTIAL_REG_DEPENDENCY, TARGET_SSE_PARTIAL_REGS,
TARGET_SSE_TYPELESS_STORES, TARGET_SSE_TYPELESS_LOAD0): New
macros.
* i386.md (movsf*, movdf*, movti, movv4sf, movv2df, movv16qi, movv8hi,
movv4si): Obey the new flags.
(floatsi2sf, floatdi2sf, truncatedf2sf): Emit extra load of 0 to avoid
reformatting penalty.
(anddf, cmov patterns): Avoid reformatting by first converting.
(sse_cvtsd2ss): Fix predicate.
(sse2_clrti): Fix mode.
(sse_clrv4sf): Avoid unspec.
*** i386.c.old1 Sun Oct 6 02:18:27 2002
--- i386.c Mon Oct 7 12:55:51 2002
*************** const int x86_epilogue_using_move = m_AT
*** 404,409 ****
--- 404,416 ----
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
+ const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
+ /* Set for machines where the type and dependencies are resolved on SSE register
+ parts insetad of whole registers, so we may maintain just lower part of
+ scalar values in proper format leaving the upper part undefined. */
+ const int x86_sse_partial_regs = m_ATHLON;
+ const int x86_sse_typeless_stores = m_ATHLON;
+ const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
/* In case the avreage insn count for single function invocation is
lower than this constant, emit fast (but longer) prologue and
*************** safe_vector_operand (x, mode)
*** 12583,12589 ****
: gen_rtx_SUBREG (DImode, x, 0)));
else
emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
! : gen_rtx_SUBREG (V4SFmode, x, 0)));
return x;
}
--- 12590,12597 ----
: gen_rtx_SUBREG (DImode, x, 0)));
else
emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
! : gen_rtx_SUBREG (V4SFmode, x, 0),
! CONST0_RTX (V4SFmode)));
return x;
}
*************** ix86_expand_builtin (exp, target, subtar
*** 13273,13279 ****
case IX86_BUILTIN_SSE_ZERO:
target = gen_reg_rtx (V4SFmode);
! emit_insn (gen_sse_clrv4sf (target));
return target;
case IX86_BUILTIN_MMX_ZERO:
--- 13281,13287 ----
case IX86_BUILTIN_SSE_ZERO:
target = gen_reg_rtx (V4SFmode);
! emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
return target;
case IX86_BUILTIN_MMX_ZERO:
*** i386.h.old1 Sun Oct 6 02:18:31 2002
--- i386.h Mon Oct 7 01:58:04 2002
*************** extern const int x86_partial_reg_depende
*** 207,212 ****
--- 207,214 ----
extern const int x86_accumulate_outgoing_args, x86_prologue_using_move;
extern const int x86_epilogue_using_move, x86_decompose_lea;
extern const int x86_arch_always_fancy_math_387, x86_shift1;
+ extern const int x86_sse_partial_reg_dependency, x86_sse_partial_regs;
+ extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
extern int x86_prefetch_sse;
#define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
*************** extern int x86_prefetch_sse;
*** 243,248 ****
--- 245,256 ----
#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK)
#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & CPUMASK)
#define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & CPUMASK)
+ #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
+ (x86_sse_partial_reg_dependency & CPUMASK)
+ #define TARGET_SSE_PARTIAL_REGS (x86_sse_partial_regs & CPUMASK)
+ #define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & CPUMASK)
+ #define TARGET_SSE_TYPELESS_LOAD0 (x86_sse_typeless_load0 & CPUMASK)
+ #define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & CPUMASK)
#define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & CPUMASK)
#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & CPUMASK)
#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & CPUMASK)
*** i386.md.old Sun Oct 6 22:52:23 2002
--- i386.md Mon Oct 7 13:22:06 2002
***************
*** 2133,2144 ****
case 4:
return "mov{l}\t{%1, %0|%0, %1}";
case 5:
! if (TARGET_SSE2 && !TARGET_ATHLON)
return "pxor\t%0, %0";
else
return "xorps\t%0, %0";
case 6:
! if (TARGET_PARTIAL_REG_DEPENDENCY)
return "movaps\t{%1, %0|%0, %1}";
else
return "movss\t{%1, %0|%0, %1}";
--- 2133,2144 ----
case 4:
return "mov{l}\t{%1, %0|%0, %1}";
case 5:
! if (get_attr_mode (insn) == MODE_TI)
return "pxor\t%0, %0";
else
return "xorps\t%0, %0";
case 6:
! if (get_attr_mode (insn) == MODE_V4SF)
return "movaps\t{%1, %0|%0, %1}";
else
return "movss\t{%1, %0|%0, %1}";
***************
*** 2158,2164 ****
}
}
[(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
! (set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF,SI,SI,DI")])
(define_insn "*swapsf"
[(set (match_operand:SF 0 "register_operand" "+f")
--- 2158,2197 ----
}
}
[(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
! (set (attr "mode")
! (cond [(eq_attr "alternative" "3,4,9,10")
! (const_string "SI")
! (eq_attr "alternative" "5")
! (if_then_else
! (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
! (const_int 0))
! (ne (symbol_ref "TARGET_SSE2")
! (const_int 0)))
! (eq (symbol_ref "optimize_size")
! (const_int 0)))
! (const_string "TI")
! (const_string "V4SF"))
! /* For architectures resolving dependencies on
! whole SSE registers use APS move to break dependency
! chains, otherwise use short move to avoid extra work.
!
! Do the same for architectures resolving dependencies on
! the parts. While in DF mode it is better to always handle
! just register parts, the SF mode is different due to lack
! of instructions to load just part of the register. It is
! better to maintain the whole registers in single format
! to avoid problems on using packed logical operations. */
! (eq_attr "alternative" "6")
! (if_then_else
! (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
! (const_int 0))
! (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
! (const_int 0)))
! (const_string "V4SF")
! (const_string "SF"))
! (eq_attr "alternative" "11")
! (const_string "DI")]
! (const_string "SF")))])
(define_insn "*swapsf"
[(set (match_operand:SF 0 "register_operand" "+f")
***************
*** 2319,2343 ****
case 4:
return "#";
case 5:
! if (TARGET_ATHLON)
! return "xorpd\t%0, %0";
! else
! return "pxor\t%0, %0";
case 6:
! if (TARGET_PARTIAL_REG_DEPENDENCY)
! return "movapd\t{%1, %0|%0, %1}";
else
return "movsd\t{%1, %0|%0, %1}";
- case 7:
case 8:
! return "movsd\t{%1, %0|%0, %1}";
default:
abort();
}
}
[(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
! (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")])
(define_insn "*movdf_integer"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m")
--- 2352,2429 ----
case 4:
return "#";
case 5:
! switch (get_attr_mode (insn))
! {
! case MODE_V4SF:
! return "xorps\t%0, %0";
! case MODE_V2DF:
! return "xorpd\t%0, %0";
! case MODE_TI:
! return "pxor\t%0, %0";
! default:
! abort ();
! }
case 6:
! switch (get_attr_mode (insn))
! {
! case MODE_V4SF:
! return "movaps\t{%1, %0|%0, %1}";
! case MODE_V2DF:
! return "movapd\t{%1, %0|%0, %1}";
! case MODE_DF:
! return "movsd\t{%1, %0|%0, %1}";
! default:
! abort ();
! }
! case 7:
! if (get_attr_mode (insn) == MODE_V2DF)
! return "movlpd\t{%1, %0|%0, %1}";
else
return "movsd\t{%1, %0|%0, %1}";
case 8:
! return "movsd\t{%1, %0|%0, %1}";
default:
abort();
}
}
[(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
! (set (attr "mode")
! (cond [(eq_attr "alternative" "3,4")
! (const_string "SI")
! /* xorps is one byte shorter. */
! (eq_attr "alternative" "5")
! (cond [(ne (symbol_ref "optimize_size")
! (const_int 0))
! (const_string "V4SF")
! (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
! (const_int 0))
! (const_string "TI")]
! (const_string "V2DF"))
! /* For architectures resolving dependencies on
! whole SSE registers use APD move to break dependency
! chains, otherwise use short move to avoid extra work.
!
! movaps encodes one byte shorter. */
! (eq_attr "alternative" "6")
! (cond
! [(ne (symbol_ref "optimize_size")
! (const_int 0))
! (const_string "V4SF")
! (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
! (const_int 0))
! (const_string "V2DF")]
! (const_string "DF"))
! /* For achitectures resolving dependencies on register
! parts we may avoid extra work to zero out upper part
! of register. */
! (eq_attr "alternative" "7")
! (if_then_else
! (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
! (const_int 0))
! (const_string "V2DF")
! (const_string "DF"))]
! (const_string "DF")))])
(define_insn "*movdf_integer"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m")
***************
*** 2381,2396 ****
return "#";
case 5:
! if (TARGET_ATHLON)
! return "xorpd\t%0, %0";
! else
! return "pxor\t%0, %0";
case 6:
! if (TARGET_PARTIAL_REG_DEPENDENCY)
! return "movapd\t{%1, %0|%0, %1}";
else
return "movsd\t{%1, %0|%0, %1}";
- case 7:
case 8:
return "movsd\t{%1, %0|%0, %1}";
--- 2467,2500 ----
return "#";
case 5:
! switch (get_attr_mode (insn))
! {
! case MODE_V4SF:
! return "xorps\t%0, %0";
! case MODE_V2DF:
! return "xorpd\t%0, %0";
! case MODE_TI:
! return "pxor\t%0, %0";
! default:
! abort ();
! }
case 6:
! switch (get_attr_mode (insn))
! {
! case MODE_V4SF:
! return "movaps\t{%1, %0|%0, %1}";
! case MODE_V2DF:
! return "movapd\t{%1, %0|%0, %1}";
! case MODE_DF:
! return "movsd\t{%1, %0|%0, %1}";
! default:
! abort ();
! }
! case 7:
! if (get_attr_mode (insn) == MODE_V2DF)
! return "movlpd\t{%1, %0|%0, %1}";
else
return "movsd\t{%1, %0|%0, %1}";
case 8:
return "movsd\t{%1, %0|%0, %1}";
***************
*** 2399,2405 ****
}
}
[(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
! (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")])
(define_split
[(set (match_operand:DF 0 "nonimmediate_operand" "")
--- 2503,2544 ----
}
}
[(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
! (set (attr "mode")
! (cond [(eq_attr "alternative" "3,4")
! (const_string "SI")
! /* xorps is one byte shorter. */
! (eq_attr "alternative" "5")
! (cond [(ne (symbol_ref "optimize_size")
! (const_int 0))
! (const_string "V4SF")
! (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
! (const_int 0))
! (const_string "TI")]
! (const_string "V2DF"))
! /* For architectures resolving dependencies on
! whole SSE registers use APD move to break dependency
! chains, otherwise use short move to avoid extra work.
!
! movaps encodes one byte shorter. */
! (eq_attr "alternative" "6")
! (cond
! [(ne (symbol_ref "optimize_size")
! (const_int 0))
! (const_string "V4SF")
! (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
! (const_int 0))
! (const_string "V2DF")]
! (const_string "DF"))
! /* For achitectures resolving dependencies on register
! parts we may avoid extra work to zero out upper part
! of register. */
! (eq_attr "alternative" "7")
! (if_then_else
! (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
! (const_int 0))
! (const_string "V2DF")
! (const_string "DF"))]
! (const_string "DF")))])
(define_split
[(set (match_operand:DF 0 "nonimmediate_operand" "")
***************
*** 3706,3712 ****
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY")))
(clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
! "TARGET_80387 && TARGET_SSE2"
{
switch (which_alternative)
{
--- 3845,3851 ----
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY")))
(clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
! "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS"
{
switch (which_alternative)
{
***************
*** 3716,3722 ****
else
return "fst%z0\t%y0";
case 4:
! return "cvtsd2ss\t{%1, %0|%0, %1}";
default:
abort ();
}
--- 3855,3884 ----
else
return "fst%z0\t%y0";
case 4:
! return "#";
! default:
! abort ();
! }
! }
! [(set_attr "type" "fmov,multi,multi,multi,ssecvt")
! (set_attr "mode" "SF,SF,SF,SF,DF")])
!
! (define_insn "*truncdfsf2_1_sse_nooverlap"
! [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,&Y")
! (float_truncate:SF
! (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY")))
! (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
! "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS"
! {
! switch (which_alternative)
! {
! case 0:
! if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
! return "fstp%z0\t%y0";
! else
! return "fst%z0\t%y0";
! case 4:
! return "#";
default:
abort ();
}
***************
*** 3728,3734 ****
[(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
! "TARGET_80387 && TARGET_SSE2
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
{
switch (which_alternative)
--- 3890,3896 ----
[(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
! "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
{
switch (which_alternative)
***************
*** 3747,3753 ****
[(set_attr "type" "ssecvt,fmov")
(set_attr "mode" "DF,SF")])
! (define_insn "truncdfsf2_3"
[(set (match_operand:SF 0 "memory_operand" "=m")
(float_truncate:SF
(match_operand:DF 1 "register_operand" "f")))]
--- 3909,3938 ----
[(set_attr "type" "ssecvt,fmov")
(set_attr "mode" "DF,SF")])
! (define_insn "*truncdfsf2_2_nooverlap"
! [(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m")
! (float_truncate:SF
! (match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
! "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS
! && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
! {
! switch (which_alternative)
! {
! case 0:
! return "#";
! case 1:
! if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
! return "fstp%z0\t%y0";
! else
! return "fst%z0\t%y0";
! default:
! abort ();
! }
! }
! [(set_attr "type" "ssecvt,fmov")
! (set_attr "mode" "DF,SF")])
!
! (define_insn "*truncdfsf2_3"
[(set (match_operand:SF 0 "memory_operand" "=m")
(float_truncate:SF
(match_operand:DF 1 "register_operand" "f")))]
***************
*** 3765,3775 ****
[(set (match_operand:SF 0 "register_operand" "=Y")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "mY")))]
! "!TARGET_80387 && TARGET_SSE2"
"cvtsd2ss\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])
(define_split
[(set (match_operand:SF 0 "memory_operand" "")
(float_truncate:SF
--- 3950,3969 ----
[(set (match_operand:SF 0 "register_operand" "=Y")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "mY")))]
! "!TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS"
"cvtsd2ss\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])
+ (define_insn "*truncdfsf2_sse_only_nooverlap"
+ [(set (match_operand:SF 0 "register_operand" "=&Y")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "mY")))]
+ "!TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS"
+ "#"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")])
+
(define_split
[(set (match_operand:SF 0 "memory_operand" "")
(float_truncate:SF
***************
*** 3779,3793 ****
[(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
"")
(define_split
! [(set (match_operand:SF 0 "nonimmediate_operand" "")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "")))
(clobber (match_operand 2 "" ""))]
"TARGET_80387 && reload_completed
! && !FP_REG_P (operands[0]) && !FP_REG_P (operands[1])"
! [(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
! "")
(define_split
[(set (match_operand:SF 0 "register_operand" "")
--- 3973,4027 ----
[(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
"")
+ ; Avoid possible reformating penalty on the destination by first
+ ; zeroing it out
(define_split
! [(set (match_operand:SF 0 "register_operand" "")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "")))
(clobber (match_operand 2 "" ""))]
"TARGET_80387 && reload_completed
! && SSE_REG_P (operands[0])"
! [(const_int 0)]
! {
! rtx src, dest;
! if (!TARGET_SSE_PARTIAL_REGS)
! emit_insn (gen_truncdfsf2_sse_only (operands[0], operands[1]));
! else
! {
! dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
! src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
! /* simplify_gen_subreg refuses to widen memory references. */
! if (GET_CODE (src) == SUBREG)
! alter_subreg (&src);
! if (reg_overlap_mentioned_p (operands[0], operands[1]))
! abort ();
! emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
! emit_insn (gen_cvtsd2ss (dest, dest, src));
! }
! DONE;
! })
!
! (define_split
! [(set (match_operand:SF 0 "register_operand" "")
! (float_truncate:SF
! (match_operand:DF 1 "nonimmediate_operand" "")))]
! "TARGET_80387 && reload_completed
! && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS"
! [(const_int 0)]
! {
! rtx src, dest;
! dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
! src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
! /* simplify_gen_subreg refuses to widen memory references. */
! if (GET_CODE (src) == SUBREG)
! alter_subreg (&src);
! if (reg_overlap_mentioned_p (operands[0], operands[1]))
! abort ();
! emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
! emit_insn (gen_cvtsd2ss (dest, dest, src));
! DONE;
! })
(define_split
[(set (match_operand:SF 0 "register_operand" "")
***************
*** 4491,4496 ****
--- 4725,4746 ----
(set_attr "mode" "SF")
(set_attr "fp_int_src" "true")])
+ ; Avoid possible reformating penalty on the destination by first
+ ; zeroing it out
+ (define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 && reload_completed
+ && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS"
+ [(const_int 0)]
+ {
+ rtx dest;
+ dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+ emit_insn (gen_cvtsi2ss (dest, dest, operands[1]));
+ DONE;
+ })
+
(define_expand "floatdisf2"
[(set (match_operand:SF 0 "register_operand" "")
(float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
***************
*** 4529,4534 ****
--- 4779,4800 ----
(set_attr "mode" "SF")
(set_attr "fp_int_src" "true")])
+ ; Avoid possible reformating penalty on the destination by first
+ ; zeroing it out
+ (define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 && reload_completed
+ && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS"
+ [(const_int 0)]
+ {
+ rtx dest;
+ dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+ emit_insn (gen_cvtsi2ssq (dest, dest, operands[1]));
+ DONE;
+ })
+
(define_insn "floathidf2"
[(set (match_operand:DF 0 "register_operand" "=f,f")
(float:DF (match_operand:HI 1 "nonimmediate_operand" "m,r")))]
***************
*** 9492,9497 ****
--- 9758,9771 ----
(xor:TI (subreg:TI (match_dup 1) 0)
(subreg:TI (match_dup 2) 0)))]
{
+ /* Avoid possible reformating on the operands. */
+ if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
+ {
+ rtx op = gen_rtx_SUBREG (V2DFmode, operands[1], 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ }
if (operands_match_p (operands[0], operands[2]))
{
rtx tmp;
***************
*** 9925,9931 ****
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
(and:TI (not:TI (subreg:TI (match_dup 2) 0))
! (subreg:TI (match_dup 1) 0)))])
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
--- 10199,10215 ----
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
(and:TI (not:TI (subreg:TI (match_dup 2) 0))
! (subreg:TI (match_dup 1) 0)))]
! {
! /* Avoid possible reformating on the operands. */
! if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
! {
! rtx op = gen_rtx_SUBREG (V2DFmode, operands[1], 0);
! emit_insn (gen_sse2_unpcklpd (op, op, op));
! op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
! emit_insn (gen_sse2_unpcklpd (op, op, op));
! }
! })
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
***************
*** 16596,16601 ****
--- 16880,16893 ----
(set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0)
(subreg:TI (match_dup 7) 0)))]
{
+ if (GET_MODE (operands[2]) == DFmode
+ && TARGET_SSE_PARTIAL_REGS && !optimize_size)
+ {
+ rtx op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ op = gen_rtx_SUBREG (V2DFmode, operands[3], 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ }
/* If op2 == op3, op3 will be clobbered before it is used.
This should be optimized out though. */
if (operands_match_p (operands[2], operands[3]))
***************
*** 16704,16709 ****
--- 16996,17015 ----
(set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6)
(subreg:TI (match_dup 7) 0)))]
{
+ if (TARGET_SSE_PARTIAL_REGS && !optimize_size
+ && GET_MODE (operands[2]) == DFmode)
+ {
+ if (REG_P (operands[2]))
+ {
+ rtx op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ }
+ if (REG_P (operands[3]))
+ {
+ rtx op = gen_rtx_SUBREG (V2DFmode, operands[3], 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ }
+ }
PUT_MODE (operands[1], GET_MODE (operands[0]));
if (!sse_comparison_operator (operands[1], VOIDmode))
{
***************
*** 17810,17816 ****
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
- ;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
--- 18116,18121 ----
***************
*** 17819,17828 ****
[(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
! ;; @@@ let's try to use movaps here.
! "movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
! (set_attr "mode" "V4SF")])
(define_insn "movv8qi_internal"
[(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
--- 18124,18152 ----
[(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
! {
! if (get_attr_mode (insn) == MODE_V4SF)
! return "movaps\t{%1, %0|%0, %1}";
! else
! return "movdqa\t{%1, %0|%0, %1}";
! }
[(set_attr "type" "ssemov")
! (set (attr "mode")
! (cond [(eq_attr "alternative" "0")
! (if_then_else
! (ne (symbol_ref "optimize_size")
! (const_int 0))
! (const_string "V4SF")
! (const_string "TI"))
! (eq_attr "alternative" "1")
! (if_then_else
! (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! (const_int 0))
! (ne (symbol_ref "optimize_size")
! (const_int 0)))
! (const_string "V4SF")
! (const_string "TI"))]
! (const_string "TI")))])
(define_insn "movv8qi_internal"
[(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
***************
*** 17872,17899 ****
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
(match_operand:V2DF 1 "general_operand" "xm,x"))]
"TARGET_SSE2"
! ;; @@@ let's try to use movaps here.
! "movapd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
! (set_attr "mode" "V2DF")])
(define_insn "movv8hi_internal"
[(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
(match_operand:V8HI 1 "general_operand" "xm,x"))]
"TARGET_SSE2"
! ;; @@@ let's try to use movaps here.
! "movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
! (set_attr "mode" "V4SF")])
(define_insn "movv16qi_internal"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
(match_operand:V16QI 1 "general_operand" "xm,x"))]
"TARGET_SSE2"
! ;; @@@ let's try to use movaps here.
! "movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
! (set_attr "mode" "V4SF")])
(define_expand "movv2df"
[(set (match_operand:V2DF 0 "general_operand" "")
--- 18196,18280 ----
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
(match_operand:V2DF 1 "general_operand" "xm,x"))]
"TARGET_SSE2"
! {
! if (get_attr_mode (insn) == MODE_V4SF)
! return "movaps\t{%1, %0|%0, %1}";
! else
! return "movapd\t{%1, %0|%0, %1}";
! }
[(set_attr "type" "ssemov")
! (set (attr "mode")
! (cond [(eq_attr "alternative" "0")
! (if_then_else
! (ne (symbol_ref "optimize_size")
! (const_int 0))
! (const_string "V4SF")
! (const_string "V2DF"))
! (eq_attr "alternative" "1")
! (if_then_else
! (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! (const_int 0))
! (ne (symbol_ref "optimize_size")
! (const_int 0)))
! (const_string "V4SF")
! (const_string "V2DF"))]
! (const_string "V2DF")))])
(define_insn "movv8hi_internal"
[(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
(match_operand:V8HI 1 "general_operand" "xm,x"))]
"TARGET_SSE2"
! {
! if (get_attr_mode (insn) == MODE_V4SF)
! return "movaps\t{%1, %0|%0, %1}";
! else
! return "movdqa\t{%1, %0|%0, %1}";
! }
[(set_attr "type" "ssemov")
! (set (attr "mode")
! (cond [(eq_attr "alternative" "0")
! (if_then_else
! (ne (symbol_ref "optimize_size")
! (const_int 0))
! (const_string "V4SF")
! (const_string "TI"))
! (eq_attr "alternative" "1")
! (if_then_else
! (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! (const_int 0))
! (ne (symbol_ref "optimize_size")
! (const_int 0)))
! (const_string "V4SF")
! (const_string "TI"))]
! (const_string "TI")))])
(define_insn "movv16qi_internal"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
(match_operand:V16QI 1 "general_operand" "xm,x"))]
"TARGET_SSE2"
! {
! if (get_attr_mode (insn) == MODE_V4SF)
! return "movaps\t{%1, %0|%0, %1}";
! else
! return "movdqa\t{%1, %0|%0, %1}";
! }
[(set_attr "type" "ssemov")
! (set (attr "mode")
! (cond [(eq_attr "alternative" "0")
! (if_then_else
! (ne (symbol_ref "optimize_size")
! (const_int 0))
! (const_string "V4SF")
! (const_string "TI"))
! (eq_attr "alternative" "1")
! (if_then_else
! (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! (const_int 0))
! (ne (symbol_ref "optimize_size")
! (const_int 0)))
! (const_string "V4SF")
! (const_string "TI"))]
! (const_string "TI")))])
(define_expand "movv2df"
[(set (match_operand:V2DF 0 "general_operand" "")
***************
*** 18090,18115 ****
[(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
(match_operand:TI 1 "general_operand" "O,xm,x"))]
"TARGET_SSE && !TARGET_64BIT"
! "@
! xorps\t%0, %0
! movaps\t{%1, %0|%0, %1}
! movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov,ssemov,ssemov")
! (set_attr "mode" "V4SF")])
(define_insn "*movti_rex64"
[(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
(match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
"TARGET_64BIT
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
! "@
! #
! #
! xorps\t%0, %0
! movaps\\t{%1, %0|%0, %1}
! movaps\\t{%1, %0|%0, %1}"
[(set_attr "type" "*,*,ssemov,ssemov,ssemov")
! (set_attr "mode" "V4SF")])
(define_split
[(set (match_operand:TI 0 "nonimmediate_operand" "")
--- 18471,18553 ----
[(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
(match_operand:TI 1 "general_operand" "O,xm,x"))]
"TARGET_SSE && !TARGET_64BIT"
! {
! switch (which_alternative)
! {
! case 0:
! if (get_attr_mode (insn) == MODE_V4SF)
! return "xorps\t%0, %0";
! else
! return "pxor\t%0, %0";
! case 1:
! case 2:
! if (get_attr_mode (insn) == MODE_V4SF)
! return "movaps\t{%1, %0|%0, %1}";
! else
! return "movdqa\t{%1, %0|%0, %1}";
! default:
! abort ();
! }
! }
[(set_attr "type" "ssemov,ssemov,ssemov")
! (set (attr "mode")
! (cond [(eq_attr "alternative" "0,1")
! (if_then_else
! (ne (symbol_ref "optimize_size")
! (const_int 0))
! (const_string "V4SF")
! (const_string "TI"))
! (eq_attr "alternative" "2")
! (if_then_else
! (ne (symbol_ref "optimize_size")
! (const_int 0))
! (const_string "V4SF")
! (const_string "TI"))]
! (const_string "TI")))])
(define_insn "*movti_rex64"
[(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
(match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
"TARGET_64BIT
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
! {
! switch (which_alternative)
! {
! case 0:
! case 1:
! return "#";
! case 2:
! if (get_attr_mode (insn) == MODE_V4SF)
! return "xorps\t%0, %0";
! else
! return "pxor\t%0, %0";
! case 3:
! case 4:
! if (get_attr_mode (insn) == MODE_V4SF)
! return "movaps\t{%1, %0|%0, %1}";
! else
! return "movdqa\t{%1, %0|%0, %1}";
! default:
! abort ();
! }
! }
[(set_attr "type" "*,*,ssemov,ssemov,ssemov")
! (set (attr "mode")
! (cond [(eq_attr "alternative" "2,3")
! (if_then_else
! (ne (symbol_ref "optimize_size")
! (const_int 0))
! (const_string "V4SF")
! (const_string "TI"))
! (eq_attr "alternative" "4")
! (if_then_else
! (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! (const_int 0))
! (ne (symbol_ref "optimize_size")
! (const_int 0)))
! (const_string "V4SF")
! (const_string "TI"))]
! (const_string "DI")))])
(define_split
[(set (match_operand:TI 0 "nonimmediate_operand" "")
***************
*** 18709,18720 ****
;; this insn.
(define_insn "sse_clrv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (unspec:V4SF [(const_int 0)] UNSPEC_NOP))]
"TARGET_SSE"
! "xorps\t{%0, %0|%0, %0}"
[(set_attr "type" "sselog")
(set_attr "memory" "none")
! (set_attr "mode" "V4SF")])
;; SSE mask-generating compares
--- 19147,19172 ----
;; this insn.
(define_insn "sse_clrv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (match_operand:V4SF 1 "const0_operand" "X"))]
"TARGET_SSE"
! {
! if (get_attr_mode (insn) == MODE_TI)
! return "pxor\t{%0, %0|%0, %0}";
! else
! return "xorps\t{%0, %0|%0, %0}";
! }
[(set_attr "type" "sselog")
(set_attr "memory" "none")
! (set (attr "mode")
! (if_then_else
! (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
! (const_int 0))
! (ne (symbol_ref "TARGET_SSE2")
! (const_int 0)))
! (eq (symbol_ref "optimize_size")
! (const_int 0)))
! (const_string "TI")
! (const_string "V4SF")))])
;; SSE mask-generating compares
***************
*** 18938,18943 ****
--- 19390,19407 ----
[(set_attr "type" "ssecvt")
(set_attr "mode" "SF")])
+ (define_insn "cvtsi2ssq"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (const_int 14)))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtsi2ssq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")])
+
(define_insn "cvtss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(vec_select:SI
***************
*** 20586,20592 ****
(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
(vec_duplicate:V4SF
(float_truncate:V2SF
! (match_operand:V2DF 2 "register_operand" "xm")))
(const_int 14)))]
"TARGET_SSE2"
"cvtsd2ss\t{%2, %0|%0, %2}"
--- 21050,21056 ----
(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
(vec_duplicate:V4SF
(float_truncate:V2SF
! (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
(const_int 14)))]
"TARGET_SSE2"
"cvtsd2ss\t{%2, %0|%0, %2}"
***************
*** 20598,20604 ****
(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
(float_extend:V2DF
(vec_select:V2SF
! (match_operand:V4SF 2 "register_operand" "xm")
(parallel [(const_int 0)
(const_int 1)])))
(const_int 2)))]
--- 21062,21068 ----
(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
(float_extend:V2DF
(vec_select:V2SF
! (match_operand:V4SF 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)])))
(const_int 2)))]
***************
*** 20874,20883 ****
(define_insn "sse2_clrti"
[(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))]
"TARGET_SSE2"
! "pxor\t{%0, %0|%0, %0}"
! [(set_attr "type" "sseiadd")
(set_attr "memory" "none")
! (set_attr "mode" "TI")])
;; MMX unsigned averages/sum of absolute differences
--- 21338,21357 ----
(define_insn "sse2_clrti"
[(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))]
"TARGET_SSE2"
! {
! if (get_attr_mode (insn) == MODE_TI)
! return "pxor\t%0, %0";
! else
! return "xorps\t%0, %0";
! }
! [(set_attr "type" "ssemov")
(set_attr "memory" "none")
! (set (attr "mode")
! (if_then_else
! (ne (symbol_ref "optimize_size")
! (const_int 0))
! (const_string "V4SF")
! (const_string "TI")))])
;; MMX unsigned averages/sum of absolute differences
More information about the Gcc-patches
mailing list