[3.4-bib] SSE moves patch

Jan Hubicka jh@suse.cz
Mon Oct 7 16:27:00 GMT 2002


> On Mon, Oct 07, 2002 at 12:06:38AM +0200, Jan Hubicka wrote:
> > 	* i386.c (x86_sse_partial_reg_dependency, x86_sse_partial_regs,
> > 	x86_sse_typeless_stores, x86_sse_load0_by_pxor): New global
> > 	variables.
> > 	* i386.h (x86_sse_partial_reg_dependency, x86_sse_partial_regs,
> > 	x86_sse_typeless_stores, x86_sse_load0_by_pxor): Declare.
> > 	(TARGET_SSE_PARTIAL_REG_DEPENDENCY, TARGET_SSE_PARTIAL_REGS,
> > 	TARGET_SSE_TYPELESS_STORES, TARGET_SSE_TYPELESS_LOAD0): New
> > 	macros.
> > 	* i386.md (movsf*, movdf*, movti, movv4sf, movv2df, movv16qi, movv8hi,
> > 	movv4si):  Obey the new flags.
> > 	(floatsi2sf, floatdi2sf, truncatedf2sf): Emit extra load of 0 to avoid
> > 	reformating penalty.
> > 	(anddf, cmov patterns): Avoid reformating by first converting.
> > 	(sse_cvtsd2ss): Fix predicate.
> > 	(sse2_clrti): Fix mode,
> > 	(sse2_clrv4sf): New.
> 
> Ok.
Hi,
There was a missed case in the truncdfsf2 patterns that caused GCC to
abort in 64-bit mode, and I also noticed that I had created a duplicate of
sse_clrv4sf, just without the unnecessary unspec.
I've installed the attached patch with those two problems fixed.
Thanks!

Sun Oct  6 22:53:18 CEST 2002  Jan Hubicka  <jh@suse.cz>
	* i386.c (x86_sse_partial_reg_dependency, x86_sse_partial_regs,
	x86_sse_typeless_stores, x86_sse_load0_by_pxor): New global
	variables.
	(safe_vector_operand): Update sse_clrv4sf call.
	(ix86_expand_builtin): Likewise.
	* i386.h (x86_sse_partial_reg_dependency, x86_sse_partial_regs,
	x86_sse_typeless_stores, x86_sse_load0_by_pxor): Declare.
	(TARGET_SSE_PARTIAL_REG_DEPENDENCY, TARGET_SSE_PARTIAL_REGS,
	TARGET_SSE_TYPELESS_STORES, TARGET_SSE_TYPELESS_LOAD0,
	TARGET_SSE_LOAD0_BY_PXOR): New macros.
	* i386.md (movsf*, movdf*, movti, movv4sf, movv2df, movv16qi, movv8hi,
	movv4si):  Obey the new flags.
	(floatsi2sf, floatdi2sf, truncatedf2sf): Emit extra load of 0 to avoid
	reformatting penalty.
	(anddf, cmov patterns): Avoid reformatting by first converting.
	(sse_cvtsd2ss): Fix predicate.
	(sse2_clrti): Fix mode.
	(sse_clrv4sf): Avoid unspec.

*** i386.c.old1	Sun Oct  6 02:18:27 2002
--- i386.c	Mon Oct  7 12:55:51 2002
*************** const int x86_epilogue_using_move = m_AT
*** 404,409 ****
--- 404,416 ----
  const int x86_decompose_lea = m_PENT4;
  const int x86_shift1 = ~m_486;
  const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
+ const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
+ /* Set for machines where the type and dependencies are resolved on SSE register
+    parts instead of whole registers, so we may maintain just the lower part of
+    scalar values in proper format, leaving the upper part undefined.  */
+ const int x86_sse_partial_regs = m_ATHLON;
+ const int x86_sse_typeless_stores = m_ATHLON;
+ const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
  
  /* In case the avreage insn count for single function invocation is
     lower than this constant, emit fast (but longer) prologue and
*************** safe_vector_operand (x, mode)
*** 12583,12589 ****
  			      : gen_rtx_SUBREG (DImode, x, 0)));
    else
      emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
! 				: gen_rtx_SUBREG (V4SFmode, x, 0)));
    return x;
  }
  
--- 12590,12597 ----
  			      : gen_rtx_SUBREG (DImode, x, 0)));
    else
      emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
! 				: gen_rtx_SUBREG (V4SFmode, x, 0),
! 				CONST0_RTX (V4SFmode)));
    return x;
  }
  
*************** ix86_expand_builtin (exp, target, subtar
*** 13273,13279 ****
  
      case IX86_BUILTIN_SSE_ZERO:
        target = gen_reg_rtx (V4SFmode);
!       emit_insn (gen_sse_clrv4sf (target));
        return target;
  
      case IX86_BUILTIN_MMX_ZERO:
--- 13281,13287 ----
  
      case IX86_BUILTIN_SSE_ZERO:
        target = gen_reg_rtx (V4SFmode);
!       emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
        return target;
  
      case IX86_BUILTIN_MMX_ZERO:
*** i386.h.old1	Sun Oct  6 02:18:31 2002
--- i386.h	Mon Oct  7 01:58:04 2002
*************** extern const int x86_partial_reg_depende
*** 207,212 ****
--- 207,214 ----
  extern const int x86_accumulate_outgoing_args, x86_prologue_using_move;
  extern const int x86_epilogue_using_move, x86_decompose_lea;
  extern const int x86_arch_always_fancy_math_387, x86_shift1;
+ extern const int x86_sse_partial_reg_dependency, x86_sse_partial_regs;
+ extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
  extern int x86_prefetch_sse;
  
  #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
*************** extern int x86_prefetch_sse;
*** 243,248 ****
--- 245,256 ----
  #define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK)
  #define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & CPUMASK)
  #define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & CPUMASK)
+ #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
+ 				      (x86_sse_partial_reg_dependency & CPUMASK)
+ #define TARGET_SSE_PARTIAL_REGS (x86_sse_partial_regs & CPUMASK)
+ #define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & CPUMASK)
+ #define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & CPUMASK)
+ #define TARGET_SSE_TYPELESS_LOAD0 TARGET_SSE_LOAD0_BY_PXOR /* Alias.  */
  #define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & CPUMASK)
  #define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & CPUMASK)
  #define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & CPUMASK)
*** i386.md.old	Sun Oct  6 22:52:23 2002
--- i386.md	Mon Oct  7 13:22:06 2002
***************
*** 2133,2144 ****
      case 4:
        return "mov{l}\t{%1, %0|%0, %1}";
      case 5:
!       if (TARGET_SSE2 && !TARGET_ATHLON)
  	return "pxor\t%0, %0";
        else
  	return "xorps\t%0, %0";
      case 6:
!       if (TARGET_PARTIAL_REG_DEPENDENCY)
  	return "movaps\t{%1, %0|%0, %1}";
        else
  	return "movss\t{%1, %0|%0, %1}";
--- 2133,2144 ----
      case 4:
        return "mov{l}\t{%1, %0|%0, %1}";
      case 5:
!       if (get_attr_mode (insn) == MODE_TI)
  	return "pxor\t%0, %0";
        else
  	return "xorps\t%0, %0";
      case 6:
!       if (get_attr_mode (insn) == MODE_V4SF)
  	return "movaps\t{%1, %0|%0, %1}";
        else
  	return "movss\t{%1, %0|%0, %1}";
***************
*** 2158,2164 ****
      }
  }
    [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
!    (set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF,SI,SI,DI")])
  
  (define_insn "*swapsf"
    [(set (match_operand:SF 0 "register_operand" "+f")
--- 2158,2197 ----
      }
  }
    [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "3,4,9,10")
! 		 (const_string "SI")
! 	       (eq_attr "alternative" "5")
! 		 (if_then_else
! 		   (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
! 			    	 (const_int 0))
! 			     (ne (symbol_ref "TARGET_SSE2")
! 				 (const_int 0)))
! 			(eq (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "TI")
! 		   (const_string "V4SF"))
! 	       /* For architectures resolving dependencies on
! 		  whole SSE registers use APS move to break dependency
! 		  chains, otherwise use short move to avoid extra work. 
! 
! 		  Do the same for architectures resolving dependencies on
! 		  the parts.  While in DF mode it is better to always handle
! 		  just register parts, the SF mode is different due to lack
! 		  of instructions to load just part of the register.  It is
! 		  better to maintain the whole registers in single format
! 		  to avoid problems on using packed logical operations.  */
! 	       (eq_attr "alternative" "6")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
! 			    (const_int 0))
! 			(ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "SF"))
! 	       (eq_attr "alternative" "11")
! 		 (const_string "DI")]
! 	       (const_string "SF")))])
  
  (define_insn "*swapsf"
    [(set (match_operand:SF 0 "register_operand" "+f")
***************
*** 2319,2343 ****
      case 4:
        return "#";
      case 5:
!       if (TARGET_ATHLON)
!         return "xorpd\t%0, %0";
!       else
!         return "pxor\t%0, %0";
      case 6:
!       if (TARGET_PARTIAL_REG_DEPENDENCY)
! 	return "movapd\t{%1, %0|%0, %1}";
        else
  	return "movsd\t{%1, %0|%0, %1}";
-     case 7:
      case 8:
!         return "movsd\t{%1, %0|%0, %1}";
  
      default:
        abort();
      }
  }
    [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
!    (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")])
  
  (define_insn "*movdf_integer"
    [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m")
--- 2352,2429 ----
      case 4:
        return "#";
      case 5:
!       switch (get_attr_mode (insn))
! 	{
! 	case MODE_V4SF:
! 	  return "xorps\t%0, %0";
! 	case MODE_V2DF:
! 	  return "xorpd\t%0, %0";
! 	case MODE_TI:
! 	  return "pxor\t%0, %0";
! 	default:
! 	  abort ();
! 	}
      case 6:
!       switch (get_attr_mode (insn))
! 	{
! 	case MODE_V4SF:
! 	  return "movaps\t{%1, %0|%0, %1}";
! 	case MODE_V2DF:
! 	  return "movapd\t{%1, %0|%0, %1}";
! 	case MODE_DF:
! 	  return "movsd\t{%1, %0|%0, %1}";
! 	default:
! 	  abort ();
! 	}
!     case 7:
!       if (get_attr_mode (insn) == MODE_V2DF)
! 	return "movlpd\t{%1, %0|%0, %1}";
        else
  	return "movsd\t{%1, %0|%0, %1}";
      case 8:
!       return "movsd\t{%1, %0|%0, %1}";
  
      default:
        abort();
      }
  }
    [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "3,4")
! 		 (const_string "SI")
! 	       /* xorps is one byte shorter.  */
! 	       (eq_attr "alternative" "5")
! 		 (cond [(ne (symbol_ref "optimize_size")
! 			    (const_int 0))
! 			  (const_string "V4SF")
! 			(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
! 			    (const_int 0))
! 			  (const_string "TI")]
! 		       (const_string "V2DF"))
! 	       /* For architectures resolving dependencies on
! 		  whole SSE registers use APD move to break dependency
! 		  chains, otherwise use short move to avoid extra work.
! 
! 		  movaps encodes one byte shorter.  */
! 	       (eq_attr "alternative" "6")
! 		 (cond
! 		  [(ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		     (const_string "V4SF")
! 		   (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
! 		       (const_int 0))
! 		     (const_string "V2DF")]
! 		   (const_string "DF"))
! 	       /* For achitectures resolving dependencies on register
! 		  parts we may avoid extra work to zero out upper part
! 		  of register.  */
! 	       (eq_attr "alternative" "7")
! 		 (if_then_else
! 		   (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
! 		       (const_int 0))
! 		   (const_string "V2DF")
! 		   (const_string "DF"))]
! 	       (const_string "DF")))])
  
  (define_insn "*movdf_integer"
    [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m")
***************
*** 2381,2396 ****
        return "#";
  
      case 5:
!       if (TARGET_ATHLON)
!         return "xorpd\t%0, %0";
!       else
!         return "pxor\t%0, %0";
      case 6:
!       if (TARGET_PARTIAL_REG_DEPENDENCY)
! 	return "movapd\t{%1, %0|%0, %1}";
        else
  	return "movsd\t{%1, %0|%0, %1}";
-     case 7:
      case 8:
        return "movsd\t{%1, %0|%0, %1}";
  
--- 2467,2500 ----
        return "#";
  
      case 5:
!       switch (get_attr_mode (insn))
! 	{
! 	case MODE_V4SF:
! 	  return "xorps\t%0, %0";
! 	case MODE_V2DF:
! 	  return "xorpd\t%0, %0";
! 	case MODE_TI:
! 	  return "pxor\t%0, %0";
! 	default:
! 	  abort ();
! 	}
      case 6:
!       switch (get_attr_mode (insn))
! 	{
! 	case MODE_V4SF:
! 	  return "movaps\t{%1, %0|%0, %1}";
! 	case MODE_V2DF:
! 	  return "movapd\t{%1, %0|%0, %1}";
! 	case MODE_DF:
! 	  return "movsd\t{%1, %0|%0, %1}";
! 	default:
! 	  abort ();
! 	}
!     case 7:
!       if (get_attr_mode (insn) == MODE_V2DF)
! 	return "movlpd\t{%1, %0|%0, %1}";
        else
  	return "movsd\t{%1, %0|%0, %1}";
      case 8:
        return "movsd\t{%1, %0|%0, %1}";
  
***************
*** 2399,2405 ****
      }
  }
    [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
!    (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")])
  
  (define_split
    [(set (match_operand:DF 0 "nonimmediate_operand" "")
--- 2503,2544 ----
      }
  }
    [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "3,4")
! 		 (const_string "SI")
! 	       /* xorps is one byte shorter.  */
! 	       (eq_attr "alternative" "5")
! 		 (cond [(ne (symbol_ref "optimize_size")
! 			    (const_int 0))
! 			  (const_string "V4SF")
! 			(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
! 			    (const_int 0))
! 			  (const_string "TI")]
! 		       (const_string "V2DF"))
! 	       /* For architectures resolving dependencies on
! 		  whole SSE registers use APD move to break dependency
! 		  chains, otherwise use short move to avoid extra work.  
! 
! 		  movaps encodes one byte shorter.  */
! 	       (eq_attr "alternative" "6")
! 		 (cond
! 		  [(ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		     (const_string "V4SF")
! 		   (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
! 		       (const_int 0))
! 		     (const_string "V2DF")]
! 		   (const_string "DF"))
! 	       /* For achitectures resolving dependencies on register
! 		  parts we may avoid extra work to zero out upper part
! 		  of register.  */
! 	       (eq_attr "alternative" "7")
! 		 (if_then_else
! 		   (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
! 		       (const_int 0))
! 		   (const_string "V2DF")
! 		   (const_string "DF"))]
! 	       (const_string "DF")))])
  
  (define_split
    [(set (match_operand:DF 0 "nonimmediate_operand" "")
***************
*** 3706,3712 ****
  	(float_truncate:SF
  	 (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY")))
     (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
!   "TARGET_80387 && TARGET_SSE2"
  {
    switch (which_alternative)
      {
--- 3845,3851 ----
  	(float_truncate:SF
  	 (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY")))
     (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
!   "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS"
  {
    switch (which_alternative)
      {
***************
*** 3716,3722 ****
        else
  	return "fst%z0\t%y0";
      case 4:
!       return "cvtsd2ss\t{%1, %0|%0, %1}";
      default:
        abort ();
      }
--- 3855,3884 ----
        else
  	return "fst%z0\t%y0";
      case 4:
!       return "#";
!     default:
!       abort ();
!     }
! }
!   [(set_attr "type" "fmov,multi,multi,multi,ssecvt")
!    (set_attr "mode" "SF,SF,SF,SF,DF")])
! 
! (define_insn "*truncdfsf2_1_sse_nooverlap"
!   [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,&Y")
! 	(float_truncate:SF
! 	 (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY")))
!    (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
!   "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS"
! {
!   switch (which_alternative)
!     {
!     case 0:
!       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
! 	return "fstp%z0\t%y0";
!       else
! 	return "fst%z0\t%y0";
!     case 4:
!       return "#";
      default:
        abort ();
      }
***************
*** 3728,3734 ****
    [(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m")
  	(float_truncate:SF
  	 (match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
!   "TARGET_80387 && TARGET_SSE2
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
    switch (which_alternative)
--- 3890,3896 ----
    [(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m")
  	(float_truncate:SF
  	 (match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
!   "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
    switch (which_alternative)
***************
*** 3747,3753 ****
    [(set_attr "type" "ssecvt,fmov")
     (set_attr "mode" "DF,SF")])
  
! (define_insn "truncdfsf2_3"
    [(set (match_operand:SF 0 "memory_operand" "=m")
  	(float_truncate:SF
  	 (match_operand:DF 1 "register_operand" "f")))]
--- 3909,3938 ----
    [(set_attr "type" "ssecvt,fmov")
     (set_attr "mode" "DF,SF")])
  
! (define_insn "*truncdfsf2_2_nooverlap"
!   [(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m")
! 	(float_truncate:SF
! 	 (match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
!   "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS
!    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
! {
!   switch (which_alternative)
!     {
!     case 0:
!       return "#";
!     case 1:
!       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
! 	return "fstp%z0\t%y0";
!       else
! 	return "fst%z0\t%y0";
!     default:
!       abort ();
!     }
! }
!   [(set_attr "type" "ssecvt,fmov")
!    (set_attr "mode" "DF,SF")])
! 
! (define_insn "*truncdfsf2_3"
    [(set (match_operand:SF 0 "memory_operand" "=m")
  	(float_truncate:SF
  	 (match_operand:DF 1 "register_operand" "f")))]
***************
*** 3765,3775 ****
    [(set (match_operand:SF 0 "register_operand" "=Y")
  	(float_truncate:SF
  	 (match_operand:DF 1 "nonimmediate_operand" "mY")))]
!   "!TARGET_80387 && TARGET_SSE2"
    "cvtsd2ss\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "DF")])
  
  (define_split
    [(set (match_operand:SF 0 "memory_operand" "")
  	(float_truncate:SF
--- 3950,3969 ----
    [(set (match_operand:SF 0 "register_operand" "=Y")
  	(float_truncate:SF
  	 (match_operand:DF 1 "nonimmediate_operand" "mY")))]
!   "!TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS"
    "cvtsd2ss\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "DF")])
  
+ (define_insn "*truncdfsf2_sse_only_nooverlap"
+   [(set (match_operand:SF 0 "register_operand" "=&Y")
+ 	(float_truncate:SF
+ 	 (match_operand:DF 1 "nonimmediate_operand" "mY")))]
+   "!TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS"
+   "#"
+   [(set_attr "type" "ssecvt")
+    (set_attr "mode" "DF")])
+ 
  (define_split
    [(set (match_operand:SF 0 "memory_operand" "")
  	(float_truncate:SF
***************
*** 3779,3793 ****
    [(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
    "")
  
  (define_split
!   [(set (match_operand:SF 0 "nonimmediate_operand" "")
  	(float_truncate:SF
  	 (match_operand:DF 1 "nonimmediate_operand" "")))
     (clobber (match_operand 2 "" ""))]
    "TARGET_80387 && reload_completed
!    && !FP_REG_P (operands[0]) && !FP_REG_P (operands[1])"
!   [(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
!   "")
  
  (define_split
    [(set (match_operand:SF 0 "register_operand" "")
--- 3973,4027 ----
    [(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
    "")
  
+ ; Avoid a possible reformatting penalty on the destination by first
+ ; zeroing it out.
  (define_split
!   [(set (match_operand:SF 0 "register_operand" "")
  	(float_truncate:SF
  	 (match_operand:DF 1 "nonimmediate_operand" "")))
     (clobber (match_operand 2 "" ""))]
    "TARGET_80387 && reload_completed
!    && SSE_REG_P (operands[0])"
!   [(const_int 0)]
! {
!   rtx src, dest;
!   if (!TARGET_SSE_PARTIAL_REGS)
!     emit_insn (gen_truncdfsf2_sse_only (operands[0], operands[1]));
!   else
!     {
!       dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
!       src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
!       /* simplify_gen_subreg refuses to widen memory references.  */
!       if (GET_CODE (src) == SUBREG)
! 	alter_subreg (&src);
!       if (reg_overlap_mentioned_p (operands[0], operands[1]))
! 	abort ();
!       emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
!       emit_insn (gen_cvtsd2ss (dest, dest, src));
!     }
!   DONE;
! })
! 
! (define_split
!   [(set (match_operand:SF 0 "register_operand" "")
! 	(float_truncate:SF
! 	 (match_operand:DF 1 "nonimmediate_operand" "")))]
!   "TARGET_80387 && reload_completed
!    && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS"
!   [(const_int 0)]
! {
!   rtx src, dest;
!   dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
!   src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
!   /* simplify_gen_subreg refuses to widen memory references.  */
!   if (GET_CODE (src) == SUBREG)
!     alter_subreg (&src);
!   if (reg_overlap_mentioned_p (operands[0], operands[1]))
!     abort ();
!   emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
!   emit_insn (gen_cvtsd2ss (dest, dest, src));
!   DONE;
! })
  
  (define_split
    [(set (match_operand:SF 0 "register_operand" "")
***************
*** 4491,4496 ****
--- 4725,4746 ----
     (set_attr "mode" "SF")
     (set_attr "fp_int_src" "true")])
  
+ ; Avoid a possible reformatting penalty on the destination by first
+ ; zeroing it out.
+ (define_split
+   [(set (match_operand:SF 0 "register_operand" "")
+ 	(float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
+   "TARGET_80387 && reload_completed
+    && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS"
+   [(const_int 0)]
+ {
+   rtx dest;
+   dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+   emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+   emit_insn (gen_cvtsi2ss (dest, dest, operands[1]));
+   DONE;
+ })
+ 
  (define_expand "floatdisf2"
    [(set (match_operand:SF 0 "register_operand" "")
  	(float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
***************
*** 4529,4534 ****
--- 4779,4800 ----
     (set_attr "mode" "SF")
     (set_attr "fp_int_src" "true")])
  
+ ; Avoid a possible reformatting penalty on the destination by first
+ ; zeroing it out.
+ (define_split
+   [(set (match_operand:SF 0 "register_operand" "")
+ 	(float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
+   "TARGET_80387 && reload_completed
+    && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS"
+   [(const_int 0)]
+ {
+   rtx dest;
+   dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+   emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+   emit_insn (gen_cvtsi2ssq (dest, dest, operands[1]));
+   DONE;
+ })
+ 
  (define_insn "floathidf2"
    [(set (match_operand:DF 0 "register_operand" "=f,f")
  	(float:DF (match_operand:HI 1 "nonimmediate_operand" "m,r")))]
***************
*** 9492,9497 ****
--- 9758,9771 ----
  	(xor:TI (subreg:TI (match_dup 1) 0)
  		(subreg:TI (match_dup 2) 0)))]
  {
+   /* Avoid possible reformating on the operands.  */
+   if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
+     {
+       rtx op = gen_rtx_SUBREG (V2DFmode, operands[1], 0);
+       emit_insn (gen_sse2_unpcklpd (op, op, op));
+       op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
+       emit_insn (gen_sse2_unpcklpd (op, op, op));
+     }
    if (operands_match_p (operands[0], operands[2]))
      {
        rtx tmp;
***************
*** 9925,9931 ****
    "reload_completed && SSE_REG_P (operands[0])"
    [(set (subreg:TI (match_dup 0) 0)
  	(and:TI (not:TI (subreg:TI (match_dup 2) 0))
! 		(subreg:TI (match_dup 1) 0)))])
  
  
  ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
--- 10199,10215 ----
    "reload_completed && SSE_REG_P (operands[0])"
    [(set (subreg:TI (match_dup 0) 0)
  	(and:TI (not:TI (subreg:TI (match_dup 2) 0))
! 		(subreg:TI (match_dup 1) 0)))]
! {
!   /* Avoid possible reformating on the operands.  */
!   if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
!     {
!       rtx op = gen_rtx_SUBREG (V2DFmode, operands[1], 0);
!       emit_insn (gen_sse2_unpcklpd (op, op, op));
!       op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
!       emit_insn (gen_sse2_unpcklpd (op, op, op));
!     }
! })
  
  
  ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
***************
*** 16596,16601 ****
--- 16880,16893 ----
     (set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0)
  					    (subreg:TI (match_dup 7) 0)))]
  {
+   if (GET_MODE (operands[2]) == DFmode
+       && TARGET_SSE_PARTIAL_REGS && !optimize_size)
+     {
+       rtx op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
+       emit_insn (gen_sse2_unpcklpd (op, op, op));
+       op = gen_rtx_SUBREG (V2DFmode, operands[3], 0);
+       emit_insn (gen_sse2_unpcklpd (op, op, op));
+     }
    /* If op2 == op3, op3 will be clobbered before it is used.
       This should be optimized out though.  */
    if (operands_match_p (operands[2], operands[3]))
***************
*** 16704,16709 ****
--- 16996,17015 ----
     (set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6)
  					    (subreg:TI (match_dup 7) 0)))]
  {
+   if (TARGET_SSE_PARTIAL_REGS && !optimize_size
+       && GET_MODE (operands[2]) == DFmode)
+     {
+       if (REG_P (operands[2]))
+ 	{
+ 	  rtx op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
+ 	  emit_insn (gen_sse2_unpcklpd (op, op, op));
+ 	}
+       if (REG_P (operands[3]))
+ 	{
+ 	  rtx op = gen_rtx_SUBREG (V2DFmode, operands[3], 0);
+ 	  emit_insn (gen_sse2_unpcklpd (op, op, op));
+ 	}
+     }
    PUT_MODE (operands[1], GET_MODE (operands[0]));
    if (!sse_comparison_operator (operands[1], VOIDmode))
      {
***************
*** 17810,17816 ****
    [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
    "TARGET_SSE"
-   ;; @@@ let's try to use movaps here.
    "movaps\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssemov")
     (set_attr "mode" "V4SF")])
--- 18116,18121 ----
***************
*** 17819,17828 ****
    [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
    "TARGET_SSE"
!   ;; @@@ let's try to use movaps here.
!   "movaps\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssemov")
!    (set_attr "mode" "V4SF")])
  
  (define_insn "movv8qi_internal"
    [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
--- 18124,18152 ----
    [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
    "TARGET_SSE"
! {
!   if (get_attr_mode (insn) == MODE_V4SF)
!     return "movaps\t{%1, %0|%0, %1}";
!   else
!     return "movdqa\t{%1, %0|%0, %1}";
! }
    [(set_attr "type" "ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "0")
! 		 (if_then_else
! 		   (ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))
! 	       (eq_attr "alternative" "1")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))]
! 	       (const_string "TI")))])
  
  (define_insn "movv8qi_internal"
    [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
***************
*** 17872,17899 ****
    [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V2DF 1 "general_operand" "xm,x"))]
    "TARGET_SSE2"
!   ;; @@@ let's try to use movaps here.
!   "movapd\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssemov")
!    (set_attr "mode" "V2DF")])
  
  (define_insn "movv8hi_internal"
    [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V8HI 1 "general_operand" "xm,x"))]
    "TARGET_SSE2"
!   ;; @@@ let's try to use movaps here.
!   "movaps\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssemov")
!    (set_attr "mode" "V4SF")])
  
  (define_insn "movv16qi_internal"
    [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V16QI 1 "general_operand" "xm,x"))]
    "TARGET_SSE2"
!   ;; @@@ let's try to use movaps here.
!   "movaps\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssemov")
!    (set_attr "mode" "V4SF")])
  
  (define_expand "movv2df"
    [(set (match_operand:V2DF 0 "general_operand" "")
--- 18196,18280 ----
    [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V2DF 1 "general_operand" "xm,x"))]
    "TARGET_SSE2"
! {
!   if (get_attr_mode (insn) == MODE_V4SF)
!     return "movaps\t{%1, %0|%0, %1}";
!   else
!     return "movapd\t{%1, %0|%0, %1}";
! }
    [(set_attr "type" "ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "0")
! 		 (if_then_else
! 		   (ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		   (const_string "V4SF")
! 		   (const_string "V2DF"))
! 	       (eq_attr "alternative" "1")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "V2DF"))]
! 	       (const_string "V2DF")))])
  
  (define_insn "movv8hi_internal"
    [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V8HI 1 "general_operand" "xm,x"))]
    "TARGET_SSE2"
! {
!   if (get_attr_mode (insn) == MODE_V4SF)
!     return "movaps\t{%1, %0|%0, %1}";
!   else
!     return "movdqa\t{%1, %0|%0, %1}";
! }
    [(set_attr "type" "ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "0")
! 		 (if_then_else
! 		   (ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))
! 	       (eq_attr "alternative" "1")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))]
! 	       (const_string "TI")))])
  
  (define_insn "movv16qi_internal"
    [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V16QI 1 "general_operand" "xm,x"))]
    "TARGET_SSE2"
! {
!   if (get_attr_mode (insn) == MODE_V4SF)
!     return "movaps\t{%1, %0|%0, %1}";
!   else
!     return "movdqa\t{%1, %0|%0, %1}";
! }
    [(set_attr "type" "ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "0")
! 		 (if_then_else
! 		   (ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))
! 	       (eq_attr "alternative" "1")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))]
! 	       (const_string "TI")))])
  
  (define_expand "movv2df"
    [(set (match_operand:V2DF 0 "general_operand" "")
***************
*** 18090,18115 ****
    [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:TI 1 "general_operand" "O,xm,x"))]
    "TARGET_SSE && !TARGET_64BIT"
!   "@
!    xorps\t%0, %0
!    movaps\t{%1, %0|%0, %1}
!    movaps\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssemov,ssemov,ssemov")
!    (set_attr "mode" "V4SF")])
  
  (define_insn "*movti_rex64"
    [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
  	(match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
    "TARGET_64BIT
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
!   "@
!    #
!    #
!    xorps\t%0, %0
!    movaps\\t{%1, %0|%0, %1}
!    movaps\\t{%1, %0|%0, %1}"
    [(set_attr "type" "*,*,ssemov,ssemov,ssemov")
!    (set_attr "mode" "V4SF")])
  
  (define_split
    [(set (match_operand:TI 0 "nonimmediate_operand" "")
--- 18471,18553 ----
    [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:TI 1 "general_operand" "O,xm,x"))]
    "TARGET_SSE && !TARGET_64BIT"
! {
!   switch (which_alternative)
!     {
!     case 0:
!       if (get_attr_mode (insn) == MODE_V4SF)
! 	return "xorps\t%0, %0";
!       else
! 	return "pxor\t%0, %0";
!     case 1:
!     case 2:
!       if (get_attr_mode (insn) == MODE_V4SF)
! 	return "movaps\t{%1, %0|%0, %1}";
!       else
! 	return "movdqa\t{%1, %0|%0, %1}";
!     default:
!       abort ();
!     }
! }
    [(set_attr "type" "ssemov,ssemov,ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "0,1")
! 		 (if_then_else
! 		   (ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))
! 	       (eq_attr "alternative" "2")
! 		 (if_then_else
! 		   (ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))]
! 	       (const_string "TI")))])
  
  (define_insn "*movti_rex64"
    [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
  	(match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
    "TARGET_64BIT
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
! {
!   switch (which_alternative)
!     {
!     case 0:
!     case 1:
!       return "#";
!     case 2:
!       if (get_attr_mode (insn) == MODE_V4SF)
! 	return "xorps\t%0, %0";
!       else
! 	return "pxor\t%0, %0";
!     case 3:
!     case 4:
!       if (get_attr_mode (insn) == MODE_V4SF)
! 	return "movaps\t{%1, %0|%0, %1}";
!       else
! 	return "movdqa\t{%1, %0|%0, %1}";
!     default:
!       abort ();
!     }
! }
    [(set_attr "type" "*,*,ssemov,ssemov,ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "2,3")
! 		 (if_then_else
! 		   (ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))
! 	       (eq_attr "alternative" "4")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))]
! 	       (const_string "DI")))])
  
  (define_split
    [(set (match_operand:TI 0 "nonimmediate_operand" "")
***************
*** 18709,18720 ****
  ;; this insn.
  (define_insn "sse_clrv4sf"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
!         (unspec:V4SF [(const_int 0)] UNSPEC_NOP))]
    "TARGET_SSE"
!   "xorps\t{%0, %0|%0, %0}"
    [(set_attr "type" "sselog")
     (set_attr "memory" "none")
!    (set_attr "mode" "V4SF")])
  
  ;; SSE mask-generating compares
  
--- 19147,19172 ----
  ;; this insn.
  (define_insn "sse_clrv4sf"
    [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(match_operand:V4SF 1 "const0_operand" "X"))]
    "TARGET_SSE"
! {
!   if (get_attr_mode (insn) == MODE_TI)
!     return "pxor\t{%0, %0|%0, %0}";
!   else
!     return "xorps\t{%0, %0|%0, %0}";
! }
    [(set_attr "type" "sselog")
     (set_attr "memory" "none")
!    (set (attr "mode")
! 	(if_then_else
! 	   (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
! 			 (const_int 0))
! 		     (ne (symbol_ref "TARGET_SSE2")
! 			 (const_int 0)))
! 		(eq (symbol_ref "optimize_size")
! 		    (const_int 0)))
! 	 (const_string "TI")
! 	 (const_string "V4SF")))])
  
  ;; SSE mask-generating compares
  
***************
*** 18938,18943 ****
--- 19390,19407 ----
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "SF")])
  
+ (define_insn "cvtsi2ssq"
+   [(set (match_operand:V4SF 0 "register_operand" "=x")
+ 	(vec_merge:V4SF
+ 	 (match_operand:V4SF 1 "register_operand" "0")
+ 	 (vec_duplicate:V4SF
+ 	  (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ 	 (const_int 14)))]
+   "TARGET_SSE && TARGET_64BIT"
+   "cvtsi2ssq\t{%2, %0|%0, %2}"
+   [(set_attr "type" "ssecvt")
+    (set_attr "mode" "SF")])
+ 
  (define_insn "cvtss2si"
    [(set (match_operand:SI 0 "register_operand" "=r")
  	(vec_select:SI
***************
*** 20586,20592 ****
  	(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
  	 		(vec_duplicate:V4SF
  			  (float_truncate:V2SF
! 			    (match_operand:V2DF 2 "register_operand" "xm")))
  			(const_int 14)))]
    "TARGET_SSE2"
    "cvtsd2ss\t{%2, %0|%0, %2}"
--- 21050,21056 ----
  	(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
  	 		(vec_duplicate:V4SF
  			  (float_truncate:V2SF
! 			    (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
  			(const_int 14)))]
    "TARGET_SSE2"
    "cvtsd2ss\t{%2, %0|%0, %2}"
***************
*** 20598,20604 ****
  	(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
  	 		(float_extend:V2DF
  			  (vec_select:V2SF
! 			    (match_operand:V4SF 2 "register_operand" "xm")
  			    (parallel [(const_int 0)
  				       (const_int 1)])))
  			(const_int 2)))]
--- 21062,21068 ----
  	(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
  	 		(float_extend:V2DF
  			  (vec_select:V2SF
! 			    (match_operand:V4SF 2 "nonimmediate_operand" "xm")
  			    (parallel [(const_int 0)
  				       (const_int 1)])))
  			(const_int 2)))]
***************
*** 20874,20883 ****
  (define_insn "sse2_clrti"
    [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))]
    "TARGET_SSE2"
!   "pxor\t{%0, %0|%0, %0}"
!   [(set_attr "type" "sseiadd")
     (set_attr "memory" "none")
!    (set_attr "mode" "TI")])
  
  ;; MMX unsigned averages/sum of absolute differences
  
--- 21338,21357 ----
  (define_insn "sse2_clrti"
    [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))]
    "TARGET_SSE2"
! {
!   if (get_attr_mode (insn) == MODE_TI)
!     return "pxor\t%0, %0";
!   else
!     return "xorps\t%0, %0";
! }
!   [(set_attr "type" "ssemov")
     (set_attr "memory" "none")
!    (set (attr "mode")
! 	      (if_then_else
! 		(ne (symbol_ref "optimize_size")
! 		    (const_int 0))
! 		(const_string "V4SF")
! 		(const_string "TI")))])
  
  ;; MMX unsigned averages/sum of absolute differences
  



More information about the Gcc-patches mailing list