This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[3.4-bib] SSE moves patch


Hi,
this patch should resolve the somewhat complicated situation concerning SSE
move instructions, which we were handling correctly only for P4, wasting
performance considerably on other targets.

Regtested/bootstrapped on Athlon without SSE; I hope Andreas will
benchmark it so that the actual SSE code gets some testing.

OK for BIB branch?

Honza

Sun Oct  6 22:53:18 CEST 2002  Jan Hubicka  <jh@suse.cz>
	* i386.c (x86_sse_partial_reg_dependency, x86_sse_partial_regs,
	x86_sse_typeless_stores, x86_sse_typeless_load0): New global
	variables.
	* i386.h (x86_sse_partial_reg_dependency, x86_sse_partial_regs,
	x86_sse_typeless_stores, x86_sse_typeless_load0): Declare.
	(TARGET_SSE_PARTIAL_REG_DEPENDENCY, TARGET_SSE_PARTIAL_REGS,
	TARGET_SSE_TYPELESS_STORES, TARGET_SSE_TYPELESS_LOAD0): New
	macros.
	* i386.md (movsf*, movdf*, movti, movv4sf, movv2df, movv16qi, movv8hi,
	movv4si):  Obey the new flags.
	(floatsi2sf, floatdi2sf, truncatedf2sf): Emit extra load of 0 to avoid
	reformatting penalty.
	(anddf, cmov patterns): Avoid reformatting by first converting.
	(sse_cvtsd2ss): Fix predicate.
	(sse2_clrti): Fix mode.
	(sse2_clrv4sf): New.
	
*** i386.c.old1	Sun Oct  6 02:18:27 2002
--- i386.c	Sun Oct  6 20:30:18 2002
*************** const int x86_epilogue_using_move = m_AT
*** 404,409 ****
--- 404,416 ----
  const int x86_decompose_lea = m_PENT4;
  const int x86_shift1 = ~m_486;
  const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
+ const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
+ /* Set for machines where the type and dependencies are resolved on SSE register
+    parts instead of whole registers, so we may maintain just lower part of
+    scalar values in proper format leaving the upper part undefined.  */
+ const int x86_sse_partial_regs = m_ATHLON;
+ const int x86_sse_typeless_stores = m_ATHLON;
+ const int x86_sse_typeless_load0 = m_PPRO | m_PENT4;
  
  /* In case the avreage insn count for single function invocation is
     lower than this constant, emit fast (but longer) prologue and
*** i386.h.old1	Sun Oct  6 02:18:31 2002
--- i386.h	Sun Oct  6 20:11:13 2002
*************** extern const int x86_partial_reg_depende
*** 207,212 ****
--- 207,214 ----
  extern const int x86_accumulate_outgoing_args, x86_prologue_using_move;
  extern const int x86_epilogue_using_move, x86_decompose_lea;
  extern const int x86_arch_always_fancy_math_387, x86_shift1;
+ extern const int x86_sse_partial_reg_dependency, x86_sse_partial_regs;
+ extern const int x86_sse_typeless_stores, x86_sse_typeless_load0;
  extern int x86_prefetch_sse;
  
  #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
*************** extern int x86_prefetch_sse;
*** 243,248 ****
--- 245,255 ----
  #define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK)
  #define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & CPUMASK)
  #define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & CPUMASK)
+ #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
+ 				      (x86_sse_partial_reg_dependency & CPUMASK)
+ #define TARGET_SSE_PARTIAL_REGS (x86_sse_partial_regs & CPUMASK)
+ #define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & CPUMASK)
+ #define TARGET_SSE_TYPELESS_LOAD0 (x86_sse_typeless_load0 & CPUMASK)
  #define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & CPUMASK)
  #define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & CPUMASK)
  #define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & CPUMASK)
*** i386.md.old	Sun Oct  6 22:52:23 2002
--- i386.md	Sun Oct  6 22:56:16 2002
***************
*** 2133,2144 ****
      case 4:
        return "mov{l}\t{%1, %0|%0, %1}";
      case 5:
!       if (TARGET_SSE2 && !TARGET_ATHLON)
! 	return "pxor\t%0, %0";
!       else
! 	return "xorps\t%0, %0";
      case 6:
!       if (TARGET_PARTIAL_REG_DEPENDENCY)
  	return "movaps\t{%1, %0|%0, %1}";
        else
  	return "movss\t{%1, %0|%0, %1}";
--- 2133,2141 ----
      case 4:
        return "mov{l}\t{%1, %0|%0, %1}";
      case 5:
!       return "xorps\t%0, %0";
      case 6:
!       if (get_attr_mode (insn) == MODE_V4SF)
  	return "movaps\t{%1, %0|%0, %1}";
        else
  	return "movss\t{%1, %0|%0, %1}";
***************
*** 2158,2164 ****
      }
  }
    [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
!    (set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF,SI,SI,DI")])
  
  (define_insn "*swapsf"
    [(set (match_operand:SF 0 "register_operand" "+f")
--- 2155,2186 ----
      }
  }
    [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "3,4,9,10")
! 		 (const_string "SI")
! 	       (eq_attr "alternative" "5")
! 		 (const_string "V4SF")
! 	       /* For architectures resolving dependencies on
! 		  whole SSE registers use APS move to break dependency
! 		  chains, otherwise use short move to avoid extra work. 
! 
! 		  Do the same for architectures resolving dependencies on
! 		  the parts.  While in DF mode it is better to always handle
! 		  just register parts, the SF mode is different due to lack
! 		  of instructions to load just part of the register.  It is
! 		  better to maintain the whole registers in single format
! 		  to avoid problems on using packed logical operations.  */
! 	       (eq_attr "alternative" "6")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
! 			    (const_int 0))
! 			(ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "SF"))
! 	       (eq_attr "alternative" "11")
! 		 (const_string "DI")]
! 	       (const_string "SF")))])
  
  (define_insn "*swapsf"
    [(set (match_operand:SF 0 "register_operand" "+f")
***************
*** 2319,2343 ****
      case 4:
        return "#";
      case 5:
!       if (TARGET_ATHLON)
!         return "xorpd\t%0, %0";
        else
!         return "pxor\t%0, %0";
      case 6:
!       if (TARGET_PARTIAL_REG_DEPENDENCY)
  	return "movapd\t{%1, %0|%0, %1}";
        else
  	return "movsd\t{%1, %0|%0, %1}";
      case 7:
      case 8:
!         return "movsd\t{%1, %0|%0, %1}";
  
      default:
        abort();
      }
  }
    [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
!    (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")])
  
  (define_insn "*movdf_integer"
    [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m")
--- 2341,2406 ----
      case 4:
        return "#";
      case 5:
!       if (get_attr_mode (insn) == MODE_V4SF)
!         return "xorps\t%0, %0";
        else
!         return "xorpd\t%0, %0";
      case 6:
!       if (get_attr_mode (insn) == MODE_V2DF)
  	return "movapd\t{%1, %0|%0, %1}";
+       else if (get_attr_mode (insn) == MODE_V4SF)
+ 	return "movaps\t{%1, %0|%0, %1}";
        else
  	return "movsd\t{%1, %0|%0, %1}";
      case 7:
+       if (get_attr_mode (insn) == MODE_V2DF)
+ 	return "movlpd\t{%1, %0|%0, %1}";
+       else
+ 	return "movsd\t{%1, %0|%0, %1}";
      case 8:
!       return "movsd\t{%1, %0|%0, %1}";
  
      default:
        abort();
      }
  }
    [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "3,4")
! 		 (const_string "SI")
! 	       /* xorps is one byte shorter.  */
! 	       (eq_attr "alternative" "5")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_LOAD0")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "V2DF"))
! 	       /* For architectures resolving dependencies on
! 		  whole SSE registers use APD move to break dependency
! 		  chains, otherwise use short move to avoid extra work.
! 
! 		  movaps encodes one byte shorter.  */
! 	       (eq_attr "alternative" "6")
! 		 (cond
! 		  [(ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		     (const_string "V4SF")
! 		   (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
! 		       (const_int 0))
! 		     (const_string "V2DF")]
! 		   (const_string "DF"))
! 	       /* For architectures resolving dependencies on register
! 		  parts we may avoid extra work to zero out upper part
! 		  of register.  */
! 	       (eq_attr "alternative" "7")
! 		 (if_then_else
! 		   (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
! 		       (const_int 0))
! 		   (const_string "V2DF")
! 		   (const_string "DF"))]
! 	       (const_string "DF")))])
  
  (define_insn "*movdf_integer"
    [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m")
***************
*** 2381,2396 ****
        return "#";
  
      case 5:
!       if (TARGET_ATHLON)
!         return "xorpd\t%0, %0";
        else
!         return "pxor\t%0, %0";
      case 6:
!       if (TARGET_PARTIAL_REG_DEPENDENCY)
  	return "movapd\t{%1, %0|%0, %1}";
        else
  	return "movsd\t{%1, %0|%0, %1}";
      case 7:
      case 8:
        return "movsd\t{%1, %0|%0, %1}";
  
--- 2444,2465 ----
        return "#";
  
      case 5:
!       if (get_attr_mode (insn) == MODE_V4SF)
!         return "xorps\t%0, %0";
        else
!         return "xorpd\t%0, %0";
      case 6:
!       if (get_attr_mode (insn) == MODE_V2DF)
  	return "movapd\t{%1, %0|%0, %1}";
+       else if (get_attr_mode (insn) == MODE_V4SF)
+ 	return "movaps\t{%1, %0|%0, %1}";
        else
  	return "movsd\t{%1, %0|%0, %1}";
      case 7:
+       if (get_attr_mode (insn) == MODE_V2DF)
+ 	return "movlpd\t{%1, %0|%0, %1}";
+       else
+ 	return "movsd\t{%1, %0|%0, %1}";
      case 8:
        return "movsd\t{%1, %0|%0, %1}";
  
***************
*** 2399,2405 ****
      }
  }
    [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
!    (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")])
  
  (define_split
    [(set (match_operand:DF 0 "nonimmediate_operand" "")
--- 2468,2509 ----
      }
  }
    [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "3,4")
! 		 (const_string "SI")
! 	       /* xorps is one byte shorter.  */
! 	       (eq_attr "alternative" "5")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_LOAD0")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "V2DF"))
! 	       /* For architectures resolving dependencies on
! 		  whole SSE registers use APD move to break dependency
! 		  chains, otherwise use short move to avoid extra work.  
! 
! 		  movaps encodes one byte shorter.  */
! 	       (eq_attr "alternative" "6")
! 		 (cond
! 		  [(ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		     (const_string "V4SF")
! 		   (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
! 		       (const_int 0))
! 		     (const_string "V2DF")]
! 		   (const_string "DF"))
! 	       /* For architectures resolving dependencies on register
! 		  parts we may avoid extra work to zero out upper part
! 		  of register.  */
! 	       (eq_attr "alternative" "7")
! 		 (if_then_else
! 		   (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
! 		       (const_int 0))
! 		   (const_string "V2DF")
! 		   (const_string "DF"))]
! 	       (const_string "DF")))])
  
  (define_split
    [(set (match_operand:DF 0 "nonimmediate_operand" "")
***************
*** 3716,3722 ****
        else
  	return "fst%z0\t%y0";
      case 4:
!       return "cvtsd2ss\t{%1, %0|%0, %1}";
      default:
        abort ();
      }
--- 3820,3826 ----
        else
  	return "fst%z0\t%y0";
      case 4:
!       return "#";
      default:
        abort ();
      }
***************
*** 3728,3734 ****
    [(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m")
  	(float_truncate:SF
  	 (match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
!   "TARGET_80387 && TARGET_SSE2
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
    switch (which_alternative)
--- 3832,3838 ----
    [(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m")
  	(float_truncate:SF
  	 (match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
!   "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
    switch (which_alternative)
***************
*** 3747,3753 ****
    [(set_attr "type" "ssecvt,fmov")
     (set_attr "mode" "DF,SF")])
  
! (define_insn "truncdfsf2_3"
    [(set (match_operand:SF 0 "memory_operand" "=m")
  	(float_truncate:SF
  	 (match_operand:DF 1 "register_operand" "f")))]
--- 3851,3880 ----
    [(set_attr "type" "ssecvt,fmov")
     (set_attr "mode" "DF,SF")])
  
! (define_insn "*truncdfsf2_2_nooverlap"
!   [(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m")
! 	(float_truncate:SF
! 	 (match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
!   "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS
!    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
! {
!   switch (which_alternative)
!     {
!     case 0:
!       return "#";
!     case 1:
!       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
! 	return "fstp%z0\t%y0";
!       else
! 	return "fst%z0\t%y0";
!     default:
!       abort ();
!     }
! }
!   [(set_attr "type" "ssecvt,fmov")
!    (set_attr "mode" "DF,SF")])
! 
! (define_insn "*truncdfsf2_3"
    [(set (match_operand:SF 0 "memory_operand" "=m")
  	(float_truncate:SF
  	 (match_operand:DF 1 "register_operand" "f")))]
***************
*** 3765,3775 ****
    [(set (match_operand:SF 0 "register_operand" "=Y")
  	(float_truncate:SF
  	 (match_operand:DF 1 "nonimmediate_operand" "mY")))]
!   "!TARGET_80387 && TARGET_SSE2"
    "cvtsd2ss\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "DF")])
  
  (define_split
    [(set (match_operand:SF 0 "memory_operand" "")
  	(float_truncate:SF
--- 3892,3911 ----
    [(set (match_operand:SF 0 "register_operand" "=Y")
  	(float_truncate:SF
  	 (match_operand:DF 1 "nonimmediate_operand" "mY")))]
!   "!TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS"
    "cvtsd2ss\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "DF")])
  
+ (define_insn "*truncdfsf2_sse_only_nooverlap"
+   [(set (match_operand:SF 0 "register_operand" "=&Y")
+ 	(float_truncate:SF
+ 	 (match_operand:DF 1 "nonimmediate_operand" "mY")))]
+   "!TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS"
+   "#"
+   [(set_attr "type" "ssecvt")
+    (set_attr "mode" "DF")])
+ 
  (define_split
    [(set (match_operand:SF 0 "memory_operand" "")
  	(float_truncate:SF
***************
*** 3779,3793 ****
    [(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
    "")
  
  (define_split
!   [(set (match_operand:SF 0 "nonimmediate_operand" "")
  	(float_truncate:SF
  	 (match_operand:DF 1 "nonimmediate_operand" "")))
     (clobber (match_operand 2 "" ""))]
    "TARGET_80387 && reload_completed
!    && !FP_REG_P (operands[0]) && !FP_REG_P (operands[1])"
!   [(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
!   "")
  
  (define_split
    [(set (match_operand:SF 0 "register_operand" "")
--- 3915,3969 ----
    [(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
    "")
  
+ ; Avoid possible reformatting penalty on the destination by first
+ ; zeroing it out.
  (define_split
!   [(set (match_operand:SF 0 "register_operand" "")
  	(float_truncate:SF
  	 (match_operand:DF 1 "nonimmediate_operand" "")))
     (clobber (match_operand 2 "" ""))]
    "TARGET_80387 && reload_completed
!    && SSE_REG_P (operands[0])"
!   [(const_int 0)]
! {
!   rtx src, dest;
!   if (!TARGET_SSE_PARTIAL_REGS)
!     emit_insn (gen_truncdfsf2_sse_only (operands[0], operands[1]));
!   else
!     {
!       dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
!       src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
!       /* simplify_gen_subreg refuses to widen memory references.  */
!       if (GET_CODE (src) == SUBREG)
! 	alter_subreg (&src);
!       if (reg_overlap_mentioned_p (operands[0], operands[1]))
! 	abort ();
!       emit_insn (gen_sse2_clrv4sf (dest, CONST0_RTX (V4SFmode)));
!       emit_insn (gen_cvtsd2ss (dest, dest, src));
!     }
!   DONE;
! })
! 
! (define_split
!   [(set (match_operand:SF 0 "register_operand" "")
! 	(float_truncate:SF
! 	 (match_operand:DF 1 "nonimmediate_operand" "")))]
!   "TARGET_80387 && reload_completed
!    && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS"
!   [(const_int 0)]
! {
!   rtx src, dest;
!   dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
!   src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
!   /* simplify_gen_subreg refuses to widen memory references.  */
!   if (GET_CODE (src) == SUBREG)
!     alter_subreg (&src);
!   if (reg_overlap_mentioned_p (operands[0], operands[1]))
!     abort ();
!   emit_insn (gen_sse2_clrv4sf (dest, CONST0_RTX (V4SFmode)));
!   emit_insn (gen_cvtsd2ss (dest, dest, src));
!   DONE;
! })
  
  (define_split
    [(set (match_operand:SF 0 "register_operand" "")
***************
*** 4491,4496 ****
--- 4667,4688 ----
     (set_attr "mode" "SF")
     (set_attr "fp_int_src" "true")])
  
+ ; Avoid possible reformatting penalty on the destination by first
+ ; zeroing it out.
+ (define_split
+   [(set (match_operand:SF 0 "register_operand" "")
+ 	(float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
+   "TARGET_80387 && reload_completed
+    && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS"
+   [(const_int 0)]
+ {
+   rtx dest;
+   dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+   emit_insn (gen_sse2_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+   emit_insn (gen_cvtsi2ss (dest, dest, operands[1]));
+   DONE;
+ })
+ 
  (define_expand "floatdisf2"
    [(set (match_operand:SF 0 "register_operand" "")
  	(float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
***************
*** 4529,4534 ****
--- 4721,4742 ----
     (set_attr "mode" "SF")
     (set_attr "fp_int_src" "true")])
  
+ ; Avoid possible reformatting penalty on the destination by first
+ ; zeroing it out.
+ (define_split
+   [(set (match_operand:SF 0 "register_operand" "")
+ 	(float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
+   "TARGET_80387 && reload_completed
+    && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS"
+   [(const_int 0)]
+ {
+   rtx dest;
+   dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+   emit_insn (gen_sse2_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+   emit_insn (gen_cvtsi2ssq (dest, dest, operands[1]));
+   DONE;
+ })
+ 
  (define_insn "floathidf2"
    [(set (match_operand:DF 0 "register_operand" "=f,f")
  	(float:DF (match_operand:HI 1 "nonimmediate_operand" "m,r")))]
***************
*** 9492,9497 ****
--- 9700,9713 ----
  	(xor:TI (subreg:TI (match_dup 1) 0)
  		(subreg:TI (match_dup 2) 0)))]
  {
+   /* Avoid possible reformatting on the operands.  */
+   if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
+     {
+       rtx op = gen_rtx_SUBREG (V2DFmode, operands[1], 0);
+       emit_insn (gen_sse2_unpcklpd (op, op, op));
+       op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
+       emit_insn (gen_sse2_unpcklpd (op, op, op));
+     }
    if (operands_match_p (operands[0], operands[2]))
      {
        rtx tmp;
***************
*** 9925,9931 ****
    "reload_completed && SSE_REG_P (operands[0])"
    [(set (subreg:TI (match_dup 0) 0)
  	(and:TI (not:TI (subreg:TI (match_dup 2) 0))
! 		(subreg:TI (match_dup 1) 0)))])
  
  
  ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
--- 10141,10157 ----
    "reload_completed && SSE_REG_P (operands[0])"
    [(set (subreg:TI (match_dup 0) 0)
  	(and:TI (not:TI (subreg:TI (match_dup 2) 0))
! 		(subreg:TI (match_dup 1) 0)))]
! {
!   /* Avoid possible reformatting on the operands.  */
!   if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
!     {
!       rtx op = gen_rtx_SUBREG (V2DFmode, operands[1], 0);
!       emit_insn (gen_sse2_unpcklpd (op, op, op));
!       op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
!       emit_insn (gen_sse2_unpcklpd (op, op, op));
!     }
! })
  
  
  ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
***************
*** 16596,16601 ****
--- 16822,16835 ----
     (set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0)
  					    (subreg:TI (match_dup 7) 0)))]
  {
+   if (GET_MODE (operands[2]) == DFmode
+       && TARGET_SSE_PARTIAL_REGS && !optimize_size)
+     {
+       rtx op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
+       emit_insn (gen_sse2_unpcklpd (op, op, op));
+       op = gen_rtx_SUBREG (V2DFmode, operands[3], 0);
+       emit_insn (gen_sse2_unpcklpd (op, op, op));
+     }
    /* If op2 == op3, op3 will be clobbered before it is used.
       This should be optimized out though.  */
    if (operands_match_p (operands[2], operands[3]))
***************
*** 16704,16709 ****
--- 16938,16957 ----
     (set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6)
  					    (subreg:TI (match_dup 7) 0)))]
  {
+   if (TARGET_SSE_PARTIAL_REGS && !optimize_size
+       && GET_MODE (operands[2]) == DFmode)
+     {
+       if (REG_P (operands[2]))
+ 	{
+ 	  rtx op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
+ 	  emit_insn (gen_sse2_unpcklpd (op, op, op));
+ 	}
+       if (REG_P (operands[3]))
+ 	{
+ 	  rtx op = gen_rtx_SUBREG (V2DFmode, operands[3], 0);
+ 	  emit_insn (gen_sse2_unpcklpd (op, op, op));
+ 	}
+     }
    PUT_MODE (operands[1], GET_MODE (operands[0]));
    if (!sse_comparison_operator (operands[1], VOIDmode))
      {
***************
*** 17810,17816 ****
    [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
    "TARGET_SSE"
-   ;; @@@ let's try to use movaps here.
    "movaps\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssemov")
     (set_attr "mode" "V4SF")])
--- 18058,18063 ----
***************
*** 17819,17828 ****
    [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
    "TARGET_SSE"
!   ;; @@@ let's try to use movaps here.
!   "movaps\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssemov")
!    (set_attr "mode" "V4SF")])
  
  (define_insn "movv8qi_internal"
    [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
--- 18066,18094 ----
    [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
    "TARGET_SSE"
! {
!   if (get_attr_mode (insn) == MODE_V4SF)
!     return "movaps\t{%1, %0|%0, %1}";
!   else
!     return "movdqa\t{%1, %0|%0, %1}";
! }
    [(set_attr "type" "ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "0")
! 		 (if_then_else
! 		   (ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))
! 	       (eq_attr "alternative" "1")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))]
! 	       (const_string "TI")))])
  
  (define_insn "movv8qi_internal"
    [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
***************
*** 17872,17899 ****
    [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V2DF 1 "general_operand" "xm,x"))]
    "TARGET_SSE2"
!   ;; @@@ let's try to use movaps here.
!   "movapd\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssemov")
!    (set_attr "mode" "V2DF")])
  
  (define_insn "movv8hi_internal"
    [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V8HI 1 "general_operand" "xm,x"))]
    "TARGET_SSE2"
!   ;; @@@ let's try to use movaps here.
!   "movaps\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssemov")
!    (set_attr "mode" "V4SF")])
  
  (define_insn "movv16qi_internal"
    [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V16QI 1 "general_operand" "xm,x"))]
    "TARGET_SSE2"
!   ;; @@@ let's try to use movaps here.
!   "movaps\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssemov")
!    (set_attr "mode" "V4SF")])
  
  (define_expand "movv2df"
    [(set (match_operand:V2DF 0 "general_operand" "")
--- 18138,18222 ----
    [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V2DF 1 "general_operand" "xm,x"))]
    "TARGET_SSE2"
! {
!   if (get_attr_mode (insn) == MODE_V4SF)
!     return "movaps\t{%1, %0|%0, %1}";
!   else
!     return "movapd\t{%1, %0|%0, %1}";
! }
    [(set_attr "type" "ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "0")
! 		 (if_then_else
! 		   (ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		   (const_string "V4SF")
! 		   (const_string "V2DF"))
! 	       (eq_attr "alternative" "1")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "V2DF"))]
! 	       (const_string "V2DF")))])
  
  (define_insn "movv8hi_internal"
    [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V8HI 1 "general_operand" "xm,x"))]
    "TARGET_SSE2"
! {
!   if (get_attr_mode (insn) == MODE_V4SF)
!     return "movaps\t{%1, %0|%0, %1}";
!   else
!     return "movdqa\t{%1, %0|%0, %1}";
! }
    [(set_attr "type" "ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "0")
! 		 (if_then_else
! 		   (ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))
! 	       (eq_attr "alternative" "1")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))]
! 	       (const_string "TI")))])
  
  (define_insn "movv16qi_internal"
    [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
  	(match_operand:V16QI 1 "general_operand" "xm,x"))]
    "TARGET_SSE2"
! {
!   if (get_attr_mode (insn) == MODE_V4SF)
!     return "movaps\t{%1, %0|%0, %1}";
!   else
!     return "movdqa\t{%1, %0|%0, %1}";
! }
    [(set_attr "type" "ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "0")
! 		 (if_then_else
! 		   (ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))
! 	       (eq_attr "alternative" "1")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))]
! 	       (const_string "TI")))])
  
  (define_expand "movv2df"
    [(set (match_operand:V2DF 0 "general_operand" "")
***************
*** 18090,18115 ****
    [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:TI 1 "general_operand" "O,xm,x"))]
    "TARGET_SSE && !TARGET_64BIT"
!   "@
!    xorps\t%0, %0
!    movaps\t{%1, %0|%0, %1}
!    movaps\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssemov,ssemov,ssemov")
!    (set_attr "mode" "V4SF")])
  
  (define_insn "*movti_rex64"
    [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
  	(match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
    "TARGET_64BIT
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
!   "@
!    #
!    #
!    xorps\t%0, %0
!    movaps\\t{%1, %0|%0, %1}
!    movaps\\t{%1, %0|%0, %1}"
    [(set_attr "type" "*,*,ssemov,ssemov,ssemov")
!    (set_attr "mode" "V4SF")])
  
  (define_split
    [(set (match_operand:TI 0 "nonimmediate_operand" "")
--- 18413,18513 ----
    [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
  	(match_operand:TI 1 "general_operand" "O,xm,x"))]
    "TARGET_SSE && !TARGET_64BIT"
! {
!   switch (which_alternative)
!     {
!     case 0:
!       if (get_attr_mode (insn) == MODE_V4SF)
! 	return "xorps\t%0, %0";
!       else
! 	return "pxor\t%0, %0";
!     case 1:
!     case 2:
!       if (get_attr_mode (insn) == MODE_V4SF)
! 	return "movaps\t{%1, %0|%0, %1}";
!       else
! 	return "movdqa\t{%1, %0|%0, %1}";
!     default:
!       abort ();
!     }
! }
    [(set_attr "type" "ssemov,ssemov,ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "0")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_LOAD0")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))
! 	       (eq_attr "alternative" "1")
! 		 (if_then_else
! 		   (ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))
! 	       (eq_attr "alternative" "2")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))]
! 	       (const_string "TI")))])
  
  (define_insn "*movti_rex64"
    [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
  	(match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
    "TARGET_64BIT
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
! {
!   switch (which_alternative)
!     {
!     case 0:
!     case 1:
!       return "#";
!     case 2:
!       if (get_attr_mode (insn) == MODE_V4SF)
! 	return "xorps\t%0, %0";
!       else
! 	return "pxor\t%0, %0";
!     case 3:
!     case 4:
!       if (get_attr_mode (insn) == MODE_V4SF)
! 	return "movaps\t{%1, %0|%0, %1}";
!       else
! 	return "movdqa\t{%1, %0|%0, %1}";
!     default:
!       abort ();
!     }
! }
    [(set_attr "type" "*,*,ssemov,ssemov,ssemov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "2")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_LOAD0")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))
! 	       (eq_attr "alternative" "3")
! 		 (if_then_else
! 		   (ne (symbol_ref "optimize_size")
! 		       (const_int 0))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))
! 	       (eq_attr "alternative" "4")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
! 			    (const_int 0))
! 			(ne (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "TI"))]
! 	       (const_string "DI")))])
  
  (define_split
    [(set (match_operand:TI 0 "nonimmediate_operand" "")
***************
*** 18938,18943 ****
--- 19336,19353 ----
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "SF")])
  
+ (define_insn "cvtsi2ssq"
+   [(set (match_operand:V4SF 0 "register_operand" "=x")
+ 	(vec_merge:V4SF
+ 	 (match_operand:V4SF 1 "register_operand" "0")
+ 	 (vec_duplicate:V4SF
+ 	  (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ 	 (const_int 14)))]
+   "TARGET_SSE && TARGET_64BIT"
+   "cvtsi2ssq\t{%2, %0|%0, %2}"
+   [(set_attr "type" "ssecvt")
+    (set_attr "mode" "SF")])
+ 
  (define_insn "cvtss2si"
    [(set (match_operand:SI 0 "register_operand" "=r")
  	(vec_select:SI
***************
*** 20586,20592 ****
  	(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
  	 		(vec_duplicate:V4SF
  			  (float_truncate:V2SF
! 			    (match_operand:V2DF 2 "register_operand" "xm")))
  			(const_int 14)))]
    "TARGET_SSE2"
    "cvtsd2ss\t{%2, %0|%0, %2}"
--- 20996,21002 ----
  	(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
  	 		(vec_duplicate:V4SF
  			  (float_truncate:V2SF
! 			    (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
  			(const_int 14)))]
    "TARGET_SSE2"
    "cvtsd2ss\t{%2, %0|%0, %2}"
***************
*** 20598,20604 ****
  	(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
  	 		(float_extend:V2DF
  			  (vec_select:V2SF
! 			    (match_operand:V4SF 2 "register_operand" "xm")
  			    (parallel [(const_int 0)
  				       (const_int 1)])))
  			(const_int 2)))]
--- 21008,21014 ----
  	(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
  	 		(float_extend:V2DF
  			  (vec_select:V2SF
! 			    (match_operand:V4SF 2 "nonimmediate_operand" "xm")
  			    (parallel [(const_int 0)
  				       (const_int 1)])))
  			(const_int 2)))]
***************
*** 20875,20881 ****
    [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))]
    "TARGET_SSE2"
    "pxor\t{%0, %0|%0, %0}"
!   [(set_attr "type" "sseiadd")
     (set_attr "memory" "none")
     (set_attr "mode" "TI")])
  
--- 21285,21300 ----
    [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))]
    "TARGET_SSE2"
    "pxor\t{%0, %0|%0, %0}"
!   [(set_attr "type" "ssemov")
!    (set_attr "memory" "none")
!    (set_attr "mode" "TI")])
! 
! (define_insn "sse2_clrv4sf"
!   [(set (match_operand:V4SF 0 "register_operand" "=x")
! 	(match_operand:V4SF 1 "const0_operand" "X"))]
!   "TARGET_SSE"
!   "xorps\t{%0, %0|%0, %0}"
!   [(set_attr "type" "ssemov")
     (set_attr "memory" "none")
     (set_attr "mode" "TI")])
  


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]