This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

SSE/MMX moves optimization


Hi,
Athlon and K8 do not like direct moves from SSE to integer registers and
vice versa.  It is faster to go through memory.  This patch implements that
and fixes some dead ends in the patterns that I've noticed.

Bootstrapped/regtested x86-64.  OK?

Tue Feb  4 20:27:24 CET 2003  Jan Hubicka  <jh@suse.cz>
	* i386.c (x86_inter_unit_moves): New variable.
	(ix86_secondary_memory_needed): Fix 64-bit case, honor
	TARGET_INTER_UNIT_MOVES.
	* i386.h (x86_inter_unit_moves): Declare.
	(TARGET_INTER_UNIT_MOVES): New macro.
	* i386.md (movsi_1): Clean up constraints; disable
	when not doing inter-unit moves.
	(movsi_1_nointernunit): New.
	(movdi_1_rex64): Fix constraints; deal with SSE->GPR moves.
	(movdi_1_rex64_nointerunit): New.
	(movsf_1): Disable when not doing inter-unit moves.
	(movsf_1_nointerunit): New.
Index: i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.524
diff -c -3 -p -r1.524 i386.c
*** i386.c	3 Feb 2003 20:20:36 -0000	1.524
--- i386.c	4 Feb 2003 19:27:16 -0000
*************** const int x86_sse_typeless_stores = m_AT
*** 521,526 ****
--- 521,527 ----
  const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
  const int x86_use_ffreep = m_ATHLON_K8;
  const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
+ const int x86_inter_unit_moves = ~(m_ATHLON_K8);
  
  /* In case the average insn count for single function invocation is
     lower than this constant, emit fast (but longer) prologue and
*************** ix86_secondary_memory_needed (class1, cl
*** 14385,14394 ****
  	return 1;
      }
    return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
! 	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
! 	      && (mode) != SImode)
! 	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
! 	      && (mode) != SImode));
  }
  /* Return the cost of moving data from a register in class CLASS1 to
     one in class CLASS2.
--- 14386,14395 ----
  	return 1;
      }
    return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
! 	  || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
! 	       || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
! 	      && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
! 		  || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
  }
  /* Return the cost of moving data from a register in class CLASS1 to
     one in class CLASS2.
Index: i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.318
diff -c -3 -p -r1.318 i386.h
*** i386.h	31 Jan 2003 23:34:15 -0000	1.318
--- i386.h	4 Feb 2003 19:27:16 -0000
*************** extern const int x86_arch_always_fancy_m
*** 230,235 ****
--- 230,236 ----
  extern const int x86_sse_partial_reg_dependency, x86_sse_partial_regs;
  extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
  extern const int x86_use_ffreep, x86_sse_partial_regs_for_cvtsd2ss;
+ extern const int x86_inter_unit_moves;
  extern int x86_prefetch_sse;
  
  #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
*************** extern int x86_prefetch_sse;
*** 282,287 ****
--- 283,289 ----
  #define TARGET_SHIFT1 (x86_shift1 & CPUMASK)
  #define TARGET_USE_FFREEP (x86_use_ffreep & CPUMASK)
  #define TARGET_REP_MOVL_OPTIMAL (x86_rep_movl_optimal & CPUMASK)
+ #define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & CPUMASK)
  
  #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
  
Index: i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.421
diff -c -3 -p -r1.421 i386.md
*** i386.md	3 Feb 2003 20:20:36 -0000	1.421
--- i386.md	4 Feb 2003 19:27:17 -0000
***************
*** 1166,1173 ****
  
  (define_insn "*movsi_1"
    [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!rm,!*y,!*Y,!rm,!*Y")
! 	(match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,rm,*y,*y,rm,*Y,*Y"))]
!   "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
  {
    switch (get_attr_type (insn))
      {
--- 1166,1214 ----
  
  (define_insn "*movsi_1"
    [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!rm,!*y,!*Y,!rm,!*Y")
! 	(match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,*y,*y,rm,*Y,*Y,rm"))]
!   "(TARGET_INTER_UNIT_MOVES || optimize_size)
!    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
! {
!   switch (get_attr_type (insn))
!     {
!     case TYPE_SSEMOV:
!       if (get_attr_mode (insn) == TImode)
!         return "movdqa\t{%1, %0|%0, %1}";
!       return "movd\t{%1, %0|%0, %1}";
! 
!     case TYPE_MMXMOV:
!       if (get_attr_mode (insn) == DImode)
! 	return "movq\t{%1, %0|%0, %1}";
!       return "movd\t{%1, %0|%0, %1}";
! 
!     case TYPE_LEA:
!       return "lea{l}\t{%1, %0|%0, %1}";
! 
!     default:
!       if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1]))
! 	abort();
!       return "mov{l}\t{%1, %0|%0, %1}";
!     }
! }
!   [(set (attr "type")
!      (cond [(eq_attr "alternative" "4,5,6")
! 	      (const_string "mmxmov")
! 	    (eq_attr "alternative" "7,8,9")
! 	      (const_string "ssemov")
! 	    (and (ne (symbol_ref "flag_pic") (const_int 0))
! 		 (match_operand:SI 1 "symbolic_operand" ""))
! 	      (const_string "lea")
! 	   ]
! 	   (const_string "imov")))
!    (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*")
!    (set_attr "mode" "SI,SI,SI,SI,DI,SI,SI,TI,SI,SI")])
! 
! (define_insn "*movsi_1_nointernunit"
!   [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!m,!*y,!*Y,!m,!*Y")
! 	(match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,*y,*y,m,*Y,*Y,m"))]
!   "(!TARGET_INTER_UNIT_MOVES && !optimize_size)
!    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
    switch (get_attr_type (insn))
      {
***************
*** 1201,1207 ****
  	   ]
  	   (const_string "imov")))
     (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*")
!    (set_attr "mode" "SI,SI,SI,SI,SI,SI,DI,TI,SI,SI")])
  
  ;; Stores and loads of ax to arbitrary constant address.
  ;; We fake an second form of instruction to force reload to load address
--- 1242,1248 ----
  	   ]
  	   (const_string "imov")))
     (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*")
!    (set_attr "mode" "SI,SI,SI,SI,DI,SI,SI,TI,SI,SI")])
  
  ;; Stores and loads of ax to arbitrary constant address.
  ;; We fake an second form of instruction to force reload to load address
***************
*** 1932,1947 ****
    "ix86_split_long_move (operands); DONE;")
  
  (define_insn "*movdi_1_rex64"
!   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!m*y,!*y,!*Y,!m,!*Y")
! 	(match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,m,*Y,*Y,*m"))]
    "TARGET_64BIT
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
    switch (get_attr_type (insn))
      {
      case TYPE_SSEMOV:
!       if (register_operand (operands[0], DImode)
! 	  && register_operand (operands[1], DImode))
  	  return "movdqa\t{%1, %0|%0, %1}";
        /* FALLTHRU */
      case TYPE_MMXMOV:
--- 1973,2038 ----
    "ix86_split_long_move (operands); DONE;")
  
  (define_insn "*movdi_1_rex64"
!   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!*y,!rm,!*y,!*Y,!rm,!*Y")
! 	(match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,*y,rm,*Y,*Y,rm"))]
!   "TARGET_64BIT
!    && (TARGET_INTER_UNIT_MOVES || optimize_size)
!    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
! {
!   switch (get_attr_type (insn))
!     {
!     case TYPE_SSEMOV:
!       if (get_attr_mode (insn) == MODE_TI)
! 	  return "movdqa\t{%1, %0|%0, %1}";
!       /* Moves from and into integer register is done using movd opcode with
!  	 REX prefix.  */
!       if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))
! 	  return "movd\t{%1, %0|%0, %1}";
!       /* FALLTHRU */
!     case TYPE_MMXMOV:
!       return "movq\t{%1, %0|%0, %1}";
!     case TYPE_MULTI:
!       return "#";
!     case TYPE_LEA:
!       return "lea{q}\t{%a1, %0|%0, %a1}";
!     default:
!       if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1]))
! 	abort ();
!       if (get_attr_mode (insn) == MODE_SI)
! 	return "mov{l}\t{%k1, %k0|%k0, %k1}";
!       else if (which_alternative == 2)
! 	return "movabs{q}\t{%1, %0|%0, %1}";
!       else
! 	return "mov{q}\t{%1, %0|%0, %1}";
!     }
! }
!   [(set (attr "type")
!      (cond [(eq_attr "alternative" "5,6,7")
! 	      (const_string "mmxmov")
! 	    (eq_attr "alternative" "8,9,10")
! 	      (const_string "ssemov")
! 	    (eq_attr "alternative" "4")
! 	      (const_string "multi")
!  	    (and (ne (symbol_ref "flag_pic") (const_int 0))
! 		 (match_operand:DI 1 "symbolic_operand" ""))
! 	      (const_string "lea")
! 	   ]
! 	   (const_string "imov")))
!    (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*")
!    (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*")
!    (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI,DI")])
! 
! (define_insn "*movdi_1_rex64_nointerunit"
!   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!*y,!m,!*y,!*Y,!m,!*Y")
! 	(match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,*y,m,*Y,*Y,m"))]
    "TARGET_64BIT
+    && (!TARGET_INTER_UNIT_MOVES && !optimize_size)
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
    switch (get_attr_type (insn))
      {
      case TYPE_SSEMOV:
!       if (get_attr_mode (insn) == MODE_TI)
  	  return "movdqa\t{%1, %0|%0, %1}";
        /* FALLTHRU */
      case TYPE_MMXMOV:
***************
*** 1962,1970 ****
      }
  }
    [(set (attr "type")
!      (cond [(eq_attr "alternative" "5,6")
  	      (const_string "mmxmov")
! 	    (eq_attr "alternative" "7,8")
  	      (const_string "ssemov")
  	    (eq_attr "alternative" "4")
  	      (const_string "multi")
--- 2053,2061 ----
      }
  }
    [(set (attr "type")
!      (cond [(eq_attr "alternative" "5,6,7")
  	      (const_string "mmxmov")
! 	    (eq_attr "alternative" "8,9,10")
  	      (const_string "ssemov")
  	    (eq_attr "alternative" "4")
  	      (const_string "multi")
***************
*** 1973,1981 ****
  	      (const_string "lea")
  	   ]
  	   (const_string "imov")))
!    (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*")
!    (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*")
!    (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI")])
  
  ;; Stores and loads of ax to arbitrary constant address.
  ;; We fake an second form of instruction to force reload to load address
--- 2064,2072 ----
  	      (const_string "lea")
  	   ]
  	   (const_string "imov")))
!    (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*")
!    (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*")
!    (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI,DI")])
  
  ;; Stores and loads of ax to arbitrary constant address.
  ;; We fake an second form of instruction to force reload to load address
***************
*** 2130,2136 ****
  (define_insn "*movsf_1"
    [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!rm,!*y")
  	(match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,rm,*y,*y"))]
!   "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
     && (reload_in_progress || reload_completed
         || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
         || GET_CODE (operands[1]) != CONST_DOUBLE
--- 2221,2329 ----
  (define_insn "*movsf_1"
    [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!rm,!*y")
  	(match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,rm,*y,*y"))]
!   "(TARGET_INTER_UNIT_MOVES || optimize_size)
!    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
!    && (reload_in_progress || reload_completed
!        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
!        || GET_CODE (operands[1]) != CONST_DOUBLE
!        || memory_operand (operands[0], SFmode))" 
! {
!   switch (which_alternative)
!     {
!     case 0:
!       if (REG_P (operands[1])
!           && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
!         return "fstp\t%y0";
!       else if (STACK_TOP_P (operands[0]))
!         return "fld%z1\t%y1";
!       else
!         return "fst\t%y0";
! 
!     case 1:
!       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
!         return "fstp%z0\t%y0";
!       else
!         return "fst%z0\t%y0";
! 
!     case 2:
!       switch (standard_80387_constant_p (operands[1]))
!         {
!         case 1:
! 	  return "fldz";
! 	case 2:
! 	  return "fld1";
! 	}
!       abort();
! 
!     case 3:
!     case 4:
!       return "mov{l}\t{%1, %0|%0, %1}";
!     case 5:
!       if (get_attr_mode (insn) == MODE_TI)
! 	return "pxor\t%0, %0";
!       else
! 	return "xorps\t%0, %0";
!     case 6:
!       if (get_attr_mode (insn) == MODE_V4SF)
! 	return "movaps\t{%1, %0|%0, %1}";
!       else
! 	return "movss\t{%1, %0|%0, %1}";
!     case 7:
!     case 8:
!       return "movss\t{%1, %0|%0, %1}";
! 
!     case 9:
!     case 10:
!       return "movd\t{%1, %0|%0, %1}";
! 
!     case 11:
!       return "movq\t{%1, %0|%0, %1}";
! 
!     default:
!       abort();
!     }
! }
!   [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
!    (set (attr "mode")
!         (cond [(eq_attr "alternative" "3,4,9,10")
! 		 (const_string "SI")
! 	       (eq_attr "alternative" "5")
! 		 (if_then_else
! 		   (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
! 			    	 (const_int 0))
! 			     (ne (symbol_ref "TARGET_SSE2")
! 				 (const_int 0)))
! 			(eq (symbol_ref "optimize_size")
! 			    (const_int 0)))
! 		   (const_string "TI")
! 		   (const_string "V4SF"))
! 	       /* For architectures resolving dependencies on
! 		  whole SSE registers use APS move to break dependency
! 		  chains, otherwise use short move to avoid extra work. 
! 
! 		  Do the same for architectures resolving dependencies on
! 		  the parts.  While in DF mode it is better to always handle
! 		  just register parts, the SF mode is different due to lack
! 		  of instructions to load just part of the register.  It is
! 		  better to maintain the whole registers in single format
! 		  to avoid problems on using packed logical operations.  */
! 	       (eq_attr "alternative" "6")
! 		 (if_then_else
! 		   (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
! 			    (const_int 0))
! 			(ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
! 			    (const_int 0)))
! 		   (const_string "V4SF")
! 		   (const_string "SF"))
! 	       (eq_attr "alternative" "11")
! 		 (const_string "DI")]
! 	       (const_string "SF")))])
! 
! (define_insn "*movsf_1_nointerunit"
!   [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!m,!*y")
! 	(match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,m,*y,*y"))]
!   "(!TARGET_INTER_UNIT_MOVES && !optimize_size)
!    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
     && (reload_in_progress || reload_completed
         || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
         || GET_CODE (operands[1]) != CONST_DOUBLE


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]