This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

SSE fix 4


Hi,
We generate somewhat lousy code for SSE absolute values, since previously
loading the bitwise negation of negative zero into a register didn't work
(converting to LONG_DOUBLE and back threw away the unneeded parts of the NaN,
resulting in a different number).  I assume that Richard's rewrite made it work, as it just works now.

This patch makes SSE absolute values much faster by using a vector operand and
an AND instead of the NAND (and-not), which required killing the constant operand.

In case this is OK, I will update the other splitters to use vector operands
too, as it avoids one reformatting on Athlon, and add a splitter to optimize
loads of vectors having 0 in the upper half into loads of short vectors.

OK for 3.4-BIB branch?

Honza

Mon Oct 14 18:41:50 CEST 2002  Jan Hubicka  <jh@suse.cz>

	* i386.md (abssf,absdf): Use vector operands for SSE
	(abssf2_ifs, absdf2_ifs, absdf2_ifs_rex64 and splitters): Update for
	vector operand.

*** i386.md	Mon Oct 14 18:15:53 2002
--- /p1/ssediv/egcs/gcc/config/i386/i386.md	Mon Oct 14 18:24:01 2002
***************
*** 10068,10081 ****
  	{
  	  /* Using SSE is tricky, since we need bitwise negation of -0
  	     in register.  */
! 	  rtx reg = gen_reg_rtx (SFmode);
  	  rtx dest = operands[0];
  
  	  operands[1] = force_reg (SFmode, operands[1]);
  	  operands[0] = force_reg (SFmode, operands[0]);
  	  emit_move_insn (reg,
! 			  gen_lowpart (SFmode,
! 				       gen_int_mode (0x80000000, SImode)));
  	  emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg));
  	  if (dest != operands[0])
  	    emit_move_insn (dest, operands[0]);
--- 10068,10084 ----
  	{
  	  /* Using SSE is tricky, since we need bitwise negation of -0
  	     in register.  */
! 	  rtx reg = gen_reg_rtx (V4SFmode);
  	  rtx dest = operands[0];
+ 	  rtx imm;
  
  	  operands[1] = force_reg (SFmode, operands[1]);
  	  operands[0] = force_reg (SFmode, operands[0]);
+ 	  imm = gen_lowpart (SFmode, gen_int_mode(~0x80000000, SImode));
  	  emit_move_insn (reg,
! 			  gen_rtx_CONST_VECTOR (V4SFmode,
! 			  gen_rtvec (4, imm, CONST0_RTX (SFmode),
! 				     CONST0_RTX (SFmode), CONST0_RTX (SFmode))));
  	  emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg));
  	  if (dest != operands[0])
  	    emit_move_insn (dest, operands[0]);
***************
*** 10092,10100 ****
    "#")
  
  (define_insn "abssf2_ifs"
!   [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,f#xr,rm#xf")
! 	(abs:SF (match_operand:SF 1 "nonimmediate_operand" "x,0,0")))
!    (use (match_operand:SF 2 "nonmemory_operand" "*0#x,*g#x,*g#x"))
     (clobber (reg:CC 17))]
    "TARGET_SSE
     && (reload_in_progress || reload_completed
--- 10095,10103 ----
    "#")
  
  (define_insn "abssf2_ifs"
!   [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
! 	(abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,x,0,0")))
!    (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,x*rm,x*rm"))
     (clobber (reg:CC 17))]
    "TARGET_SSE
     && (reload_in_progress || reload_completed
***************
*** 10105,10111 ****
  (define_split
    [(set (match_operand:SF 0 "memory_operand" "")
  	(abs:SF (match_operand:SF 1 "memory_operand" "")))
!    (use (match_operand:SF 2 "" ""))
     (clobber (reg:CC 17))]
    ""
    [(parallel [(set (match_dup 0)
--- 10108,10114 ----
  (define_split
    [(set (match_operand:SF 0 "memory_operand" "")
  	(abs:SF (match_operand:SF 1 "memory_operand" "")))
!    (use (match_operand:V4SF 2 "" ""))
     (clobber (reg:CC 17))]
    ""
    [(parallel [(set (match_dup 0)
***************
*** 10115,10121 ****
  (define_split
    [(set (match_operand:SF 0 "register_operand" "")
  	(abs:SF (match_operand:SF 1 "register_operand" "")))
!    (use (match_operand:SF 2 "" ""))
     (clobber (reg:CC 17))]
    "reload_completed && !SSE_REG_P (operands[0])"
    [(parallel [(set (match_dup 0)
--- 10118,10124 ----
  (define_split
    [(set (match_operand:SF 0 "register_operand" "")
  	(abs:SF (match_operand:SF 1 "register_operand" "")))
!    (use (match_operand:V4SF 2 "" ""))
     (clobber (reg:CC 17))]
    "reload_completed && !SSE_REG_P (operands[0])"
    [(parallel [(set (match_dup 0)
***************
*** 10124,10136 ****
  
  (define_split
    [(set (match_operand:SF 0 "register_operand" "")
  	(abs:SF (match_operand:SF 1 "register_operand" "")))
!    (use (match_operand:SF 2 "register_operand" ""))
     (clobber (reg:CC 17))]
    "reload_completed && SSE_REG_P (operands[0])"
    [(set (subreg:TI (match_dup 0) 0)
! 	(and:TI (not:TI (subreg:TI (match_dup 2) 0))
! 		(subreg:TI (match_dup 1) 0)))])
  
  ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
  ;; because of secondary memory needed to reload from class FLOAT_INT_REGS
--- 10127,10158 ----
  
  (define_split
    [(set (match_operand:SF 0 "register_operand" "")
+ 	(abs:SF (match_dup 0)))
+    (use (match_operand:V4SF 1 "nonmemory_operand" ""))
+    (clobber (reg:CC 17))]
+   "reload_completed && SSE_REG_P (operands[0])"
+   [(set (subreg:TI (match_dup 0) 0)
+ 	(and:TI (subreg:TI (match_dup 0) 0)
+ 		(subreg:TI (match_dup 1) 0)))]
+ {
+   operands[0] = gen_rtx_SUBREG (V4SFmode, operands[0], 0);
+ })
+ 
+ (define_split
+   [(set (match_operand:SF 0 "register_operand" "")
  	(abs:SF (match_operand:SF 1 "register_operand" "")))
!    (use (match_operand:V4SF 2 "register_operand" ""))
     (clobber (reg:CC 17))]
    "reload_completed && SSE_REG_P (operands[0])"
    [(set (subreg:TI (match_dup 0) 0)
! 	(and:TI (subreg:TI (match_dup 0) 0)
! 		(subreg:TI (match_dup 1) 0)))]
! {
!   /* Operand2 should match operand0, as the opposite case is handled above.  */
!   if (REGNO (operands[2]) != REGNO (operands[0]))
!     abort ();
!   operands[0] = gen_rtx_SUBREG (V4SFmode, operands[0], 0);
! })
  
  ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
  ;; because of secondary memory needed to reload from class FLOAT_INT_REGS
***************
*** 10193,10209 ****
  	{
  	  /* Using SSE is tricky, since we need bitwise negation of -0
  	     in register.  */
! 	  rtx reg = gen_reg_rtx (DFmode);
  #if HOST_BITS_PER_WIDE_INT >= 64
! 	  rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode);
  #else
! 	  rtx imm = immed_double_const (0, 0x80000000, DImode);
  #endif
  	  rtx dest = operands[0];
  
  	  operands[1] = force_reg (DFmode, operands[1]);
  	  operands[0] = force_reg (DFmode, operands[0]);
! 	  emit_move_insn (reg, gen_lowpart (DFmode, imm));
  	  emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg));
  	  if (dest != operands[0])
  	    emit_move_insn (dest, operands[0]);
--- 10215,10236 ----
  	{
  	  /* Using SSE is tricky, since we need bitwise negation of -0
  	     in register.  */
! 	  rtx reg = gen_reg_rtx (V2DFmode);
  #if HOST_BITS_PER_WIDE_INT >= 64
! 	  rtx imm = gen_int_mode (~(((HOST_WIDE_INT)1) << 63), DImode);
  #else
! 	  rtx imm = immed_double_const (~0, ~0x80000000, DImode);
  #endif
  	  rtx dest = operands[0];
  
  	  operands[1] = force_reg (DFmode, operands[1]);
  	  operands[0] = force_reg (DFmode, operands[0]);
! 
! 	  /* Produce LONG_DOUBLE with the proper immediate argument.  */
! 	  imm = gen_lowpart (DFmode, imm);
! 	  emit_move_insn (reg,
! 			  gen_rtx_CONST_VECTOR (V2DFmode,
! 			  gen_rtvec (2, imm, CONST0_RTX (DFmode))));
  	  emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg));
  	  if (dest != operands[0])
  	    emit_move_insn (dest, operands[0]);
***************
*** 10220,10228 ****
    "#")
  
  (define_insn "absdf2_ifs"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr,mr#Yf")
! 	(abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0,0")))
!    (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y,*g#Y"))
     (clobber (reg:CC 17))]
    "!TARGET_64BIT && TARGET_SSE2
     && (reload_in_progress || reload_completed
--- 10247,10255 ----
    "#")
  
  (define_insn "absdf2_ifs"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr,mr#Yf")
! 	(abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y,0,0")))
!    (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Y*rm,Y*rm"))
     (clobber (reg:CC 17))]
    "!TARGET_64BIT && TARGET_SSE2
     && (reload_in_progress || reload_completed
***************
*** 10231,10239 ****
    "#")
  
  (define_insn "*absdf2_ifs_rex64"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr")
! 	(abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0")))
!    (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y"))
     (clobber (reg:CC 17))]
    "TARGET_64BIT && TARGET_SSE2
     && (reload_in_progress || reload_completed
--- 10258,10266 ----
    "#")
  
  (define_insn "*absdf2_ifs_rex64"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr")
! 	(abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y,0")))
!    (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,*0,*Y*rm"))
     (clobber (reg:CC 17))]
    "TARGET_64BIT && TARGET_SSE2
     && (reload_in_progress || reload_completed
***************
*** 10244,10250 ****
  (define_split
    [(set (match_operand:DF 0 "memory_operand" "")
  	(abs:DF (match_operand:DF 1 "memory_operand" "")))
!    (use (match_operand:DF 2 "" ""))
     (clobber (reg:CC 17))]
    ""
    [(parallel [(set (match_dup 0)
--- 10271,10277 ----
  (define_split
    [(set (match_operand:DF 0 "memory_operand" "")
  	(abs:DF (match_operand:DF 1 "memory_operand" "")))
!    (use (match_operand:V2DF 2 "" ""))
     (clobber (reg:CC 17))]
    ""
    [(parallel [(set (match_dup 0)
***************
*** 10254,10260 ****
  (define_split
    [(set (match_operand:DF 0 "register_operand" "")
  	(abs:DF (match_operand:DF 1 "register_operand" "")))
!    (use (match_operand:DF 2 "" ""))
     (clobber (reg:CC 17))]
    "reload_completed && !SSE_REG_P (operands[0])"
    [(parallel [(set (match_dup 0)
--- 10281,10287 ----
  (define_split
    [(set (match_operand:DF 0 "register_operand" "")
  	(abs:DF (match_operand:DF 1 "register_operand" "")))
!    (use (match_operand:V2DF 2 "" ""))
     (clobber (reg:CC 17))]
    "reload_completed && !SSE_REG_P (operands[0])"
    [(parallel [(set (match_dup 0)
***************
*** 10263,10284 ****
  
  (define_split
    [(set (match_operand:DF 0 "register_operand" "")
  	(abs:DF (match_operand:DF 1 "register_operand" "")))
!    (use (match_operand:DF 2 "register_operand" ""))
     (clobber (reg:CC 17))]
    "reload_completed && SSE_REG_P (operands[0])"
    [(set (subreg:TI (match_dup 0) 0)
! 	(and:TI (not:TI (subreg:TI (match_dup 2) 0))
  		(subreg:TI (match_dup 1) 0)))]
  {
    /* Avoid possible reformating on the operands.  */
    if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
!     {
!       rtx op = gen_rtx_SUBREG (V2DFmode, operands[1], 0);
!       emit_insn (gen_sse2_unpcklpd (op, op, op));
!       op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
!       emit_insn (gen_sse2_unpcklpd (op, op, op));
!     }
  })
  
  
--- 10290,10326 ----
  
  (define_split
    [(set (match_operand:DF 0 "register_operand" "")
+ 	(abs:DF (match_dup:DF 0)))
+    (use (match_operand:V2DF 1 "nonimmediate_operand" ""))
+    (clobber (reg:CC 17))]
+   "reload_completed && SSE_REG_P (operands[0])"
+   [(set (subreg:TI (match_dup 0) 0)
+ 	(and:TI (subreg:TI (match_dup 0) 0)
+ 		(subreg:TI (match_dup 1) 0)))]
+ {
+   operands[0] = gen_rtx_SUBREG (V2DFmode, operands[0], 0);
+   /* Avoid possible reformating on the operands.  */
+   if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
+     emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
+ })
+ 
+ (define_split
+   [(set (match_operand:DF 0 "register_operand" "")
  	(abs:DF (match_operand:DF 1 "register_operand" "")))
!    (use (match_operand:V2DF 2 "register_operand" ""))
     (clobber (reg:CC 17))]
    "reload_completed && SSE_REG_P (operands[0])"
    [(set (subreg:TI (match_dup 0) 0)
! 	(and:TI (subreg:TI (match_dup 0) 0)
  		(subreg:TI (match_dup 1) 0)))]
  {
+   operands[0] = gen_rtx_SUBREG (V2DFmode, operands[0], 0);
+   /* Operand2 should match operand0, as the opposite case is handled above.  */
+   if (REGNO (operands[2]) != REGNO (operands[0]))
+     abort ();
    /* Avoid possible reformating on the operands.  */
    if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
!     emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
  })
  
  


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]