This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: SSE fix 4


> On Mon, Oct 14, 2002 at 05:22:59PM +0200, Jan Hubicka wrote:
> >   (define_split
> >     [(set (match_operand:SF 0 "register_operand" "")
> > + 	(abs:SF (match_dup 0)))
> > +    (use (match_operand:V4SF 1 "nonmemory_operand" ""))
> > +    (clobber (reg:CC 17))]
> > +   "reload_completed && SSE_REG_P (operands[0])"
> > +   [(set (subreg:TI (match_dup 0) 0)
> > + 	(and:TI (subreg:TI (match_dup 0) 0)
> > + 		(subreg:TI (match_dup 1) 0)))]
> > + {
> > +   operands[0] = gen_rtx_SUBREG (V4SFmode, operands[0], 0);
> 
> Produces nested subregs.  Not Ok.
I caught that already.
This version builds SPEC without the nested subregs.

Mon Oct 14 18:41:50 CEST 2002  Jan Hubicka  <jh@suse.cz>

	* i386.md (abssf, absdf): Use vector operands for SSE.
	(abssf2_ifs, absdf2_ifs, absdf2_ifs_rex64 and splitters): Update for
	vector operand.

Index: i386.md
===================================================================
RCS file: /cvsroot/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.380.4.8
diff -c -3 -p -r1.380.4.8 i386.md
*** i386.md	14 Oct 2002 02:42:25 -0000	1.380.4.8
--- i386.md	14 Oct 2002 16:54:11 -0000
***************
*** 9996,10009 ****
  	{
  	  /* Using SSE is tricky, since we need bitwise negation of -0
  	     in register.  */
! 	  rtx reg = gen_reg_rtx (SFmode);
  	  rtx dest = operands[0];
  
  	  operands[1] = force_reg (SFmode, operands[1]);
  	  operands[0] = force_reg (SFmode, operands[0]);
  	  emit_move_insn (reg,
! 			  gen_lowpart (SFmode,
! 				       gen_int_mode (0x80000000, SImode)));
  	  emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg));
  	  if (dest != operands[0])
  	    emit_move_insn (dest, operands[0]);
--- 9996,10012 ----
  	{
  	  /* Using SSE is tricky, since we need bitwise negation of -0
  	     in register.  */
! 	  rtx reg = gen_reg_rtx (V4SFmode);
  	  rtx dest = operands[0];
+ 	  rtx imm;
  
  	  operands[1] = force_reg (SFmode, operands[1]);
  	  operands[0] = force_reg (SFmode, operands[0]);
+ 	  imm = gen_lowpart (SFmode, gen_int_mode(~0x80000000, SImode));
  	  emit_move_insn (reg,
! 			  gen_rtx_CONST_VECTOR (V4SFmode,
! 			  gen_rtvec (4, imm, CONST0_RTX (SFmode),
! 				     CONST0_RTX (SFmode), CONST0_RTX (SFmode))));
  	  emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg));
  	  if (dest != operands[0])
  	    emit_move_insn (dest, operands[0]);
***************
*** 10020,10028 ****
    "#")
  
  (define_insn "abssf2_ifs"
!   [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,f#xr,rm#xf")
! 	(abs:SF (match_operand:SF 1 "nonimmediate_operand" "x,0,0")))
!    (use (match_operand:SF 2 "nonmemory_operand" "*0#x,*g#x,*g#x"))
     (clobber (reg:CC 17))]
    "TARGET_SSE
     && (reload_in_progress || reload_completed
--- 10023,10031 ----
    "#")
  
  (define_insn "abssf2_ifs"
!   [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
! 	(abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,x,0,0")))
!    (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,x*rm,x*rm"))
     (clobber (reg:CC 17))]
    "TARGET_SSE
     && (reload_in_progress || reload_completed
***************
*** 10033,10039 ****
  (define_split
    [(set (match_operand:SF 0 "memory_operand" "")
  	(abs:SF (match_operand:SF 1 "memory_operand" "")))
!    (use (match_operand:SF 2 "" ""))
     (clobber (reg:CC 17))]
    ""
    [(parallel [(set (match_dup 0)
--- 10036,10042 ----
  (define_split
    [(set (match_operand:SF 0 "memory_operand" "")
  	(abs:SF (match_operand:SF 1 "memory_operand" "")))
!    (use (match_operand:V4SF 2 "" ""))
     (clobber (reg:CC 17))]
    ""
    [(parallel [(set (match_dup 0)
***************
*** 10043,10049 ****
  (define_split
    [(set (match_operand:SF 0 "register_operand" "")
  	(abs:SF (match_operand:SF 1 "register_operand" "")))
!    (use (match_operand:SF 2 "" ""))
     (clobber (reg:CC 17))]
    "reload_completed && !SSE_REG_P (operands[0])"
    [(parallel [(set (match_dup 0)
--- 10046,10052 ----
  (define_split
    [(set (match_operand:SF 0 "register_operand" "")
  	(abs:SF (match_operand:SF 1 "register_operand" "")))
!    (use (match_operand:V4SF 2 "" ""))
     (clobber (reg:CC 17))]
    "reload_completed && !SSE_REG_P (operands[0])"
    [(parallel [(set (match_dup 0)
***************
*** 10052,10064 ****
  
  (define_split
    [(set (match_operand:SF 0 "register_operand" "")
  	(abs:SF (match_operand:SF 1 "register_operand" "")))
!    (use (match_operand:SF 2 "register_operand" ""))
     (clobber (reg:CC 17))]
    "reload_completed && SSE_REG_P (operands[0])"
    [(set (subreg:TI (match_dup 0) 0)
! 	(and:TI (not:TI (subreg:TI (match_dup 2) 0))
! 		(subreg:TI (match_dup 1) 0)))])
  
  ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
  ;; because of secondary memory needed to reload from class FLOAT_INT_REGS
--- 10055,10088 ----
  
  (define_split
    [(set (match_operand:SF 0 "register_operand" "")
+ 	(abs:SF (match_dup 0)))
+    (use (match_operand:V4SF 1 "nonmemory_operand" ""))
+    (clobber (reg:CC 17))]
+   "reload_completed && SSE_REG_P (operands[0])"
+   [(set (subreg:TI (match_dup 0) 0)
+ 	(and:TI (subreg:TI (match_dup 0) 0)
+ 		(match_dup 1)))]
+ {
+   operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+   operands[1] = simplify_gen_subreg (TImode, operands[1], V4SFmode, 0);
+ })
+ 
+ (define_split
+   [(set (match_operand:SF 0 "register_operand" "")
  	(abs:SF (match_operand:SF 1 "register_operand" "")))
!    (use (match_operand:V4SF 2 "register_operand" ""))
     (clobber (reg:CC 17))]
    "reload_completed && SSE_REG_P (operands[0])"
    [(set (subreg:TI (match_dup 0) 0)
! 	(and:TI (subreg:TI (match_dup 0) 0)
! 		(match_dup 1)))]
! {
!   /* Operand2 should match operand0, as the opposite case is handled above.  */
!   if (REGNO (operands[2]) != REGNO (operands[0]))
!     abort ();
!   operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
!   operands[1] = simplify_gen_subreg (TImode, operands[1], V4SFmode, 0);
! })
  
  ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
  ;; because of secondary memory needed to reload from class FLOAT_INT_REGS
***************
*** 10121,10137 ****
  	{
  	  /* Using SSE is tricky, since we need bitwise negation of -0
  	     in register.  */
! 	  rtx reg = gen_reg_rtx (DFmode);
  #if HOST_BITS_PER_WIDE_INT >= 64
! 	  rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode);
  #else
! 	  rtx imm = immed_double_const (0, 0x80000000, DImode);
  #endif
  	  rtx dest = operands[0];
  
  	  operands[1] = force_reg (DFmode, operands[1]);
  	  operands[0] = force_reg (DFmode, operands[0]);
! 	  emit_move_insn (reg, gen_lowpart (DFmode, imm));
  	  emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg));
  	  if (dest != operands[0])
  	    emit_move_insn (dest, operands[0]);
--- 10145,10166 ----
  	{
  	  /* Using SSE is tricky, since we need bitwise negation of -0
  	     in register.  */
! 	  rtx reg = gen_reg_rtx (V2DFmode);
  #if HOST_BITS_PER_WIDE_INT >= 64
! 	  rtx imm = gen_int_mode (~(((HOST_WIDE_INT)1) << 63), DImode);
  #else
! 	  rtx imm = immed_double_const (~0, ~0x80000000, DImode);
  #endif
  	  rtx dest = operands[0];
  
  	  operands[1] = force_reg (DFmode, operands[1]);
  	  operands[0] = force_reg (DFmode, operands[0]);
! 
! 	  /* Produce LONG_DOUBLE with the proper immediate argument.  */
! 	  imm = gen_lowpart (DFmode, imm);
! 	  emit_move_insn (reg,
! 			  gen_rtx_CONST_VECTOR (V2DFmode,
! 			  gen_rtvec (2, imm, CONST0_RTX (DFmode))));
  	  emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg));
  	  if (dest != operands[0])
  	    emit_move_insn (dest, operands[0]);
***************
*** 10148,10156 ****
    "#")
  
  (define_insn "absdf2_ifs"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr,mr#Yf")
! 	(abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0,0")))
!    (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y,*g#Y"))
     (clobber (reg:CC 17))]
    "!TARGET_64BIT && TARGET_SSE2
     && (reload_in_progress || reload_completed
--- 10177,10185 ----
    "#")
  
  (define_insn "absdf2_ifs"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr,mr#Yf")
! 	(abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y,0,0")))
!    (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Y*rm,Y*rm"))
     (clobber (reg:CC 17))]
    "!TARGET_64BIT && TARGET_SSE2
     && (reload_in_progress || reload_completed
***************
*** 10159,10167 ****
    "#")
  
  (define_insn "*absdf2_ifs_rex64"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr")
! 	(abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0")))
!    (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y"))
     (clobber (reg:CC 17))]
    "TARGET_64BIT && TARGET_SSE2
     && (reload_in_progress || reload_completed
--- 10188,10196 ----
    "#")
  
  (define_insn "*absdf2_ifs_rex64"
!   [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr")
! 	(abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y,0")))
!    (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,*0,*Y*rm"))
     (clobber (reg:CC 17))]
    "TARGET_64BIT && TARGET_SSE2
     && (reload_in_progress || reload_completed
***************
*** 10172,10178 ****
  (define_split
    [(set (match_operand:DF 0 "memory_operand" "")
  	(abs:DF (match_operand:DF 1 "memory_operand" "")))
!    (use (match_operand:DF 2 "" ""))
     (clobber (reg:CC 17))]
    ""
    [(parallel [(set (match_dup 0)
--- 10201,10207 ----
  (define_split
    [(set (match_operand:DF 0 "memory_operand" "")
  	(abs:DF (match_operand:DF 1 "memory_operand" "")))
!    (use (match_operand:V2DF 2 "" ""))
     (clobber (reg:CC 17))]
    ""
    [(parallel [(set (match_dup 0)
***************
*** 10182,10188 ****
  (define_split
    [(set (match_operand:DF 0 "register_operand" "")
  	(abs:DF (match_operand:DF 1 "register_operand" "")))
!    (use (match_operand:DF 2 "" ""))
     (clobber (reg:CC 17))]
    "reload_completed && !SSE_REG_P (operands[0])"
    [(parallel [(set (match_dup 0)
--- 10211,10217 ----
  (define_split
    [(set (match_operand:DF 0 "register_operand" "")
  	(abs:DF (match_operand:DF 1 "register_operand" "")))
!    (use (match_operand:V2DF 2 "" ""))
     (clobber (reg:CC 17))]
    "reload_completed && !SSE_REG_P (operands[0])"
    [(parallel [(set (match_dup 0)
***************
*** 10191,10212 ****
  
  (define_split
    [(set (match_operand:DF 0 "register_operand" "")
  	(abs:DF (match_operand:DF 1 "register_operand" "")))
!    (use (match_operand:DF 2 "register_operand" ""))
     (clobber (reg:CC 17))]
    "reload_completed && SSE_REG_P (operands[0])"
    [(set (subreg:TI (match_dup 0) 0)
! 	(and:TI (not:TI (subreg:TI (match_dup 2) 0))
! 		(subreg:TI (match_dup 1) 0)))]
  {
    /* Avoid possible reformating on the operands.  */
    if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
!     {
!       rtx op = gen_rtx_SUBREG (V2DFmode, operands[1], 0);
!       emit_insn (gen_sse2_unpcklpd (op, op, op));
!       op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
!       emit_insn (gen_sse2_unpcklpd (op, op, op));
!     }
  })
  
  
--- 10220,10258 ----
  
  (define_split
    [(set (match_operand:DF 0 "register_operand" "")
+ 	(abs:DF (match_dup:DF 0)))
+    (use (match_operand:V2DF 1 "nonimmediate_operand" ""))
+    (clobber (reg:CC 17))]
+   "reload_completed && SSE_REG_P (operands[0])"
+   [(set (subreg:TI (match_dup 0) 0)
+ 	(and:TI (subreg:TI (match_dup 0) 0)
+ 		(match_dup 1)))]
+ {
+   operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+   operands[1] = simplify_gen_subreg (TImode, operands[1], V2DFmode, 0);
+   /* Avoid possible reformating on the operands.  */
+   if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
+     emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
+ })
+ 
+ (define_split
+   [(set (match_operand:DF 0 "register_operand" "")
  	(abs:DF (match_operand:DF 1 "register_operand" "")))
!    (use (match_operand:V2DF 2 "register_operand" ""))
     (clobber (reg:CC 17))]
    "reload_completed && SSE_REG_P (operands[0])"
    [(set (subreg:TI (match_dup 0) 0)
! 	(and:TI (subreg:TI (match_dup 0) 0)
! 		(match_dup 1)))]
  {
+   operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+   operands[1] = simplify_gen_subreg (TImode, operands[1], V2DFmode, 0);
+   /* Operand2 should match operand0, as the opposite case is handled above.  */
+   if (REGNO (operands[2]) != REGNO (operands[0]))
+     abort ();
    /* Avoid possible reformating on the operands.  */
    if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
!     emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
  })
  
  


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]