This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] x86 peephole2s to optimize "1LL << x"


On Sat, Sep 11, 2004 at 12:04:44PM -0600, Roger Sayle wrote:
> foo:	movl    4(%esp), %ecx
>         movl    $1, %eax
>         xorl    %edx, %edx
>         shldl   %eax, %edx   <---
>         sall    %cl, %eax

Ah, I see what you're going for here.  I don't particularly like
the way you're using peephole2 to get it, though.

What about something like this?  In particular:

  (1) Don't rely on various instruction orderings in order to 
      perform this optimization.  Do it at split time.

  (2) Don't unnecessarily increase spills with CMOVE.

  (3) If shifts are not slow, implement 1LL << C with

        xorl    %eax, %eax
        xorl    %edx, %edx
        testb   $32, %cl
        sete    %al
        setne   %dl
        sall    %cl, %edx
        sall    %cl, %eax

      Which, in addition to everything else, is one byte smaller.

  (4) If shifts are slow and cmove is enabled, implement 1LL << C with

        xorl    %eax, %eax
        xorl    %edx, %edx
        movl    $1, %ebx
        sall    %cl, %ebx
        testb   $32, %cl
        cmove   %ebx, %eax
        cmovne  %ebx, %edx

      instead of

        movl    $1, %eax
        xorl    %edx, %edx
        xorl    %ebx, %ebx
        sall    %cl, %eax
        testb   $32, %cl
        cmovne  %eax, %edx
        cmovne  %ebx, %eax

      The new sequence could conceivably run a fraction faster, because
      its second cmove instruction is not anti-dependent on the first.
      Perhaps that's wishful thinking, and this new code should be removed.


r~




Index: i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.724
diff -c -p -d -r1.724 i386.c
*** i386.c	10 Sep 2004 02:32:19 -0000	1.724
--- i386.c	11 Sep 2004 22:18:28 -0000
*************** ix86_split_ashldi (rtx *operands, rtx sc
*** 10019,10024 ****
--- 10019,10096 ----
  	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
  	  ix86_expand_ashlsi3_const (low[0], count);
  	}
+       return;
+     }
+ 
+   split_di (operands, 1, low, high);
+ 
+   if (operands[1] == const1_rtx)
+     {
+       /* Assuming we've chosen QImode capable registers, and the
+ 	 shifter isn't broken (hi pentium4), then 1LL << N can be done
+ 	 with two 32-bit shifts, no branches, no cmoves.  */
+       if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])
+ 	  && ix86_cost->shift_var < 3*ix86_cost->add)
+ 	{
+ 	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
+ 
+ 	  ix86_expand_clear (low[0]);
+ 	  ix86_expand_clear (high[0]);
+ 	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
+ 	  
+ 	  d = gen_lowpart (QImode, low[0]);
+ 	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
+ 	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
+ 	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
+ 
+ 	  d = gen_lowpart (QImode, high[0]);
+ 	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
+ 	  s = gen_rtx_NE (QImode, flags, const0_rtx);
+ 	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
+ 
+ 	  emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
+ 	  emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
+ 	  return;
+ 	}
+ 
+       /* Failing that, perform one shift into a scratch register and
+ 	 use cmove to get it into the appropriate destination.  */
+       if (TARGET_CMOVE && scratch)
+ 	{
+ 	  rtx x, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
+ 
+ 	  emit_move_insn (scratch, const1_rtx);
+ 	  ix86_expand_clear (low[0]);
+ 	  ix86_expand_clear (high[0]);
+ 	  emit_insn (gen_ashlsi3 (scratch, scratch, operands[2]));
+ 	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
+ 
+ 	  x = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
+ 	  x = gen_rtx_IF_THEN_ELSE (SImode, x, scratch, low[0]);
+ 	  x = gen_rtx_SET (VOIDmode, low[0], x);
+ 	  emit_insn (x);
+ 
+ 	  x = gen_rtx_NE (VOIDmode, flags, const0_rtx);
+ 	  x = gen_rtx_IF_THEN_ELSE (SImode, x, scratch, high[0]);
+ 	  x = gen_rtx_SET (VOIDmode, high[0], x);
+ 	  emit_insn (x);
+ 	  return;
+ 	}
+ 
+       /* Failing that, we can still avoid the shld instruction, because
+ 	 we know that we're shifting 0...31 zeros into a zero.  */
+       emit_move_insn (low[0], const1_rtx);
+       ix86_expand_clear (high[0]);
+     }
+   else if (operands[1] == constm1_rtx)
+     {
+       /* For -1LL << N, we can avoid the shld instruction, because we
+ 	 know that we're shifting 0...31 ones into a -1.  */
+       emit_move_insn (low[0], constm1_rtx);
+       if (optimize_size)
+         emit_move_insn (high[0], low[0]);
+       else
+ 	emit_move_insn (high[0], constm1_rtx);
      }
    else
      {
*************** ix86_split_ashldi (rtx *operands, rtx sc
*** 10026,10048 ****
  	emit_move_insn (operands[0], operands[1]);
  
        split_di (operands, 1, low, high);
- 
        emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
!       emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
  
!       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
! 	{
! 	  if (! no_new_pseudos)
! 	    scratch = force_reg (SImode, const0_rtx);
! 	  else
! 	    emit_move_insn (scratch, const0_rtx);
  
! 	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
! 					  scratch));
! 	}
!       else
! 	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
      }
  }
  
  void
--- 10098,10115 ----
  	emit_move_insn (operands[0], operands[1]);
  
        split_di (operands, 1, low, high);
        emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
!     }
  
!   emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
  
!   if (TARGET_CMOVE && scratch)
!     {
!       ix86_expand_clear (scratch);
!       emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
      }
+   else
+     emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
  }
  
  void
*************** ix86_split_ashrdi (rtx *operands, rtx sc
*** 10066,10080 ****
        else if (count >= 32)
  	{
  	  emit_move_insn (low[0], high[1]);
! 
! 	  if (! reload_completed)
! 	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
! 	  else
! 	    {
! 	      emit_move_insn (high[0], low[0]);
! 	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
! 	    }
! 
  	  if (count > 32)
  	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
  	}
--- 10133,10140 ----
        else if (count >= 32)
  	{
  	  emit_move_insn (low[0], high[1]);
! 	  emit_move_insn (high[0], low[0]);
! 	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
  	  if (count > 32)
  	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
  	}
*************** ix86_split_ashrdi (rtx *operands, rtx sc
*** 10096,10105 ****
        emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
        emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
  
!       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
  	{
- 	  if (! no_new_pseudos)
- 	    scratch = gen_reg_rtx (SImode);
  	  emit_move_insn (scratch, high[0]);
  	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
  	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
--- 10156,10163 ----
        emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
        emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
  
!       if (TARGET_CMOVE && scratch)
  	{
  	  emit_move_insn (scratch, high[0]);
  	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
  	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
*************** ix86_split_lshrdi (rtx *operands, rtx sc
*** 10124,10130 ****
        if (count >= 32)
  	{
  	  emit_move_insn (low[0], high[1]);
! 	  emit_move_insn (high[0], const0_rtx);
  
  	  if (count > 32)
  	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
--- 10182,10188 ----
        if (count >= 32)
  	{
  	  emit_move_insn (low[0], high[1]);
! 	  ix86_expand_clear (high[0]);
  
  	  if (count > 32)
  	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
*************** ix86_split_lshrdi (rtx *operands, rtx sc
*** 10148,10160 ****
        emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
  
        /* Heh.  By reversing the arguments, we can reuse this pattern.  */
!       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
  	{
! 	  if (! no_new_pseudos)
! 	    scratch = force_reg (SImode, const0_rtx);
! 	  else
! 	    emit_move_insn (scratch, const0_rtx);
! 
  	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
  					  scratch));
  	}
--- 10206,10214 ----
        emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
  
        /* Heh.  By reversing the arguments, we can reuse this pattern.  */
!       if (TARGET_CMOVE && scratch)
  	{
! 	  ix86_expand_clear (scratch);
  	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
  					  scratch));
  	}
Index: i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.558
diff -c -p -d -r1.558 i386.md
*** i386.md	8 Sep 2004 05:08:26 -0000	1.558
--- i386.md	11 Sep 2004 22:18:29 -0000
***************
*** 1906,1912 ****
  (define_split
    [(set (match_operand:DI 0 "push_operand" "")
          (match_operand:DI 1 "immediate_operand" ""))]
!   "TARGET_64BIT && (flow2_completed || (reload_completed && !flag_peephole2))
     && !symbolic_operand (operands[1], DImode)
     && !x86_64_immediate_operand (operands[1], DImode)"
    [(set (match_dup 0) (match_dup 1))
--- 1906,1912 ----
  (define_split
    [(set (match_operand:DI 0 "push_operand" "")
          (match_operand:DI 1 "immediate_operand" ""))]
!   "TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)
     && !symbolic_operand (operands[1], DImode)
     && !x86_64_immediate_operand (operands[1], DImode)"
    [(set (match_dup 0) (match_dup 1))
***************
*** 2168,2174 ****
  (define_split
    [(set (match_operand:DI 0 "memory_operand" "")
          (match_operand:DI 1 "immediate_operand" ""))]
!   "TARGET_64BIT && (flow2_completed || (reload_completed && !flag_peephole2))
     && !symbolic_operand (operands[1], DImode)
     && !x86_64_immediate_operand (operands[1], DImode)"
    [(set (match_dup 2) (match_dup 3))
--- 2168,2174 ----
  (define_split
    [(set (match_operand:DI 0 "memory_operand" "")
          (match_operand:DI 1 "immediate_operand" ""))]
!   "TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)
     && !symbolic_operand (operands[1], DImode)
     && !x86_64_immediate_operand (operands[1], DImode)"
    [(set (match_dup 2) (match_dup 3))
***************
*** 7924,7930 ****
    "")
  
  (define_insn "*testqi_1"
!   [(set (reg 17)
          (compare (and:QI (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm,r")
  			 (match_operand:QI 1 "general_operand" "n,n,qn,n"))
  		 (const_int 0)))]
--- 7924,7930 ----
    "")
  
  (define_insn "*testqi_1"
!   [(set (reg FLAGS_REG)
          (compare (and:QI (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm,r")
  			 (match_operand:QI 1 "general_operand" "n,n,qn,n"))
  		 (const_int 0)))]
***************
*** 10623,10642 ****
  ;; than 31.
  
  (define_expand "ashldi3"
!   [(parallel [(set (match_operand:DI 0 "shiftdi_operand" "")
! 		   (ashift:DI (match_operand:DI 1 "shiftdi_operand" "")
! 			      (match_operand:QI 2 "nonmemory_operand" "")))
! 	      (clobber (reg:CC FLAGS_REG))])]
    ""
! {
!   if (!TARGET_64BIT && TARGET_CMOVE && ! immediate_operand (operands[2], QImode))
!     {
!       emit_insn (gen_ashldi3_1 (operands[0], operands[1], operands[2]));
!       DONE;
!     }
!   ix86_expand_binary_operator (ASHIFT, DImode, operands);
!   DONE;
! })
  
  (define_insn "*ashldi3_1_rex64"
    [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
--- 10623,10633 ----
  ;; than 31.
  
  (define_expand "ashldi3"
!   [(set (match_operand:DI 0 "shiftdi_operand" "")
! 	(ashift:DI (match_operand:DI 1 "ashldi_input_operand" "")
! 		   (match_operand:QI 2 "nonmemory_operand" "")))]
    ""
!   "ix86_expand_binary_operator (ASHIFT, DImode, operands); DONE;")
  
  (define_insn "*ashldi3_1_rex64"
    [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
***************
*** 10738,10778 ****
  	   (const_string "ishift")))
     (set_attr "mode" "DI")])
  
! (define_insn "ashldi3_1"
!   [(set (match_operand:DI 0 "register_operand" "=r")
! 	(ashift:DI (match_operand:DI 1 "register_operand" "0")
! 		   (match_operand:QI 2 "nonmemory_operand" "Jc")))
!    (clobber (match_scratch:SI 3 "=&r"))
!    (clobber (reg:CC FLAGS_REG))]
!   "!TARGET_64BIT && TARGET_CMOVE"
!   "#"
!   [(set_attr "type" "multi")])
! 
! (define_insn "*ashldi3_2"
!   [(set (match_operand:DI 0 "register_operand" "=r")
! 	(ashift:DI (match_operand:DI 1 "register_operand" "0")
! 		   (match_operand:QI 2 "nonmemory_operand" "Jc")))
     (clobber (reg:CC FLAGS_REG))]
    "!TARGET_64BIT"
    "#"
    [(set_attr "type" "multi")])
  
! (define_split
!   [(set (match_operand:DI 0 "register_operand" "")
! 	(ashift:DI (match_operand:DI 1 "register_operand" "")
! 		   (match_operand:QI 2 "nonmemory_operand" "")))
!    (clobber (match_scratch:SI 3 ""))
!    (clobber (reg:CC FLAGS_REG))]
!   "!TARGET_64BIT && TARGET_CMOVE && reload_completed"
    [(const_int 0)]
    "ix86_split_ashldi (operands, operands[3]); DONE;")
  
  (define_split
    [(set (match_operand:DI 0 "register_operand" "")
! 	(ashift:DI (match_operand:DI 1 "register_operand" "")
  		   (match_operand:QI 2 "nonmemory_operand" "")))
     (clobber (reg:CC FLAGS_REG))]
!   "!TARGET_64BIT && reload_completed"
    [(const_int 0)]
    "ix86_split_ashldi (operands, NULL_RTX); DONE;")
  
--- 10729,10763 ----
  	   (const_string "ishift")))
     (set_attr "mode" "DI")])
  
! (define_insn "*ashldi3_1"
!   [(set (match_operand:DI 0 "register_operand" "=r,r")
! 	(ashift:DI (match_operand:DI 1 "reg_or_pm1_operand" "n,0")
! 		   (match_operand:QI 2 "nonmemory_operand" "Jc,Jc")))
     (clobber (reg:CC FLAGS_REG))]
    "!TARGET_64BIT"
    "#"
    [(set_attr "type" "multi")])
  
! ;; By default we don't ask for a scratch register, because when DImode
! ;; values are manipulated, registers are already at a premium.  But if
! ;; we have one handy, we won't turn it away.
! (define_peephole2
!   [(match_scratch:SI 3 "r")
!    (parallel [(set (match_operand:DI 0 "register_operand" "")
! 		   (ashift:DI (match_operand:DI 1 "nonmemory_operand" "")
! 			      (match_operand:QI 2 "nonmemory_operand" "")))
! 	      (clobber (reg:CC FLAGS_REG))])
!    (match_dup 3)]
!   "!TARGET_64BIT && TARGET_CMOVE"
    [(const_int 0)]
    "ix86_split_ashldi (operands, operands[3]); DONE;")
  
  (define_split
    [(set (match_operand:DI 0 "register_operand" "")
! 	(ashift:DI (match_operand:DI 1 "nonmemory_operand" "")
  		   (match_operand:QI 2 "nonmemory_operand" "")))
     (clobber (reg:CC FLAGS_REG))]
!   "!TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)"
    [(const_int 0)]
    "ix86_split_ashldi (operands, NULL_RTX); DONE;")
  
***************
*** 10829,10835 ****
    JUMP_LABEL (tmp) = label;
  
    emit_move_insn (operands[0], operands[1]);
!   emit_move_insn (operands[1], const0_rtx);
  
    emit_label (label);
    LABEL_NUSES (label) = 1;
--- 10814,10820 ----
    JUMP_LABEL (tmp) = label;
  
    emit_move_insn (operands[0], operands[1]);
!   ix86_expand_clear (operands[1]);
  
    emit_label (label);
    LABEL_NUSES (label) = 1;
***************
*** 11346,11372 ****
  ;; See comment above `ashldi3' about how this works.
  
  (define_expand "ashrdi3"
!   [(parallel [(set (match_operand:DI 0 "shiftdi_operand" "")
! 		   (ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
! 				(match_operand:QI 2 "nonmemory_operand" "")))
! 	      (clobber (reg:CC FLAGS_REG))])]
    ""
! {
!   if (!TARGET_64BIT && TARGET_CMOVE && ! immediate_operand (operands[2], QImode))
!     {
!       emit_insn (gen_ashrdi3_1 (operands[0], operands[1], operands[2]));
!       DONE;
!     }
!   ix86_expand_binary_operator (ASHIFTRT, DImode, operands);
!   DONE;
! })
  
! (define_insn "ashrdi3_63_rex64"
    [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
  	(ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0")
  		     (match_operand:DI 2 "const_int_operand" "i,i")))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_64BIT && INTVAL (operands[2]) == 63 && (TARGET_USE_CLTD || optimize_size)
     && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
    "@
     {cqto|cqo}
--- 11331,11349 ----
  ;; See comment above `ashldi3' about how this works.
  
  (define_expand "ashrdi3"
!   [(set (match_operand:DI 0 "shiftdi_operand" "")
! 	(ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
! 		     (match_operand:QI 2 "nonmemory_operand" "")))]
    ""
!   "ix86_expand_binary_operator (ASHIFTRT, DImode, operands); DONE;")
  
! (define_insn "*ashrdi3_63_rex64"
    [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
  	(ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0")
  		     (match_operand:DI 2 "const_int_operand" "i,i")))
     (clobber (reg:CC FLAGS_REG))]
!   "TARGET_64BIT && INTVAL (operands[2]) == 63
!    && (TARGET_USE_CLTD || optimize_size)
     && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
    "@
     {cqto|cqo}
***************
*** 11441,11458 ****
    [(set_attr "type" "ishift")
     (set_attr "mode" "DI")])
  
! 
! (define_insn "ashrdi3_1"
!   [(set (match_operand:DI 0 "register_operand" "=r")
! 	(ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
! 		     (match_operand:QI 2 "nonmemory_operand" "Jc")))
!    (clobber (match_scratch:SI 3 "=&r"))
!    (clobber (reg:CC FLAGS_REG))]
!   "!TARGET_64BIT && TARGET_CMOVE"
!   "#"
!   [(set_attr "type" "multi")])
! 
! (define_insn "*ashrdi3_2"
    [(set (match_operand:DI 0 "register_operand" "=r")
  	(ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
  		     (match_operand:QI 2 "nonmemory_operand" "Jc")))
--- 11418,11424 ----
    [(set_attr "type" "ishift")
     (set_attr "mode" "DI")])
  
! (define_insn "*ashrdi3_1"
    [(set (match_operand:DI 0 "register_operand" "=r")
  	(ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
  		     (match_operand:QI 2 "nonmemory_operand" "Jc")))
***************
*** 11461,11473 ****
    "#"
    [(set_attr "type" "multi")])
  
! (define_split
!   [(set (match_operand:DI 0 "register_operand" "")
! 	(ashiftrt:DI (match_operand:DI 1 "register_operand" "")
! 		     (match_operand:QI 2 "nonmemory_operand" "")))
!    (clobber (match_scratch:SI 3 ""))
!    (clobber (reg:CC FLAGS_REG))]
!   "!TARGET_64BIT && TARGET_CMOVE && reload_completed"
    [(const_int 0)]
    "ix86_split_ashrdi (operands, operands[3]); DONE;")
  
--- 11427,11443 ----
    "#"
    [(set_attr "type" "multi")])
  
! ;; By default we don't ask for a scratch register, because when DImode
! ;; values are manipulated, registers are already at a premium.  But if
! ;; we have one handy, we won't turn it away.
! (define_peephole2
!   [(match_scratch:SI 3 "r")
!    (parallel [(set (match_operand:DI 0 "register_operand" "")
! 		   (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
! 			        (match_operand:QI 2 "nonmemory_operand" "")))
! 	      (clobber (reg:CC FLAGS_REG))])
!    (match_dup 3)]
!   "!TARGET_64BIT && TARGET_CMOVE"
    [(const_int 0)]
    "ix86_split_ashrdi (operands, operands[3]); DONE;")
  
***************
*** 11476,11482 ****
  	(ashiftrt:DI (match_operand:DI 1 "register_operand" "")
  		     (match_operand:QI 2 "nonmemory_operand" "")))
     (clobber (reg:CC FLAGS_REG))]
!   "!TARGET_64BIT && reload_completed"
    [(const_int 0)]
    "ix86_split_ashrdi (operands, NULL_RTX); DONE;")
  
--- 11446,11452 ----
  	(ashiftrt:DI (match_operand:DI 1 "register_operand" "")
  		     (match_operand:QI 2 "nonmemory_operand" "")))
     (clobber (reg:CC FLAGS_REG))]
!   "!TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)"
    [(const_int 0)]
    "ix86_split_ashrdi (operands, NULL_RTX); DONE;")
  
***************
*** 11858,11877 ****
  ;; See comment above `ashldi3' about how this works.
  
  (define_expand "lshrdi3"
!   [(parallel [(set (match_operand:DI 0 "shiftdi_operand" "")
! 		   (lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
! 			        (match_operand:QI 2 "nonmemory_operand" "")))
! 	      (clobber (reg:CC FLAGS_REG))])]
    ""
! {
!   if (!TARGET_64BIT && TARGET_CMOVE && ! immediate_operand (operands[2], QImode))
!     {
!       emit_insn (gen_lshrdi3_1 (operands[0], operands[1], operands[2]));
!       DONE;
!     }
!   ix86_expand_binary_operator (LSHIFTRT, DImode, operands);
!   DONE;
! })
  
  (define_insn "*lshrdi3_1_one_bit_rex64"
    [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
--- 11828,11838 ----
  ;; See comment above `ashldi3' about how this works.
  
  (define_expand "lshrdi3"
!   [(set (match_operand:DI 0 "shiftdi_operand" "")
! 	(lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
! 		     (match_operand:QI 2 "nonmemory_operand" "")))]
    ""
!   "ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE;")
  
  (define_insn "*lshrdi3_1_one_bit_rex64"
    [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
***************
*** 11937,11953 ****
    [(set_attr "type" "ishift")
     (set_attr "mode" "DI")])
  
! (define_insn "lshrdi3_1"
!   [(set (match_operand:DI 0 "register_operand" "=r")
! 	(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
! 		     (match_operand:QI 2 "nonmemory_operand" "Jc")))
!    (clobber (match_scratch:SI 3 "=&r"))
!    (clobber (reg:CC FLAGS_REG))]
!   "!TARGET_64BIT && TARGET_CMOVE"
!   "#"
!   [(set_attr "type" "multi")])
! 
! (define_insn "*lshrdi3_2"
    [(set (match_operand:DI 0 "register_operand" "=r")
  	(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
  		     (match_operand:QI 2 "nonmemory_operand" "Jc")))
--- 11898,11904 ----
    [(set_attr "type" "ishift")
     (set_attr "mode" "DI")])
  
! (define_insn "*lshrdi3_1"
    [(set (match_operand:DI 0 "register_operand" "=r")
  	(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
  		     (match_operand:QI 2 "nonmemory_operand" "Jc")))
***************
*** 11956,11968 ****
    "#"
    [(set_attr "type" "multi")])
  
! (define_split 
!   [(set (match_operand:DI 0 "register_operand" "")
! 	(lshiftrt:DI (match_operand:DI 1 "register_operand" "")
! 		     (match_operand:QI 2 "nonmemory_operand" "")))
!    (clobber (match_scratch:SI 3 ""))
!    (clobber (reg:CC FLAGS_REG))]
!   "!TARGET_64BIT && TARGET_CMOVE && reload_completed"
    [(const_int 0)]
    "ix86_split_lshrdi (operands, operands[3]); DONE;")
  
--- 11907,11923 ----
    "#"
    [(set_attr "type" "multi")])
  
! ;; By default we don't ask for a scratch register, because when DImode
! ;; values are manipulated, registers are already at a premium.  But if
! ;; we have one handy, we won't turn it away.
! (define_peephole2
!   [(match_scratch:SI 3 "r")
!    (parallel [(set (match_operand:DI 0 "register_operand" "")
! 		   (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
! 			        (match_operand:QI 2 "nonmemory_operand" "")))
! 	      (clobber (reg:CC FLAGS_REG))])
!    (match_dup 3)]
!   "!TARGET_64BIT && TARGET_CMOVE"
    [(const_int 0)]
    "ix86_split_lshrdi (operands, operands[3]); DONE;")
  
***************
*** 11971,11977 ****
  	(lshiftrt:DI (match_operand:DI 1 "register_operand" "")
  		     (match_operand:QI 2 "nonmemory_operand" "")))
     (clobber (reg:CC FLAGS_REG))]
!   "!TARGET_64BIT && reload_completed"
    [(const_int 0)]
    "ix86_split_lshrdi (operands, NULL_RTX); DONE;")
  
--- 11926,11932 ----
  	(lshiftrt:DI (match_operand:DI 1 "register_operand" "")
  		     (match_operand:QI 2 "nonmemory_operand" "")))
     (clobber (reg:CC FLAGS_REG))]
!   "!TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)"
    [(const_int 0)]
    "ix86_split_lshrdi (operands, NULL_RTX); DONE;")
  
***************
*** 12840,12846 ****
    [(set_attr "type" "setcc")
     (set_attr "mode" "QI")])
  
! (define_insn "setcc_2"
    [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
  	(match_operator:QI 1 "ix86_comparison_operator"
  	  [(reg 17) (const_int 0)]))]
--- 12795,12801 ----
    [(set_attr "type" "setcc")
     (set_attr "mode" "QI")])
  
! (define_insn "*setcc_2"
    [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
  	(match_operator:QI 1 "ix86_comparison_operator"
  	  [(reg 17) (const_int 0)]))]
Index: predicates.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/predicates.md,v
retrieving revision 1.3
diff -c -p -d -r1.3 predicates.md
*** predicates.md	18 Aug 2004 17:05:07 -0000	1.3
--- predicates.md	11 Sep 2004 22:18:29 -0000
***************
*** 553,564 ****
--- 553,575 ----
    return op == const1_rtx || op == constm1_rtx;
  })
  
+ ;; True for registers, or 1 or -1.  Used to optimize double-word shifts.
+ (define_predicate "reg_or_pm1_operand"
+   (ior (match_operand 0 "register_operand")
+        (and (match_code "const_int")
+ 	    (match_test "op == const1_rtx || op == constm1_rtx"))))
+ 
  ;; True if OP is acceptable as operand of DImode shift expander.
  (define_predicate "shiftdi_operand"
    (if_then_else (match_test "TARGET_64BIT")
      (match_operand 0 "nonimmediate_operand")
      (match_operand 0 "register_operand")))
  
+ (define_predicate "ashldi_input_operand"
+   (if_then_else (match_test "TARGET_64BIT")
+     (match_operand 0 "nonimmediate_operand")
+     (match_operand 0 "reg_or_pm1_operand")))
+ 
  ;; Return true if OP is a vector load from the constant pool with just
  ;; the first element non-zero.
  (define_predicate "zero_extended_scalar_load_operand"


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]