This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RE: [PATCH] Fix PR 61225


> -----Original Message-----
> From: Jeff Law [mailto:law@redhat.com]
> Sent: Tuesday, December 09, 2014 5:29 AM
> To: Zhenqiang Chen
> Cc: Steven Bosscher; gcc-patches@gcc.gnu.org; Jakub Jelinek
> Subject: Re: [PATCH] Fix PR 61225
> 
> On 12/04/14 01:43, Zhenqiang Chen wrote:
> >>> > >
> >>> > >          Part of PR rtl-optimization/61225
> >>> > >          * combine.c (refer_same_reg_p): New function.
> >>> > >          (combine_instructions): Handle I1 -> I2 -> I3; I2 ->
insn.
> >>> > >          (try_combine): Add one more parameter TO_COMBINED_INSN,
> >>> > > which
> >> >is
> >>> > >          used to create a new insn parallel (TO_COMBINED_INSN,
I3).
> >>> > >
> >>> > >testsuite/ChangeLog:
> >>> > >2014-08-04  Zhenqiang Chen<zhenqiang.chen@linaro.org>
> >>> > >
> >>> > >          * gcc.target/i386/pr61225.c: New test.
> THanks for the updates and clarifications.  Just a few minor things and
while
> it's a bit of a hack, I'll approve:
> 
> 
> 
> > +
> > +/* A is a compare (reg1, 0) and B is SINGLE_SET which SET_SRC is reg2.
> > +   It returns TRUE, if reg1 == reg2, and no other refer of reg1
> > +   except A and B.  */
> > +
> > +static bool
> > +refer_same_reg_p (rtx_insn *a, rtx_insn *b)
> > +{
> > +  rtx seta = single_set (a);
> > +  rtx setb = single_set (b);
> > +
> > +  if (BLOCK_FOR_INSN (a) != BLOCK_FOR_INSN (b)
> > +      || !seta
> > +      || !setb)
> > +    return false;
> > 
> > +  if (GET_CODE (SET_SRC (seta)) != COMPARE
> > +      || GET_MODE_CLASS (GET_MODE (SET_DEST (seta))) != MODE_CC
> > +      || !REG_P (XEXP (SET_SRC (seta), 0))
> > +      || XEXP (SET_SRC (seta), 1) != const0_rtx
> > +      || !REG_P (SET_SRC (setb))
> > +      || REGNO (SET_SRC (setb)) != REGNO (XEXP (SET_SRC (seta), 0)))
> > +    return false;
> Do you need to verify SETA and SETB satisfy single_set?  Or has that
> already been done elsewhere?

A is NEXT_INSN (insn)
B is prev_nonnote_nondebug_insn (insn),

For I1 -> I2 -> B; I2 -> A;
LOG_LINK can make sure I1 and I2 are single_set, but not A and B. And I did
found codes in function try_combine, which can make sure B (or I3) is
single_set. 

So I think the check can skip failed cases at early stage.
 
> The name refer_same_reg_p seems wrong -- your function is verifying the
> underlying RTL store as well as the existence of a a dependency between
> the insns.  Can you try to come up with a better name?

Change it to can_reuse_cc_set_p.

> Please use CONST0_RTX (mode)  IIRC that'll allow this to work regardless
> of the size of the modes relative to the host word size.
 
Updated. 
 
> > +
> > +  if (DF_REG_USE_COUNT (REGNO (SET_SRC (setb))) > 2)
> > +    {
> > +      df_ref use;
> > +      rtx insn;
> > +      unsigned int i = REGNO (SET_SRC (setb));
> > +
> > +      for (use = DF_REG_USE_CHAIN (i); use; use = DF_REF_NEXT_REG
(use))
> > +        {
> > +	  insn = DF_REF_INSN (use);
> > +	  if (insn != a && insn != b && !(NOTE_P (insn) || DEBUG_INSN_P
> (insn)))
> > +	    return false;
> > +	}
> > +    }
> > +
> > +  return true;
> > +}
> Is this fragment really needed?  Does it ever trigger?  I'd think that
> for > 2 uses punting would be fine.  Do we really commonly have cases
> with > 2 uses, but where they're all in SETA and SETB?

The check is to make sure the correctness.  Here is a case,

int 
f1 (int *x)
{
  int t = --*x;
  if (!t)
    foo (x);
  return t;
}

  _4 = *x_3(D);
  _5 = _4 + -1;
  *x_3(D) = _5;
  # DEBUG t => _5
  if (_5 == 0)
   ...
  <bb 4>:
  return _5;

"_5" is used in "return _5". So we can not remove "_5 = _4 + -1".
 
> >   		  }
> >   	      }
> >
> > +	  /* Try to combine a compare insn that sets CC
> > +	     with a preceding insn that can set CC, and maybe with its
> > +	     logical predecessor as well.
> > +	     We need this special code because data flow connections
> > +	     do not get entered in LOG_LINKS.  */
> > +	  if ((prev = prev_nonnote_nondebug_insn (insn)) != NULL_RTX
> > +	      && refer_same_reg_p (insn, prev)
> > +	      && max_combine >= 4)
> > +	    {
> > +		struct insn_link *next1;
> > +		FOR_EACH_LOG_LINK (next1, prev)
> > +		  {
> > +		    rtx_insn *link1 = next1->insn;
> > +		    if (NOTE_P (link1))
> > +		      continue;
> > +		    /* I1 -> I2 -> I3; I2 -> insn;
> > +		       output parallel (insn, I3).  */
> > +		    FOR_EACH_LOG_LINK (nextlinks, link1)
> > +		      if ((next = try_combine (prev, link1,
> > +					       nextlinks->insn, NULL,
> > +					       &new_direct_jump_p,
> > +					       last_combined_insn, insn)) !=
0)
> > +
> > +			{
> > +			  delete_insn (insn);
> > +			  insn = next;
> > +			  statistics_counter_event (cfun, "four-insn
combine",
> 1);
> > +			  goto retry;
> > +			}
> > +		    /* I2 -> I3; I2 -> insn
> > +		       output next = parallel (insn, I3).  */
> > +		    if ((next = try_combine (prev, link1,
> > +					     NULL, NULL,
> > +					     &new_direct_jump_p,
> > +					     last_combined_insn, insn)) !=
0)
> > +
> > +		      {
> > +			delete_insn (insn);
> > +			insn = next;
> > +			statistics_counter_event (cfun, "three-insn
combine",
> 1);
> > +			goto retry;
> > +		      }
> > +		  }
> > +	    }
> So you've got two new combine cases here, but I think the testcase only
> tests one of them.  Can you include a testcase for both of hte major
> paths above (I1->I2->I3; I2->insn and I2->I3; I2->INSN)

pr61225.c is the case to cover I1->I2->I3; I2->insn.

For I2 -> I3; I2 -> insn, I tried my test cases and found peephole2 can also
handle them. So I removed the code from the patch.

Here is the final patch.
Bootstrap and no make check regression on X86-64.

ChangeLog:
2014-11-09  Zhenqiang Chen  <zhenqiang.chen@linaro.org>

	Part of PR rtl-optimization/61225
	* combine.c (can_reuse_cc_set_p): New function.
	(combine_instructions): Handle I1 -> I2 -> I3; I2 -> insn.
	(try_combine): Add one more parameter TO_COMBINED_INSN, which
	is used to create a new insn parallel (TO_COMBINED_INSN, I3).

testsuite/ChangeLog:
2014-11-09  Zhenqiang Chen<zhenqiang.chen@linaro.org>

	* gcc.target/i386/pr61225.c: New test.

diff --git a/gcc/combine.c b/gcc/combine.c
index e6deb41..7360ca3 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -431,7 +431,7 @@ static int can_combine_p (rtx_insn *, rtx_insn *,
rtx_insn *, rtx_insn *,
 static int combinable_i3pat (rtx_insn *, rtx *, rtx, rtx, rtx, int, int,
rtx *);
 static int contains_muldiv (rtx);
 static rtx_insn *try_combine (rtx_insn *, rtx_insn *, rtx_insn *, rtx_insn
*,
-			      int *, rtx_insn *);
+			      int *, rtx_insn *, rtx_insn *);
 static void undo_all (void);
 static void undo_commit (void);
 static rtx *find_split_point (rtx *, rtx_insn *, bool);
@@ -1120,6 +1120,46 @@ insn_a_feeds_b (rtx_insn *a, rtx_insn *b)
 #endif
   return false;
 }
+
+/* A is a compare (reg1, 0) and B is SINGLE_SET which SET_SRC is reg2.
+   It returns TRUE, if reg1 == reg2, and no other refer of reg1
+   except A and B.  */
+
+static bool
+can_reuse_cc_set_p (rtx_insn *a, rtx_insn *b)
+{
+  rtx seta = single_set (a);
+  rtx setb = single_set (b);
+
+  if (BLOCK_FOR_INSN (a) != BLOCK_FOR_INSN (b)
+      || !seta
+      || !setb)
+    return false;
+
+  if (GET_CODE (SET_SRC (seta)) != COMPARE
+      || GET_MODE_CLASS (GET_MODE (SET_DEST (seta))) != MODE_CC
+      || !REG_P (XEXP (SET_SRC (seta), 0))
+      || XEXP (SET_SRC (seta), 1) != CONST0_RTX (GET_MODE (SET_SRC (seta)))
+      || !REG_P (SET_SRC (setb))
+      || REGNO (SET_SRC (setb)) != REGNO (XEXP (SET_SRC (seta), 0)))
+    return false;
+
+  if (DF_REG_USE_COUNT (REGNO (SET_SRC (setb))) > 2)
+    {
+      df_ref use;
+      rtx insn;
+      unsigned int i = REGNO (SET_SRC (setb));
+
+      for (use = DF_REG_USE_CHAIN (i); use; use = DF_REF_NEXT_REG (use))
+        {
+	  insn = DF_REF_INSN (use);
+	  if (insn != a && insn != b && !(NOTE_P (insn) || DEBUG_INSN_P
(insn)))
+	    return false;
+	}
+    }
+  return true;
+}
+
 

 /* Main entry point for combiner.  F is the first insn of the function.
    NREGS is the first unused pseudo-reg number.
@@ -1129,10 +1169,7 @@ insn_a_feeds_b (rtx_insn *a, rtx_insn *b)
 static int
 combine_instructions (rtx_insn *f, unsigned int nregs)
 {
-  rtx_insn *insn, *next;
-#ifdef HAVE_cc0
-  rtx_insn *prev;
-#endif
+  rtx_insn *insn, *next, *prev;
   struct insn_link *links, *nextlinks;
   rtx_insn *first;
   basic_block last_bb;
@@ -1279,7 +1316,7 @@ combine_instructions (rtx_insn *f, unsigned int nregs)
 	  FOR_EACH_LOG_LINK (links, insn)
 	    if ((next = try_combine (insn, links->insn, NULL,
 				     NULL, &new_direct_jump_p,
-				     last_combined_insn)) != 0)
+				     last_combined_insn, NULL)) != 0)
 	      {
 		statistics_counter_event (cfun, "two-insn combine", 1);
 		goto retry;
@@ -1300,7 +1337,7 @@ combine_instructions (rtx_insn *f, unsigned int nregs)
 		FOR_EACH_LOG_LINK (nextlinks, link)
 		  if ((next = try_combine (insn, link, nextlinks->insn,
 					   NULL, &new_direct_jump_p,
-					   last_combined_insn)) != 0)
+					   last_combined_insn, NULL)) != 0)
 		    {
 		      statistics_counter_event (cfun, "three-insn combine",
1);
 		      goto retry;
@@ -1322,13 +1359,13 @@ combine_instructions (rtx_insn *f, unsigned int
nregs)
 	    {
 	      if ((next = try_combine (insn, prev, NULL, NULL,
 				       &new_direct_jump_p,
-				       last_combined_insn)) != 0)
+				       last_combined_insn, NULL)) != 0)
 		goto retry;
 
 	      FOR_EACH_LOG_LINK (nextlinks, prev)
 		  if ((next = try_combine (insn, prev, nextlinks->insn,
 					   NULL, &new_direct_jump_p,
-					   last_combined_insn)) != 0)
+					   last_combined_insn, NULL)) != 0)
 		    goto retry;
 	    }
 
@@ -1342,13 +1379,13 @@ combine_instructions (rtx_insn *f, unsigned int
nregs)
 	    {
 	      if ((next = try_combine (insn, prev, NULL, NULL,
 				       &new_direct_jump_p,
-				       last_combined_insn)) != 0)
+				       last_combined_insn, NULL)) != 0)
 		goto retry;
 
 	      FOR_EACH_LOG_LINK (nextlinks, prev)
 		  if ((next = try_combine (insn, prev, nextlinks->insn,
 					   NULL, &new_direct_jump_p,
-					   last_combined_insn)) != 0)
+					   last_combined_insn, NULL)) != 0)
 		    goto retry;
 	    }
 
@@ -1364,7 +1401,7 @@ combine_instructions (rtx_insn *f, unsigned int nregs)
 		  && sets_cc0_p (PATTERN (prev))
 		  && (next = try_combine (insn, links->insn,
 					  prev, NULL, &new_direct_jump_p,
-					  last_combined_insn)) != 0)
+					  last_combined_insn, NULL)) != 0)
 		goto retry;
 #endif
 
@@ -1377,7 +1414,7 @@ combine_instructions (rtx_insn *f, unsigned int nregs)
 		if ((next = try_combine (insn, links->insn,
 					 nextlinks->insn, NULL,
 					 &new_direct_jump_p,
-					 last_combined_insn)) != 0)
+					 last_combined_insn, NULL)) != 0)
 
 		  {
 		    statistics_counter_event (cfun, "three-insn combine",
1);
@@ -1406,7 +1443,7 @@ combine_instructions (rtx_insn *f, unsigned int nregs)
 		      if ((next = try_combine (insn, link, link1,
 					       nextlinks->insn,
 					       &new_direct_jump_p,
-					       last_combined_insn)) != 0)
+					       last_combined_insn, NULL)) !=
0)
 			{
 			  statistics_counter_event (cfun, "four-insn
combine", 1);
 			  goto retry;
@@ -1417,7 +1454,7 @@ combine_instructions (rtx_insn *f, unsigned int nregs)
 		      if ((next = try_combine (insn, link, link1,
 					       nextlinks->insn,
 					       &new_direct_jump_p,
-					       last_combined_insn)) != 0)
+					       last_combined_insn, NULL)) !=
0)
 			{
 			  statistics_counter_event (cfun, "four-insn
combine", 1);
 			  goto retry;
@@ -1434,7 +1471,7 @@ combine_instructions (rtx_insn *f, unsigned int nregs)
 		      if ((next = try_combine (insn, link, link1,
 					       nextlinks->insn,
 					       &new_direct_jump_p,
-					       last_combined_insn)) != 0)
+					       last_combined_insn, NULL)) !=
0)
 			{
 			  statistics_counter_event (cfun, "four-insn
combine", 1);
 			  goto retry;
@@ -1444,7 +1481,7 @@ combine_instructions (rtx_insn *f, unsigned int nregs)
 		      if ((next = try_combine (insn, link, link1,
 					       nextlinks->insn,
 					       &new_direct_jump_p,
-					       last_combined_insn)) != 0)
+					       last_combined_insn, NULL)) !=
0)
 			{
 			  statistics_counter_event (cfun, "four-insn
combine", 1);
 			  goto retry;
@@ -1452,6 +1489,37 @@ combine_instructions (rtx_insn *f, unsigned int
nregs)
 		  }
 	      }
 
+	  /* Try to combine a compare insn that sets CC
+	     with a preceding insn that can set CC, and maybe with its
+	     logical predecessor as well.
+	     We need this special code because data flow connections
+	     do not get entered in LOG_LINKS.  */
+	  if ((prev = prev_nonnote_nondebug_insn (insn)) != NULL_RTX
+	      && can_reuse_cc_set_p (insn, prev)
+	      && max_combine >= 4)
+	    {
+		struct insn_link *next1;
+		FOR_EACH_LOG_LINK (next1, prev)
+		  {
+		    rtx_insn *link1 = next1->insn;
+		    if (NOTE_P (link1))
+		      continue;
+		    /* I1 -> I2 -> I3; I2 -> insn;
+		       output parallel (insn, I3).  */
+		    FOR_EACH_LOG_LINK (nextlinks, link1)
+		      if ((next = try_combine (prev, link1,
+					       nextlinks->insn, NULL,
+					       &new_direct_jump_p,
+					       last_combined_insn, insn)) !=
0)
+
+			{
+			  delete_insn (insn);
+			  insn = next;
+			  statistics_counter_event (cfun, "four-insn
combine", 1);
+			  goto retry;
+			}
+		  }
+	    }
 	  /* Try this insn with each REG_EQUAL note it links back to.  */
 	  FOR_EACH_LOG_LINK (links, insn)
 	    {
@@ -1477,7 +1545,7 @@ combine_instructions (rtx_insn *f, unsigned int nregs)
 		  i2mod_new_rhs = copy_rtx (note);
 		  next = try_combine (insn, i2mod, NULL, NULL,
 				      &new_direct_jump_p,
-				      last_combined_insn);
+				      last_combined_insn, NULL);
 		  i2mod = NULL;
 		  if (next)
 		    {
@@ -2533,11 +2601,15 @@ can_split_parallel_of_n_reg_sets (rtx_insn *insn,
int n)
 
    LAST_COMBINED_INSN is either I3, or some insn after I3 that has
    been I3 passed to an earlier try_combine within the same basic
-   block.  */
+   block.
+
+   TO_COMBINED_INSN is an insn after I3 that sets CC flags.  It is used to
+   combine with I3 to make a new insn.  */
 
 static rtx_insn *
 try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
-	     int *new_direct_jump_p, rtx_insn *last_combined_insn)
+	     int *new_direct_jump_p, rtx_insn *last_combined_insn,
+	     rtx_insn *to_combined_insn)
 {
   /* New patterns for I3 and I2, respectively.  */
   rtx newpat, newi2pat = 0;
@@ -2630,6 +2702,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1,
rtx_insn *i0,
       || cant_combine_insn_p (i2)
       || (i1 && cant_combine_insn_p (i1))
       || (i0 && cant_combine_insn_p (i0))
+      || (to_combined_insn && cant_combine_insn_p (to_combined_insn))
       || likely_spilled_retval_p (i3))
     return 0;
 
@@ -3323,7 +3396,11 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn
*i1, rtx_insn *i0,
 	  rtx old = newpat;
 	  total_sets = 1 + extra_sets;
 	  newpat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_sets));
-	  XVECEXP (newpat, 0, 0) = old;
+
+	  if (to_combined_insn)
+	    XVECEXP (newpat, 0, --total_sets) = old;
+	  else
+	    XVECEXP (newpat, 0, 0) = old;
 	}
 
       if (added_sets_0)
@@ -3346,6 +3423,21 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn
*i1, rtx_insn *i0,
 	  if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n)
 	    t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0,
0);
 
+	  if (to_combined_insn)
+	    {
+	      rtx todest = NULL_RTX, tosrc = NULL_RTX;
+	      if (can_combine_p (i2, to_combined_insn, NULL, NULL,
+				 i3, NULL, &todest, &tosrc))
+		{
+		  rtx pat = copy_rtx (PATTERN (to_combined_insn));
+		  t = subst (pat, todest, tosrc, 0, 0, 0);
+		}
+	      else
+		{
+		  undo_all ();
+		  return 0;
+		}
+	    }
 	  XVECEXP (newpat, 0, --total_sets) = t;
 	}
     }
diff --git a/gcc/testsuite/gcc.target/i386/pr61225.c
b/gcc/testsuite/gcc.target/i386/pr61225.c
new file mode 100644
index 0000000..9c08102
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr61225.c
@@ -0,0 +1,16 @@
+/* PR rtl-optimization/61225 */
+/* { dg-do compile } */
+/* { dg-options "-Os -fdump-rtl-combine-details" } */
+
+void foo (void *);
+
+int *
+f1 (int *x)
+{
+  if (!--*x)
+    foo (x);
+  return x;
+}
+
+/* { dg-final { scan-rtl-dump "Successfully matched this instruction"
"combine" } } */
+/* { dg-final { cleanup-rtl-dump "combine" } } */





Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]