This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][compare-elim] Fix PR rtl-optimization/82597


On Tue, Oct 31, 2017 at 10:35:59AM +0100, Jakub Jelinek wrote:
> > 2017-10-18  Michael Collison  <michael.collison@arm.com>
> > 
> > 	PR rtl-optimization/82597
> > 	* compare-elim.c: (try_validate_parallel): Constrain operands
> > 	of recognized insn.
> 
> That just duplicates more of insn_invalid_p.
> I wonder if we don't want to do something like the following instead
> (untested so far).  The insn_uid decrement and ggc_free can be left out if
> deemed unnecessary; I don't have an idea how many try_validate_parallel
> calls fail in the real world.
> 
> More importantly, I still don't really like the
>   df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
> part of the earlier changes, that is very expensive and I doubt it is really
> necessary.  You only use the UD/DU chains to find from the comparison insn
> the previous setter of the in_a in the same basic block (if any), but you
> walk before that the basic block from HEAD to END in
> find_comparison_dom_walker::before_dom_children.  So, wouldn't it be cheaper
> to track during that walk the last setter insn of each hard register e.g. in an
> array indexed by REGNO (or perhaps track only selected simple single_set
> arith instructions) and remember for comparisons of a hard reg with
> const0_rtx in struct comparison next to prev_clobber field also the
> reg_setter?  After all, that is the way prev_clobber is computed, except
> in that case it is just a single register (CC) we track it for.
> For many targets that is all we need (if all the arith instructions clobber
> flags), on say x86_64/i686 there are only very few exceptions (e.g. lea, but
> that is typically used in a way that makes it impossible to merge).
> On others, such as aarch64 (or arm, or both?), that is not the case, so we
> need to track more.

Here is an untested patch (I have only tried the cmpelim_mult_uses_1.c
testcase in an aarch64 cross compiler) that does that.  I don't have aarch64
boxes around for easy trunk testing, but I can bootstrap/regtest it on
x86_64-linux/i686-linux and maybe powerpc64le-linux.

If the memset (last_setter, 0, sizeof (last_setter)); for each bb is a problem
(maybe for ia64/mmix/mips, which have huge numbers of hard registers),
one possibility would be to put the array into the find_comparison_dom_walker
class, clear it only in the constructor, and have next to it an auto_vec
into which we'd push the REGNOs whose last_setter entry we've changed from
NULL to non-NULL.  Then, instead of clearing the whole array for each bb, we'd
just pop all the REGNOs from the vector and clear just those entries.
One extra advantage would be that we could also cheaply clear the
last_setter entries when seeing a following flags setter, user,
call/jump/asm_input (the stuff that can_merge_compare_into_arith verifies).

2017-10-31  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/82778
	PR rtl-optimization/82597
	* compare-elim.c (struct comparison): Add in_a_setter field.
	(find_comparison_dom_walker::before_dom_children): Remove killed
	bitmap and df_simulate_find_defs call, instead walk the defs.
	Compute last_setter and initialize in_a_setter.  Merge definitions
	with first initialization for a few variables.
	(try_validate_parallel): Use insn_invalid_p instead of
	recog_memoized.  Return insn rather than just the pattern.
	(try_merge_compare): Fix up comment.  Don't uselessly test if
	in_a is a REG_P.  Use cmp->in_a_setter instead of walking UD
	chains.

	* g++.dg/opt/pr82778.C: New test.

2017-10-31  Michael Collison  <michael.collison@arm.com>

	PR rtl-optimization/82597
	* gcc.dg/pr82597.c: New test.

--- gcc/compare-elim.c.jj	2017-10-19 09:08:17.000000000 +0200
+++ gcc/compare-elim.c	2017-10-31 11:37:29.430797609 +0100
@@ -97,6 +97,9 @@ struct comparison
   /* The insn prior to the comparison insn that clobbers the flags.  */
   rtx_insn *prev_clobber;
 
+  /* The insn prior to the comparison insn that sets in_a REG.  */
+  rtx_insn *in_a_setter;
+
   /* The two values being compared.  These will be either REGs or
      constants.  */
   rtx in_a, in_b;
@@ -309,26 +312,22 @@ can_eliminate_compare (rtx compare, rtx
 edge
 find_comparison_dom_walker::before_dom_children (basic_block bb)
 {
-  struct comparison *last_cmp;
-  rtx_insn *insn, *next, *last_clobber;
-  bool last_cmp_valid;
+  rtx_insn *insn, *next;
   bool need_purge = false;
-  bitmap killed;
-
-  killed = BITMAP_ALLOC (NULL);
+  rtx_insn *last_setter[FIRST_PSEUDO_REGISTER];
 
   /* The last comparison that was made.  Will be reset to NULL
      once the flags are clobbered.  */
-  last_cmp = NULL;
+  struct comparison *last_cmp = NULL;
 
   /* True iff the last comparison has not been clobbered, nor
      have its inputs.  Used to eliminate duplicate compares.  */
-  last_cmp_valid = false;
+  bool last_cmp_valid = false;
 
   /* The last insn that clobbered the flags, if that insn is of
      a form that may be valid for eliminating a following compare.
      To be reset to NULL once the flags are set otherwise.  */
-  last_clobber = NULL;
+  rtx_insn *last_clobber = NULL;
 
   /* Propagate the last live comparison throughout the extended basic block. */
   if (single_pred_p (bb))
@@ -338,6 +337,7 @@ find_comparison_dom_walker::before_dom_c
 	last_cmp_valid = last_cmp->inputs_valid;
     }
 
+  memset (last_setter, 0, sizeof (last_setter));
   for (insn = BB_HEAD (bb); insn; insn = next)
     {
       rtx src;
@@ -346,10 +346,6 @@ find_comparison_dom_walker::before_dom_c
       if (!NONDEBUG_INSN_P (insn))
 	continue;
 
-      /* Compute the set of registers modified by this instruction.  */
-      bitmap_clear (killed);
-      df_simulate_find_defs (insn, killed);
-
       src = conforming_compare (insn);
       if (src)
 	{
@@ -373,6 +369,13 @@ find_comparison_dom_walker::before_dom_c
 	  last_cmp->in_b = XEXP (src, 1);
 	  last_cmp->eh_note = eh_note;
 	  last_cmp->orig_mode = GET_MODE (src);
+	  if (last_cmp->in_b == const0_rtx
+	      && last_setter[REGNO (last_cmp->in_a)])
+	    {
+	      rtx set = single_set (last_setter[REGNO (last_cmp->in_a)]);
+	      if (set && rtx_equal_p (SET_DEST (set), last_cmp->in_a))
+		last_cmp->in_a_setter = last_setter[REGNO (last_cmp->in_a)];
+	    }
 	  all_compares.safe_push (last_cmp);
 
 	  /* It's unusual, but be prepared for comparison patterns that
@@ -388,28 +391,36 @@ find_comparison_dom_walker::before_dom_c
 	    find_flags_uses_in_insn (last_cmp, insn);
 
 	  /* Notice if this instruction kills the flags register.  */
-	  if (bitmap_bit_p (killed, targetm.flags_regnum))
-	    {
-	      /* See if this insn could be the "clobber" that eliminates
-		 a future comparison.   */
-	      last_clobber = (arithmetic_flags_clobber_p (insn) ? insn : NULL);
-
-	      /* In either case, the previous compare is no longer valid.  */
-	      last_cmp = NULL;
-	      last_cmp_valid = false;
-	    }
+	  df_ref def;
+	  FOR_EACH_INSN_DEF (def, insn)
+	    if (DF_REF_REGNO (def) == targetm.flags_regnum)
+	      {
+		/* See if this insn could be the "clobber" that eliminates
+		   a future comparison.   */
+		last_clobber = (arithmetic_flags_clobber_p (insn)
+				? insn : NULL);
+
+		/* In either case, the previous compare is no longer valid.  */
+		last_cmp = NULL;
+		last_cmp_valid = false;
+		break;
+	      }
 	}
 
-      /* Notice if any of the inputs to the comparison have changed.  */
-      if (last_cmp_valid
-	  && (bitmap_bit_p (killed, REGNO (last_cmp->in_a))
-	      || (REG_P (last_cmp->in_b)
-		  && bitmap_bit_p (killed, REGNO (last_cmp->in_b)))))
-	last_cmp_valid = false;
+      /* Notice if any of the inputs to the comparison have changed
+	 and remember last insn that sets each register.  */
+      df_ref def;
+      FOR_EACH_INSN_DEF (def, insn)
+	{
+	  if (last_cmp_valid
+	      && (DF_REF_REGNO (def) == REGNO (last_cmp->in_a)
+		  || (REG_P (last_cmp->in_b)
+		      && DF_REF_REGNO (def) == REGNO (last_cmp->in_b))))
+	    last_cmp_valid = false;
+	  last_setter[DF_REF_REGNO (def)] = insn;
+	}
     }
 
-  BITMAP_FREE (killed);
-
   /* Remember the live comparison for subsequent members of
      the extended basic block.  */
   if (last_cmp)
@@ -625,13 +636,19 @@ can_merge_compare_into_arith (rtx_insn *
 static rtx
 try_validate_parallel (rtx set_a, rtx set_b)
 {
-  rtx par
-    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set_a, set_b));
+  rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set_a, set_b));
+  rtx_insn *insn = make_insn_raw (par);
 
-  rtx_insn *insn;
-  insn = gen_rtx_INSN (VOIDmode, 0, 0, 0, par, 0, -1, 0);
+  if (insn_invalid_p (insn, false))
+    {
+      crtl->emit.x_cur_insn_uid--;
+      return NULL_RTX;
+    }
 
-  return recog_memoized (insn) > 0 ? par : NULL_RTX;
+  SET_PREV_INSN (insn) = NULL_RTX;
+  SET_NEXT_INSN (insn) = NULL_RTX;
+  INSN_LOCATION (insn) = 0;
+  return insn;
 }
 
 /* For a comparison instruction described by CMP check if it compares a
@@ -643,7 +660,7 @@ try_validate_parallel (rtx set_a, rtx se
    <instructions that don't read the condition register>
    I2: CC := CMP R1 0
    I2 can be merged with I1 into:
-   I1: { R1 := R2 + R3 ; CC := CMP (R2 + R3) 0 }
+   I1: { CC := CMP (R2 + R3) 0 ; R1 := R2 + R3 }
    This catches cases where R1 is used between I1 and I2 and therefore
    combine and other RTL optimisations will not try to propagate it into
    I2.  Return true if we succeeded in merging CMP.  */
@@ -653,7 +670,7 @@ try_merge_compare (struct comparison *cm
 {
   rtx_insn *cmp_insn = cmp->insn;
 
-  if (!REG_P (cmp->in_a) || cmp->in_b != const0_rtx)
+  if (cmp->in_b != const0_rtx || cmp->in_a_setter == NULL)
     return false;
   rtx in_a = cmp->in_a;
   df_ref use;
@@ -664,24 +681,8 @@ try_merge_compare (struct comparison *cm
   if (!use)
     return false;
 
-  /* Validate the data flow information before attempting to
-     find the instruction that defines in_a.  */
-
-  struct df_link *ref_chain;
-  ref_chain = DF_REF_CHAIN (use);
-  if (!ref_chain || !ref_chain->ref
-      || !DF_REF_INSN_INFO (ref_chain->ref) || ref_chain->next != NULL)
-    return false;
-
-  rtx_insn *def_insn = DF_REF_INSN (ref_chain->ref);
-  /* We found the insn that defines in_a.  Only consider the cases where
-     it is in the same block as the comparison.  */
-  if (BLOCK_FOR_INSN (cmp_insn) != BLOCK_FOR_INSN (def_insn))
-    return false;
-
+  rtx_insn *def_insn = cmp->in_a_setter;
   rtx set = single_set (def_insn);
-  if (!set)
-    return false;
 
   if (!can_merge_compare_into_arith (cmp_insn, def_insn))
     return false;
--- gcc/testsuite/g++.dg/opt/pr82778.C.jj	2017-10-31 09:01:25.025934660 +0100
+++ gcc/testsuite/g++.dg/opt/pr82778.C	2017-10-31 09:00:08.000000000 +0100
@@ -0,0 +1,37 @@
+// PR rtl-optimization/82778
+// { dg-do compile }
+// { dg-options "-O2" }
+
+template <typename a, int b> struct c {
+  typedef a d[b];
+  static a e(d f, int g) { return f[g]; }
+};
+template <typename a, int b> struct B {
+  typedef c<a, b> h;
+  typename h::d i;
+  long j;
+  a at() { return h::e(i, j); }
+};
+int k, m, r, s, t;
+char l, n, q;
+short o, p, w;
+struct C {
+  int u;
+};
+B<C, 4> v;
+void x() {
+  if (((p > (q ? v.at().u : k)) >> l - 226) + !(n ^ r * m))
+    s = ((-(((p > (q ? v.at().u : k)) >> l - 226) + !(n ^ r * m)) < 0) /
+             (-(((p > (q ? v.at().u : k)) >> l - 226) + !(n ^ r * m)) ^
+              -25 & o) &&
+         p) >>
+        (0 <= 0
+             ? 0 ||
+                   (-(((p > (q ? v.at().u : k)) >> l - 226) + !(n ^ r * m)) <
+                    0) /
+                       (-(((p > (q ? v.at().u : k)) >> l - 226) +
+                          !(n ^ r * m)) ^ -25 & o)
+             : 0);
+  w = (p > (q ? v.at().u : k)) >> l - 226;
+  t = !(n ^ r * m);
+}
--- gcc/testsuite/gcc.dg/pr82597.c.jj	2017-10-31 11:34:39.787915615 +0100
+++ gcc/testsuite/gcc.dg/pr82597.c	2017-10-31 11:34:39.787915615 +0100
@@ -0,0 +1,40 @@
+/* PR rtl-optimization/82597 */
+/* { dg-do compile  }*/
+/* { dg-options "-O2 -funroll-loops" } */
+
+int pb;
+
+void
+ch (unsigned char np, char fc)
+{
+  unsigned char *y6 = &np;
+
+  if (fc != 0)
+    {
+      unsigned char *z1 = &np;
+
+      for (;;)
+        if (*y6 != 0)
+          for (fc = 0; fc < 12; ++fc)
+            {
+              int hh;
+              int tp;
+
+              if (fc != 0)
+                hh = (*z1 != 0) ? fc : 0;
+              else
+                hh = pb;
+
+              tp = fc > 0;
+              if (hh == tp)
+                *y6 = 1;
+            }
+    }
+
+  if (np != 0)
+    y6 = (unsigned char *)&fc;
+  if (pb != 0 && *y6 != 0)
+    for (;;)
+      {
+      }
+}


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]