[PATCH] PR rtl-optimization/64081: Enable RTL loop unrolling for duplicated exit blocks and back edges.

Alexander Fomin afomin.mailbox@gmail.com
Fri Feb 5 13:44:00 GMT 2016


Hi!

Some kind of this patch was submitted about a year ago by Igor
Zamyatin. It's an attempt to fix PR rtl-optimization/64081 by enabling
RTL loop unrolling for duplicated exit blocks and back edges.

At the time it caused AIX bootstrap failure, but now it's OK according
to David's testing. I've also bootstrapped and regtested it on
x86_64-linux-gnu.

Is it still OK for trunk now, or you consider this v7 stuff?
Anyway, it's a regression.

Thanks,
Alexander
---
gcc/

	PR rtl-optimization/64081
	* loop-iv.c (def_pred_latch_p): New function.
	(latch_dominating_def): Allow specific cases with non-single
	definitions.
	(iv_get_reaching_def): Likewise.
	(check_complex_exit_p): New function.
	(check_simple_exit): Use check_complex_exit_p to allow certain cases
	with exits not executing on any iteration.

gcc/testsuite

	PR rtl-optimization/64081
	* gcc.dg/pr64081.c: New test.
---
 gcc/loop-iv.c                  | 114 ++++++++++++++++++++++++++++++++++++-----
 gcc/testsuite/gcc.dg/pr64081.c |  40 +++++++++++++++
 2 files changed, 142 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr64081.c

diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c
index fecaf8f..d7d3e0d 100644
--- a/gcc/loop-iv.c
+++ b/gcc/loop-iv.c
@@ -289,9 +289,27 @@ iv_analysis_loop_init (struct loop *loop)
   check_iv_ref_table_size ();
 }
 
+/* Checks that def is in an immediate predecessor of the latch block.  */
+
+static bool
+def_pred_latch_p (df_ref d_ref)
+{
+  basic_block bb = DF_REF_BB (d_ref);
+  edge_iterator ei;
+  edge e;
+
+  FOR_EACH_EDGE (e, ei, current_loop->latch->preds)
+    {
+      if (e->src == bb)
+	return true;
+    }
+  return false;
+}
+
 /* Finds the definition of REG that dominates loop latch and stores
    it to DEF.  Returns false if there is not a single definition
-   dominating the latch.  If REG has no definition in loop, DEF
+   dominating the latch or all defs are same and they are on different
+   predecessors of loop latch.  If REG has no definition in loop, DEF
    is set to NULL and true is returned.  */
 
 static bool
@@ -303,15 +321,28 @@ latch_dominating_def (rtx reg, df_ref *def)
 
   for (adef = DF_REG_DEF_CHAIN (regno); adef; adef = DF_REF_NEXT_REG (adef))
     {
+      /* Initialize this to true for the very first iteration when
+	 SINGLE_RD is NULL.  */
+      bool def_pred_latch = true;
+
       if (!bitmap_bit_p (df->blocks_to_analyze, DF_REF_BBNO (adef))
 	  || !bitmap_bit_p (&bb_info->out, DF_REF_ID (adef)))
 	continue;
 
-      /* More than one reaching definition.  */
-      if (single_rd)
+      /* More than one reaching definition is ok in case definitions are
+	 in predecessors of latch block and those definitions are the same.
+	 Probably this could be relaxed and check for sub-dominance instead
+	 predecessor.  */
+      if (single_rd
+	  && (!(def_pred_latch = def_pred_latch_p (adef))
+	      || !rtx_equal_p( PATTERN (DF_REF_INSN (single_rd)),
+			       PATTERN (DF_REF_INSN (adef)))))
 	return false;
 
-      if (!just_once_each_iteration_p (current_loop, DF_REF_BB (adef)))
+      /* It's ok if def is not executed on each iteration once we have defs on
+	 latch's predecessors.  */
+      if (!just_once_each_iteration_p (current_loop, DF_REF_BB (adef))
+	  && !def_pred_latch)
 	return false;
 
       single_rd = adef;
@@ -326,10 +357,10 @@ latch_dominating_def (rtx reg, df_ref *def)
 static enum iv_grd_result
 iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def)
 {
-  df_ref use, adef;
+  df_ref use, adef = NULL;
   basic_block def_bb, use_bb;
   rtx_insn *def_insn;
-  bool dom_p;
+  bool dom_p, dom_latch_p = false;
 
   *def = NULL;
   if (!simple_reg_p (reg))
@@ -344,11 +375,26 @@ iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def)
   if (!DF_REF_CHAIN (use))
     return GRD_INVARIANT;
 
-  /* More than one reaching def.  */
+  adef = DF_REF_CHAIN (use)->ref;
+  /* Having more than one reaching def is ok once all defs are in blocks which
+     are latch's predecessors.  */
   if (DF_REF_CHAIN (use)->next)
-    return GRD_INVALID;
+    {
+      df_link* defs;
+      unsigned int def_num = 0;
 
-  adef = DF_REF_CHAIN (use)->ref;
+      for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
+	{
+	  if (!def_pred_latch_p (defs->ref))
+	    return GRD_INVALID;
+	  def_num++;
+	}
+      /* Make sure all preds contain definitions.  */
+      if (def_num != EDGE_COUNT (current_loop->latch->preds))
+	return GRD_INVALID;
+
+      dom_latch_p = true;
+    }
 
   /* We do not handle setting only part of the register.  */
   if (DF_REF_FLAGS (adef) & DF_REF_READ_WRITE)
@@ -371,8 +417,8 @@ iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def)
 
   /* The definition does not dominate the use.  This is still OK if
      this may be a use of a biv, i.e. if the def_bb dominates loop
-     latch.  */
-  if (just_once_each_iteration_p (current_loop, def_bb))
+     latch or all defs are in latch's predecessors.  */
+  if (dom_latch_p || just_once_each_iteration_p (current_loop, def_bb))
     return GRD_MAYBE_BIV;
 
   return GRD_INVALID;
@@ -2871,6 +2917,49 @@ fail:
   return;
 }
 
+/* Check whether exit is not simple but still good for further analysis.
+   Looks like such loops mostly are a result of jump threading.  */
+
+static bool
+check_complex_exit_p (struct loop* loop, basic_block bb)
+{
+  edge e;
+  basic_block pred, exit;
+
+  if (EDGE_COUNT (bb->preds) > 1)
+    return false;
+
+  e = EDGE_PRED (bb, 0);
+
+  pred = e->src;
+  if (EDGE_COUNT (pred->succs) != 2)
+    return false;
+
+  /* Predecessor must be tested (at least) once during any iteration.  */
+  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, pred))
+    return false;
+
+  if (EDGE_SUCC (pred, 0)->dest == bb)
+    exit = EDGE_SUCC (pred, 1)->dest;
+  else
+    exit = EDGE_SUCC (pred, 0)->dest;
+
+  /* Check that EXIT is really loop exit.  */
+  if (flow_bb_inside_loop_p (loop, exit))
+    {
+      edge_iterator eei;
+      edge ee;
+
+      FOR_EACH_EDGE (ee, eei, exit->succs)
+	{
+	  if (!flow_bb_inside_loop_p (loop, ee->dest))
+	    return true;
+	}
+    }
+  return false;
+
+}
+
 /* Checks whether E is a simple exit from LOOP and stores its description
    into DESC.  */
 
@@ -2890,7 +2979,8 @@ check_simple_exit (struct loop *loop, edge e, struct niter_desc *desc)
     return;
 
   /* It must be tested (at least) once during any iteration.  */
-  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb))
+  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb)
+      && !check_complex_exit_p (loop, exit_bb))
     return;
 
   /* It must end in a simple conditional jump.  */
diff --git a/gcc/testsuite/gcc.dg/pr64081.c b/gcc/testsuite/gcc.dg/pr64081.c
new file mode 100644
index 0000000..5207513
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr64081.c
@@ -0,0 +1,40 @@
+/* PR rtl-optimization/64081 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -funroll-loops -fdump-rtl-loop2_unroll" } */
+
+long token;
+long *data;
+long *arr1;
+long *arr2;
+
+int main (int argc, const char* argv[])
+{
+  unsigned long i;
+  static long pos, start, count;
+  static int dir;
+
+  pos = 0;
+  dir = 1;
+
+  for (i = 0; i < argc; i++)
+    {
+      start = pos;
+      for (count = 0; count <= 400; count++)
+   {
+     if (token == data[pos])
+       break;
+     if (dir == 1)
+       pos = arr1[pos];
+     else
+       pos = arr2[pos];
+     if (pos == -1)
+       {
+         pos = start;
+         dir = !dir;
+       }
+   }
+    }
+  return pos + dir;
+}
+
+/* { dg-final { scan-rtl-dump "loop unrolled 7 times" "loop2_unroll" } } */
-- 
1.9.3



More information about the Gcc-patches mailing list