This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix PR84427, PRE ANTIC iteration (again)


This fixes another testcase that shows that our ANTIC iteration can
fail to converge.  The fix continues what we did with previous fixes,
avoid spuriously removing stuff from the expression side of the sets.
This time going full-in and allowing multiple expressions for the
same value in the sets.  I've added verification that the value-sets
never grow during iteration which barfed on some of the related
existing testcases and thus helped finishing this and fixing all
places where we end up somewhat randomly choosing one or another
expression to drop.

I've verified with a GIMPLE testcase that if at insertion time
we have multiple partially redundant expressions for the same value
we insert/hoist it only once.  So the only "bad" effect of the
patch is that the expression part of the ANTIC sets grows -- by
not throwing away random stuff we might also arrive at larger
(value) solutions for ANTIC which means we may catch previously
missed optimizations (or pessimizations as you know PRE...).

I'm not entirely happy with the timing but I'm quite confident in
the approach also (again) having heavily discussed this with Micha.

Re-boostrap and regtest running on x86_64-unknown-linux-gnu.

I managed to go into a different solution at the beginning asking
for that pred/phiblock->edge cleanup and decided to leave that in...

The new verification is guarded with flag_checking so if it would
trigger but wouldn't result in not converging a release build
should be not affected.

Richard.

2018-01-03  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/84427
	* tree-ssa-pre.c (bitmap_remove_expr_from_set): Remove.
	(bitmap_set_subtract_values): Rewrite to handle multiple
	exprs per value.
	(clean): Likewise.
	(prune_clobbered_mems): Likewise.
	(phi_translate): Take edge instead of pred/phiblock.
	(phi_translate_1): Likewise.
	(phi_translate_set): Likewise.  Insert all translated
	exprs for a value into the set, keeping possibly multiple
	expressions per value.
	(compute_antic_aux): Adjust for phi_translate changes.
	When intersecting union the expressions and prune those
	not in the final value set, keeping possibly multiple
	expressions per value.  Do not use value-insertion
	for unioning ANTIC_OUT U EXP_GEN - TMP_GEN but merge
	all expressions.  Add verification that the value-sets
	only shrink during iteration.
	(compute_partial_antic_aux): Adjust for the phi_translate changes.
	(do_pre_regular_insertion): Likewise.
	(do_pre_partial_partial_insertion): Likewise.

	* gcc.dg/torture/pr84427.c: New testcase.

Index: gcc/tree-ssa-pre.c
===================================================================
--- gcc/tree-ssa-pre.c	(revision 258097)
+++ gcc/tree-ssa-pre.c	(working copy)
@@ -696,16 +696,6 @@ sccvn_valnum_from_value_id (unsigned int
   return NULL_TREE;
 }
 
-/* Remove an expression EXPR from a bitmapped set.  */
-
-static void
-bitmap_remove_expr_from_set (bitmap_set_t set, pre_expr expr)
-{
-  unsigned int val  = get_expr_value_id (expr);
-  bitmap_clear_bit (&set->values, val);
-  bitmap_clear_bit (&set->expressions, get_expression_id (expr));
-}
-
 /* Insert an expression EXPR into a bitmapped set.  */
 
 static void
@@ -805,20 +795,21 @@ bitmap_set_subtract_values (bitmap_set_t
 {
   unsigned int i;
   bitmap_iterator bi;
-  pre_expr to_remove = NULL;
+  unsigned to_remove = -1U;
+  bitmap_and_compl_into (&a->values, &b->values);
   FOR_EACH_EXPR_ID_IN_SET (a, i, bi)
     {
-      if (to_remove)
+      if (to_remove != -1U)
 	{
-	  bitmap_remove_expr_from_set (a, to_remove);
-	  to_remove = NULL;
+	  bitmap_clear_bit (&a->expressions, to_remove);
+	  to_remove = -1U;
 	}
       pre_expr expr = expression_for_id (i);
-      if (bitmap_bit_p (&b->values, get_expr_value_id (expr)))
-	to_remove = expr;
+      if (! bitmap_bit_p (&a->values, get_expr_value_id (expr)))
+	to_remove = i;
     }
-  if (to_remove)
-    bitmap_remove_expr_from_set (a, to_remove);
+  if (to_remove != -1U)
+    bitmap_clear_bit (&a->expressions, to_remove);
 }
 
 
@@ -1335,17 +1326,17 @@ get_representative_for (const pre_expr e
 
 
 static pre_expr
-phi_translate (pre_expr expr, bitmap_set_t set1, bitmap_set_t set2,
-	       basic_block pred, basic_block phiblock);
+phi_translate (pre_expr expr, bitmap_set_t set1, bitmap_set_t set2, edge e);
 
 /* Translate EXPR using phis in PHIBLOCK, so that it has the values of
    the phis in PRED.  Return NULL if we can't find a leader for each part
    of the translated expression.  */
 
 static pre_expr
-phi_translate_1 (pre_expr expr, bitmap_set_t set1, bitmap_set_t set2,
-		 basic_block pred, basic_block phiblock)
+phi_translate_1 (pre_expr expr, bitmap_set_t set1, bitmap_set_t set2, edge e)
 {
+  basic_block pred = e->src;
+  basic_block phiblock = e->dest;
   switch (expr->kind)
     {
     case NARY:
@@ -1366,7 +1357,7 @@ phi_translate_1 (pre_expr expr, bitmap_s
                 pre_expr leader, result;
 		unsigned int op_val_id = VN_INFO (newnary->op[i])->value_id;
 		leader = find_leader_in_sets (op_val_id, set1, set2);
-                result = phi_translate (leader, set1, set2, pred, phiblock);
+		result = phi_translate (leader, set1, set2, e);
 		if (result && result != leader)
 		  /* Force a leader as well as we are simplifying this
 		     expression.  */
@@ -1397,7 +1388,7 @@ phi_translate_1 (pre_expr expr, bitmap_s
 		       to be inserted and increased register pressure.
 		       See PR77498 - this avoids doing predcoms work in
 		       a less efficient way.  */
-		    if (find_edge (pred, phiblock)->flags & EDGE_DFS_BACK)
+		    if (e->flags & EDGE_DFS_BACK)
 		      ;
 		    else
 		      {
@@ -1488,7 +1479,7 @@ phi_translate_1 (pre_expr expr, bitmap_s
 		  }
 		op_val_id = VN_INFO (op[n])->value_id;
 		leader = find_leader_in_sets (op_val_id, set1, set2);
-		opresult = phi_translate (leader, set1, set2, pred, phiblock);
+		opresult = phi_translate (leader, set1, set2, e);
 		if (opresult && opresult != leader)
 		  {
 		    tree name = get_representative_for (opresult);
@@ -1616,7 +1607,6 @@ phi_translate_1 (pre_expr expr, bitmap_s
 	if (gimple_code (def_stmt) == GIMPLE_PHI
 	    && gimple_bb (def_stmt) == phiblock)
 	  {
-	    edge e = find_edge (pred, gimple_bb (def_stmt));
 	    tree def = PHI_ARG_DEF (def_stmt, e->dest_idx);
 
 	    /* Handle constant. */
@@ -1639,8 +1629,7 @@ phi_translate_1 (pre_expr expr, bitmap_s
 /* Wrapper around phi_translate_1 providing caching functionality.  */
 
 static pre_expr
-phi_translate (pre_expr expr, bitmap_set_t set1, bitmap_set_t set2,
-	       basic_block pred, basic_block phiblock)
+phi_translate (pre_expr expr, bitmap_set_t set1, bitmap_set_t set2, edge e)
 {
   expr_pred_trans_t slot = NULL;
   pre_expr phitrans;
@@ -1658,7 +1647,7 @@ phi_translate (pre_expr expr, bitmap_set
   /* Don't add translations of NAMEs as those are cheap to translate.  */
   if (expr->kind != NAME)
     {
-      if (phi_trans_add (&slot, expr, pred))
+      if (phi_trans_add (&slot, expr, e->src))
 	return slot->v;
       /* Store NULL for the value we want to return in the case of
 	 recursing.  */
@@ -1666,7 +1655,7 @@ phi_translate (pre_expr expr, bitmap_set
     }
 
   /* Translate.  */
-  phitrans = phi_translate_1 (expr, set1, set2, pred, phiblock);
+  phitrans = phi_translate_1 (expr, set1, set2, e);
 
   if (slot)
     {
@@ -1687,14 +1676,13 @@ phi_translate (pre_expr expr, bitmap_set
    expressions in DEST.  */
 
 static void
-phi_translate_set (bitmap_set_t dest, bitmap_set_t set, basic_block pred,
-		   basic_block phiblock)
+phi_translate_set (bitmap_set_t dest, bitmap_set_t set, edge e)
 {
   vec<pre_expr> exprs;
   pre_expr expr;
   int i;
 
-  if (gimple_seq_empty_p (phi_nodes (phiblock)))
+  if (gimple_seq_empty_p (phi_nodes (e->dest)))
     {
       bitmap_set_copy (dest, set);
       return;
@@ -1704,18 +1692,11 @@ phi_translate_set (bitmap_set_t dest, bi
   FOR_EACH_VEC_ELT (exprs, i, expr)
     {
       pre_expr translated;
-      translated = phi_translate (expr, set, NULL, pred, phiblock);
+      translated = phi_translate (expr, set, NULL, e);
       if (!translated)
 	continue;
 
-      /* We might end up with multiple expressions from SET being
-	 translated to the same value.  In this case we do not want
-	 to retain the NARY or REFERENCE expression but prefer a NAME
-	 which would be the leader.  */
-      if (translated->kind == NAME)
-	bitmap_value_replace_in_set (dest, translated);
-      else
-	bitmap_value_insert_into_set (dest, translated);
+      bitmap_insert_into_set (dest, translated);
     }
   exprs.release ();
 }
@@ -1918,7 +1899,15 @@ clean (bitmap_set_t set1, bitmap_set_t s
   FOR_EACH_VEC_ELT (exprs, i, expr)
     {
       if (!valid_in_sets (set1, set2, expr))
-	bitmap_remove_expr_from_set (set1, expr);
+	{
+	  unsigned int val  = get_expr_value_id (expr);
+	  bitmap_clear_bit (&set1->expressions, get_expression_id (expr));
+	  /* We are entered with possibly multiple expressions for a value
+	     so before removing a value from the set see if there's an
+	     expression for it left.  */
+	  if (! bitmap_find_leader (set1, val))
+	    bitmap_clear_bit (&set1->values, val);
+	}
     }
   exprs.release ();
 }
@@ -1931,15 +1920,17 @@ prune_clobbered_mems (bitmap_set_t set,
 {
   bitmap_iterator bi;
   unsigned i;
-  pre_expr to_remove = NULL;
+  unsigned to_remove = -1U;
+  bool any_removed = false;
 
   FOR_EACH_EXPR_ID_IN_SET (set, i, bi)
     {
       /* Remove queued expr.  */
-      if (to_remove)
+      if (to_remove != -1U)
 	{
-	  bitmap_remove_expr_from_set (set, to_remove);
-	  to_remove = NULL;
+	  bitmap_clear_bit (&set->expressions, to_remove);
+	  any_removed = true;
+	  to_remove = -1U;
 	}
 
       pre_expr expr = expression_for_id (i);
@@ -1955,7 +1946,7 @@ prune_clobbered_mems (bitmap_set_t set,
 					   block, gimple_bb (def_stmt)))
 		      || (gimple_bb (def_stmt) == block
 			  && value_dies_in_block_x (expr, block))))
-		to_remove = expr;
+		to_remove = i;
 	    }
 	}
       else if (expr->kind == NARY)
@@ -1967,13 +1958,36 @@ prune_clobbered_mems (bitmap_set_t set,
 	     as the available expression might be after the exit point.  */
 	  if (BB_MAY_NOTRETURN (block)
 	      && vn_nary_may_trap (nary))
-	    to_remove = expr;
+	    to_remove = i;
 	}
     }
 
   /* Remove queued expr.  */
-  if (to_remove)
-    bitmap_remove_expr_from_set (set, to_remove);
+  if (to_remove != -1U)
+    {
+      bitmap_clear_bit (&set->expressions, to_remove);
+      any_removed = true;
+    }
+
+  /* Above we only removed expressions, now clean the set of values
+     which no longer have any corresponding expression.  We cannot
+     clear the value at the time we remove an expression since there
+     may be multiple expressions per value.
+     If we'd queue possibly to be removed values we could use
+     the bitmap_find_leader way to see if there's still an expression
+     for it.  For some ratio of to be removed values and number of
+     values/expressions in the set this might be faster than rebuilding
+     the value-set.  */
+  if (any_removed)
+    {
+      bitmap_clear (&set->values);
+      FOR_EACH_EXPR_ID_IN_SET (set, i, bi)
+	{
+	  pre_expr expr = expression_for_id (i);
+	  unsigned int value_id = get_expr_value_id (expr);
+	  bitmap_set_bit (&set->values, value_id);
+	}
+    }
 }
 
 static sbitmap has_abnormal_preds;
@@ -1993,11 +2007,10 @@ static bool
 compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge)
 {
   bitmap_set_t S, old, ANTIC_OUT;
-  bitmap_iterator bi;
-  unsigned int bii;
   edge e;
   edge_iterator ei;
 
+  bool was_visited = BB_VISITED (block);
   bool changed = ! BB_VISITED (block);
   BB_VISITED (block) = 1;
   old = ANTIC_OUT = S = NULL;
@@ -2017,9 +2030,9 @@ compute_antic_aux (basic_block block, bo
      translate through.  */
   else if (single_succ_p (block))
     {
-      basic_block succ_bb = single_succ (block);
-      gcc_assert (BB_VISITED (succ_bb));
-      phi_translate_set (ANTIC_OUT, ANTIC_IN (succ_bb), block, succ_bb);
+      e = single_succ_edge (block);
+      gcc_assert (BB_VISITED (e->dest));
+      phi_translate_set (ANTIC_OUT, ANTIC_IN (e->dest), e);
     }
   /* If we have multiple successors, we take the intersection of all of
      them.  Note that in the case of loop exit phi nodes, we may have
@@ -2027,16 +2040,16 @@ compute_antic_aux (basic_block block, bo
   else
     {
       size_t i;
-      basic_block bprime, first = NULL;
+      edge first = NULL;
 
-      auto_vec<basic_block> worklist (EDGE_COUNT (block->succs));
+      auto_vec<edge> worklist (EDGE_COUNT (block->succs));
       FOR_EACH_EDGE (e, ei, block->succs)
 	{
 	  if (!first
 	      && BB_VISITED (e->dest))
-	    first = e->dest;
+	    first = e;
 	  else if (BB_VISITED (e->dest))
-	    worklist.quick_push (e->dest);
+	    worklist.quick_push (e);
 	  else
 	    {
 	      /* Unvisited successors get their ANTIC_IN replaced by the
@@ -2053,7 +2066,7 @@ compute_antic_aux (basic_block block, bo
          which is guaranteed by iteration order.  */
       gcc_assert (first != NULL);
 
-      phi_translate_set (ANTIC_OUT, ANTIC_IN (first), block, first);
+      phi_translate_set (ANTIC_OUT, ANTIC_IN (first->dest), first);
 
       /* If we have multiple successors we need to intersect the ANTIC_OUT
          sets.  For values that's a simple intersection but for
@@ -2062,31 +2075,29 @@ compute_antic_aux (basic_block block, bo
 	 Avoid randomness and running into cycles like for PR82129 and
 	 canonicalize the expression we choose to the one with the
 	 lowest id.  This requires we actually compute the union first.  */
-      FOR_EACH_VEC_ELT (worklist, i, bprime)
+      FOR_EACH_VEC_ELT (worklist, i, e)
 	{
-	  if (!gimple_seq_empty_p (phi_nodes (bprime)))
+	  if (!gimple_seq_empty_p (phi_nodes (e->dest)))
 	    {
 	      bitmap_set_t tmp = bitmap_set_new ();
-	      phi_translate_set (tmp, ANTIC_IN (bprime), block, bprime);
+	      phi_translate_set (tmp, ANTIC_IN (e->dest), e);
 	      bitmap_and_into (&ANTIC_OUT->values, &tmp->values);
 	      bitmap_ior_into (&ANTIC_OUT->expressions, &tmp->expressions);
 	      bitmap_set_free (tmp);
 	    }
 	  else
 	    {
-	      bitmap_and_into (&ANTIC_OUT->values, &ANTIC_IN (bprime)->values);
+	      bitmap_and_into (&ANTIC_OUT->values, &ANTIC_IN (e->dest)->values);
 	      bitmap_ior_into (&ANTIC_OUT->expressions,
-			       &ANTIC_IN (bprime)->expressions);
+			       &ANTIC_IN (e->dest)->expressions);
 	    }
 	}
       if (! worklist.is_empty ())
 	{
-	  /* Prune expressions not in the value set, canonicalizing to
-	     expression with lowest ID.  */
+	  /* Prune expressions not in the value set.  */
 	  bitmap_iterator bi;
 	  unsigned int i;
 	  unsigned int to_clear = -1U;
-	  bitmap seen_value = BITMAP_ALLOC (NULL);
 	  FOR_EACH_EXPR_ID_IN_SET (ANTIC_OUT, i, bi)
 	    {
 	      if (to_clear != -1U)
@@ -2096,13 +2107,11 @@ compute_antic_aux (basic_block block, bo
 		}
 	      pre_expr expr = expression_for_id (i);
 	      unsigned int value_id = get_expr_value_id (expr);
-	      if (!bitmap_bit_p (&ANTIC_OUT->values, value_id)
-		  || !bitmap_set_bit (seen_value, value_id))
+	      if (!bitmap_bit_p (&ANTIC_OUT->values, value_id))
 		to_clear = i;
 	    }
 	  if (to_clear != -1U)
 	    bitmap_clear_bit (&ANTIC_OUT->expressions, to_clear);
-	  BITMAP_FREE (seen_value);
 	}
     }
 
@@ -2119,15 +2128,26 @@ compute_antic_aux (basic_block block, bo
 
   /* Then union in the ANTIC_OUT - TMP_GEN values,
      to get ANTIC_OUT U EXP_GEN - TMP_GEN */
-  FOR_EACH_EXPR_ID_IN_SET (S, bii, bi)
-    bitmap_value_insert_into_set (ANTIC_IN (block),
-				  expression_for_id (bii));
+  bitmap_ior_into (&ANTIC_IN (block)->values, &S->values);
+  bitmap_ior_into (&ANTIC_IN (block)->expressions, &S->expressions);
 
   /* clean (ANTIC_IN (block)) is defered to after the iteration converged
      because it can cause non-convergence, see for example PR81181.  */
 
   if (!bitmap_set_equal (old, ANTIC_IN (block)))
-    changed = true;
+    {
+      changed = true;
+      /* After the initial value set computation the value set may
+         only shrink during the iteration.  */
+      if (was_visited && flag_checking)
+	{
+	  bitmap_iterator bi;
+	  unsigned int i;
+	  EXECUTE_IF_AND_COMPL_IN_BITMAP (&ANTIC_IN (block)->values,
+					  &old->values, 0, i, bi)
+	    gcc_unreachable ();
+	}
+    }
 
  maybe_dump_sets:
   if (dump_file && (dump_flags & TDF_DETAILS))
@@ -2202,45 +2222,44 @@ compute_partial_antic_aux (basic_block b
      VH.1 + 1 (VH.2), VH.2 + 1 (VH.3), etc), forever.  */
   else if (single_succ_p (block))
     {
-      basic_block succ = single_succ (block);
-      if (!(single_succ_edge (block)->flags & EDGE_DFS_BACK))
-	phi_translate_set (PA_OUT, PA_IN (succ), block, succ);
+      e = single_succ_edge (block);
+      if (!(e->flags & EDGE_DFS_BACK))
+	phi_translate_set (PA_OUT, PA_IN (e->dest), e);
     }
   /* If we have multiple successors, we take the union of all of
      them.  */
   else
     {
       size_t i;
-      basic_block bprime;
 
-      auto_vec<basic_block> worklist (EDGE_COUNT (block->succs));
+      auto_vec<edge> worklist (EDGE_COUNT (block->succs));
       FOR_EACH_EDGE (e, ei, block->succs)
 	{
 	  if (e->flags & EDGE_DFS_BACK)
 	    continue;
-	  worklist.quick_push (e->dest);
+	  worklist.quick_push (e);
 	}
       if (worklist.length () > 0)
 	{
-	  FOR_EACH_VEC_ELT (worklist, i, bprime)
+	  FOR_EACH_VEC_ELT (worklist, i, e)
 	    {
 	      unsigned int i;
 	      bitmap_iterator bi;
 
-	      FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (bprime), i, bi)
+	      FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (e->dest), i, bi)
 		bitmap_value_insert_into_set (PA_OUT,
 					      expression_for_id (i));
-	      if (!gimple_seq_empty_p (phi_nodes (bprime)))
+	      if (!gimple_seq_empty_p (phi_nodes (e->dest)))
 		{
 		  bitmap_set_t pa_in = bitmap_set_new ();
-		  phi_translate_set (pa_in, PA_IN (bprime), block, bprime);
+		  phi_translate_set (pa_in, PA_IN (e->dest), e);
 		  FOR_EACH_EXPR_ID_IN_SET (pa_in, i, bi)
 		    bitmap_value_insert_into_set (PA_OUT,
 						  expression_for_id (i));
 		  bitmap_set_free (pa_in);
 		}
 	      else
-		FOR_EACH_EXPR_ID_IN_SET (PA_IN (bprime), i, bi)
+		FOR_EACH_EXPR_ID_IN_SET (PA_IN (e->dest), i, bi)
 		  bitmap_value_insert_into_set (PA_OUT,
 						expression_for_id (i));
 	    }
@@ -3158,8 +3177,7 @@ do_pre_regular_insertion (basic_block bl
 	      gcc_assert (!(pred->flags & EDGE_FAKE));
 	      bprime = pred->src;
 	      /* We are looking at ANTIC_OUT of bprime.  */
-	      eprime = phi_translate (expr, ANTIC_IN (block), NULL,
-				      bprime, block);
+	      eprime = phi_translate (expr, ANTIC_IN (block), NULL, pred);
 
 	      /* eprime will generally only be NULL if the
 		 value of the expression, translated
@@ -3315,8 +3333,7 @@ do_pre_partial_partial_insertion (basic_
 	      gcc_assert (!(pred->flags & EDGE_FAKE));
 	      bprime = pred->src;
 	      eprime = phi_translate (expr, ANTIC_IN (block),
-				      PA_IN (block),
-				      bprime, block);
+				      PA_IN (block), pred);
 
 	      /* eprime will generally only be NULL if the
 		 value of the expression, translated
Index: gcc/testsuite/gcc.dg/torture/pr84427.c
===================================================================
--- gcc/testsuite/gcc.dg/torture/pr84427.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr84427.c	(working copy)
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+
+short a, d, e;
+unsigned char b;
+int c, f;
+char g, h;
+void fn2(int, int);
+void fn1() { fn2(e, a); }
+void fn2(int p1, int p2)
+{
+l1:
+  b = a;
+  for (; h; h--)
+    if (p1)
+      g = p2 * c;
+    else
+      {
+	c = d;
+	if (f)
+	  goto l1;
+      }
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]