This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] Fix PR41783, PRE improvement


Hi,

I noticed this while hunting a zeusmp regression that was hidden by 
our maximal set, which Richi finally removed.  Old bugs in PRE prevented us 
from reusing the same value ID when we PHI-translated a vuse on references 
(documented by the xfail on tree-ssa/ssa-pre-24.c), resulting in "new" 
expressions which weren't in the maximal set before.  Until now this was 
never much of an issue, because it only meant missed optimizations.  
Now, without the maximal set it becomes an issue because the new 
expressions are sometimes thought to be partially redundant and hence 
inserted in quite stupid places (like in the testcase for this PR, into 
the inner loop itself).

Meanwhile the unrelated other bugs are fixed, so we can reuse the same 
value ID in some circumstances (in particular if we find an outer 
dominating vuse for the vuse we are returning), fixing the immediate bug.  
This also fixes ssa-pre-24, improves ssa-pre-23.  By code inspection I 
also created ssa-pre-26 that needs the hunk in compute_avail now (allowing 
us to walk vuses when looking up references).

Regstrapped on x86_64-linux (all langs+Ada), no regressions.  Okay for 
trunk?


Ciao,
Michael.
-- 
	PR tree-optimization/41783
	* tree-ssa-alias.c (get_continuation_for_phi): Export, add a special
	case for simple diamonds.
	* tree-ssa-alias.h (get_continuation_for_phi): Declare.
	* tree-ssa-pre.c (translate_vuse_through_block): Add same_valid
	argument, use alias oracle to skip some vdefs.
	(phi_translate_1): Change call to above, don't allocate new
	value ids if they can stay the same.
	(compute_avail): Allow vuse walking when looking up references.

testsuite/
	* gcc.dg/pr41783.c: New test.
	* gcc.dg/tree-ssa/ssa-pre-23.c: Adjust.
	* gcc.dg/tree-ssa/ssa-pre-24.c: Don't xfail anymore.
	* gcc.dg/tree-ssa/ssa-pre-26.c: New test.

Index: tree-ssa-alias.c
===================================================================
--- tree-ssa-alias.c	(Revision 153456)
+++ tree-ssa-alias.c	(Arbeitskopie)
@@ -1303,8 +1303,6 @@ stmt_may_clobber_ref_p (gimple stmt, tre
 }
 
 
-static tree get_continuation_for_phi (gimple, ao_ref *, bitmap *);
-
 /* Walk the virtual use-def chain of VUSE until hitting the virtual operand
    TARGET or a statement clobbering the memory reference REF in which
    case false is returned.  The walk starts with VUSE, one argument of PHI.  */
@@ -1348,7 +1346,7 @@ maybe_skip_until (gimple phi, tree targe
    clobber REF.  Returns NULL_TREE if no suitable virtual operand can
    be found.  */
 
-static tree
+tree
 get_continuation_for_phi (gimple phi, ao_ref *ref, bitmap *visited)
 {
   unsigned nargs = gimple_phi_num_args (phi);
@@ -1365,6 +1363,7 @@ get_continuation_for_phi (gimple phi, ao
       tree arg1 = PHI_ARG_DEF (phi, 1);
       gimple def0 = SSA_NAME_DEF_STMT (arg0);
       gimple def1 = SSA_NAME_DEF_STMT (arg1);
+      tree common_vuse;
 
       if (arg0 == arg1)
 	return arg0;
@@ -1383,6 +1382,26 @@ get_continuation_for_phi (gimple phi, ao
 	  if (maybe_skip_until (phi, arg1, ref, arg0, visited))
 	    return arg1;
 	}
+      /* Special case of a diamond:
+	   MEM_1 = ...
+	   goto (cond) ? L1 : L2
+	   L1: store1 = ...    #MEM_2 = vuse(MEM_1)
+	       goto L3
+	   L2: store2 = ...    #MEM_3 = vuse(MEM_1)
+	   L3: MEM_4 = PHI<MEM_2, MEM_3>
+	 We were called with the PHI at L3, MEM_2 and MEM_3 don't
+	 dominate each other, but still we can easily skip this PHI node
+	 if we recognize that the vuse MEM operand is the same for both,
+	 and that we can skip both statements (they don't clobber us).
+	 This is still linear.  Don't use maybe_skip_until, that might
+	 potentially be slow.  */
+      else if ((common_vuse = gimple_vuse (def0))
+	       && common_vuse == gimple_vuse (def1))
+	{
+	  if (!stmt_may_clobber_ref_p_1 (def0, ref)
+	      && !stmt_may_clobber_ref_p_1 (def1, ref))
+	    return common_vuse;
+	}
     }
 
   return NULL_TREE;
Index: tree-ssa-alias.h
===================================================================
--- tree-ssa-alias.h	(Revision 153456)
+++ tree-ssa-alias.h	(Arbeitskopie)
@@ -99,6 +99,7 @@ extern bool refs_output_dependent_p (tre
 extern bool ref_maybe_used_by_stmt_p (gimple, tree);
 extern bool stmt_may_clobber_ref_p (gimple, tree);
 extern bool stmt_may_clobber_ref_p_1 (gimple, ao_ref *);
+extern tree get_continuation_for_phi (gimple, ao_ref *, bitmap *);
 extern void *walk_non_aliased_vuses (ao_ref *, tree,
 				     void *(*)(ao_ref *, tree, void *),
 				     void *(*)(ao_ref *, tree, void *), void *);
Index: tree-ssa-pre.c
===================================================================
--- tree-ssa-pre.c	(Revision 153456)
+++ tree-ssa-pre.c	(Arbeitskopie)
@@ -1243,43 +1243,74 @@ do_unary:
 }
 
 /* Translate the VUSE backwards through phi nodes in PHIBLOCK, so that
-   it has the value it would have in BLOCK.  */
+   it has the value it would have in BLOCK.  Set *SAME_VALID to true
+   in case the new vuse doesn't change the value id of the OPERANDS.  */
 
 static tree
 translate_vuse_through_block (VEC (vn_reference_op_s, heap) *operands,
 			      alias_set_type set, tree type, tree vuse,
 			      basic_block phiblock,
-			      basic_block block)
+			      basic_block block, bool *same_valid)
 {
   gimple phi = SSA_NAME_DEF_STMT (vuse);
   ao_ref ref;
+  edge e = NULL;
+  bool use_oracle;
+
+  *same_valid = true;
 
   if (gimple_bb (phi) != phiblock)
     return vuse;
 
-  if (gimple_code (phi) == GIMPLE_PHI)
-    {
-      edge e = find_edge (block, phiblock);
-      return PHI_ARG_DEF (phi, e->dest_idx);
-    }
-
-  if (!ao_ref_init_from_vn_reference (&ref, set, type, operands))
-    return NULL_TREE;
+  use_oracle = ao_ref_init_from_vn_reference (&ref, set, type, operands);
 
   /* Use the alias-oracle to find either the PHI node in this block,
      the first VUSE used in this block that is equivalent to vuse or
      the first VUSE which definition in this block kills the value.  */
-  while (!stmt_may_clobber_ref_p_1 (phi, &ref))
+  if (gimple_code (phi) == GIMPLE_PHI)
+    e = find_edge (block, phiblock);
+  else if (use_oracle)
+    while (!stmt_may_clobber_ref_p_1 (phi, &ref))
+      {
+	vuse = gimple_vuse (phi);
+	phi = SSA_NAME_DEF_STMT (vuse);
+	if (gimple_bb (phi) != phiblock)
+	  return vuse;
+	if (gimple_code (phi) == GIMPLE_PHI)
+	  {
+	    e = find_edge (block, phiblock);
+	    break;
+	  }
+      }
+  else
+    return NULL_TREE;
+
+  if (e)
     {
-      vuse = gimple_vuse (phi);
-      phi = SSA_NAME_DEF_STMT (vuse);
-      if (gimple_bb (phi) != phiblock)
-	return vuse;
-      if (gimple_code (phi) == GIMPLE_PHI)
+      if (use_oracle)
 	{
-	  edge e = find_edge (block, phiblock);
-	  return PHI_ARG_DEF (phi, e->dest_idx);
+	  bitmap visited = NULL;
+	  /* Try to find a vuse that dominates this phi node by skipping
+	     non-clobbering statements.  */
+	  vuse = get_continuation_for_phi (phi, &ref, &visited);
+	  if (visited)
+	    BITMAP_FREE (visited);
 	}
+      else
+	vuse = NULL_TREE;
+      if (!vuse)
+	{
+	  /* If we didn't find any, the value ID can't stay the same,
+	     but return the translated vuse.  */
+	  *same_valid = false;
+	  vuse = PHI_ARG_DEF (phi, e->dest_idx);
+	}
+      /* ??? We would like to return vuse here as this is the canonical
+         upmost vdef that this reference is associated with.  But during
+	 insertion of the references into the hash tables we only ever
+	 directly insert with their direct gimple_vuse, hence returning
+	 something else would make us not find the other expression.  */
+      return PHI_ARG_DEF (phi, e->dest_idx);
     }
 
   return NULL_TREE;
@@ -1541,7 +1572,7 @@ phi_translate_1 (pre_expr expr, bitmap_s
 	tree vuse = ref->vuse;
 	tree newvuse = vuse;
 	VEC (vn_reference_op_s, heap) *newoperands = NULL;
-	bool changed = false;
+	bool changed = false, same_valid = true;
 	unsigned int i, j;
 	vn_reference_op_t operand;
 	vn_reference_t newref;
@@ -1647,16 +1678,16 @@ phi_translate_1 (pre_expr expr, bitmap_s
 	  {
 	    newvuse = translate_vuse_through_block (newoperands,
 						    ref->set, ref->type,
-						    vuse, phiblock, pred);
+						    vuse, phiblock, pred,
+						    &same_valid);
 	    if (newvuse == NULL_TREE)
 	      {
 		VEC_free (vn_reference_op_s, heap, newoperands);
 		return NULL;
 	      }
 	  }
-	changed |= newvuse != vuse;
 
-	if (changed)
+	if (changed || newvuse != vuse)
 	  {
 	    unsigned int new_val_id;
 	    pre_expr constant;
@@ -1690,9 +1721,15 @@ phi_translate_1 (pre_expr expr, bitmap_s
 	      }
 	    else
 	      {
-		new_val_id = get_next_value_id ();
-		VEC_safe_grow_cleared (bitmap_set_t, heap, value_expressions,
-				       get_max_value_id() + 1);
+		if (changed || !same_valid)
+		  {
+		    new_val_id = get_next_value_id ();
+		    VEC_safe_grow_cleared (bitmap_set_t, heap,
+					   value_expressions,
+					   get_max_value_id() + 1);
+		  }
+		else
+		  new_val_id = ref->value_id;
 		newref = vn_reference_insert_pieces (newvuse, ref->set,
 						     ref->type,
 						     newoperands,
@@ -3908,7 +3945,7 @@ compute_avail (void)
 
 		      vn_reference_lookup (gimple_assign_rhs1 (stmt),
 					   gimple_vuse (stmt),
-					   false, &ref);
+					   true, &ref);
 		      if (!ref)
 			continue;
 
Index: testsuite/gcc.dg/pr41783.c
===================================================================
--- testsuite/gcc.dg/pr41783.c	(Revision 0)
+++ testsuite/gcc.dg/pr41783.c	(Revision 0)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-pre" } */
+int db[100];
+int a_global_var, fact;
+int main()
+{
+  int i,j=0;
+  do
+    {
+      for (i=0; i<100; ++i)
+        db[i] = i;
+      fact = a_global_var * i;
+    }
+  while (j++ < 100);
+}
+/* We want to have exactly one load (not two) from a_global_var,
+   and we want that load to be into a PRE temporary.  */
+/* { dg-final { scan-tree-dump-times "= a_global_var;" 1 "pre" } } */
+/* { dg-final { scan-tree-dump "pretmp\[^\\n\]* = a_global_var;" "pre" } } */
+/* { dg-final { cleanup-tree-dump "pre" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-pre-23.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-pre-23.c	(Revision 153456)
+++ testsuite/gcc.dg/tree-ssa/ssa-pre-23.c	(Arbeitskopie)
@@ -9,5 +9,5 @@ void foo(int n)
     global.y += global.x*global.x;
 }
 
-/* { dg-final { scan-tree-dump "Eliminated: 2" "pre" } } */
+/* { dg-final { scan-tree-dump "Eliminated: 3" "pre" } } */
 /* { dg-final { cleanup-tree-dump "pre" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-pre-24.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-pre-24.c	(Revision 153456)
+++ testsuite/gcc.dg/tree-ssa/ssa-pre-24.c	(Arbeitskopie)
@@ -9,12 +9,7 @@ void foo(int *p, double *x, int n)
 }
 
 /* We should remove the unnecessary insertion of a phi-node and
-   _not_ end up using the phi result for replacement *p.
-   The issue here is that when PHI-translating the virtual operands
-   we assign different value-numbers to the load.  Re-running VN
-   after insertion or trying to be clever and doing this on the
-   fly during PHI translation would solve this.  The next copyprop
-   fixes this anyway.  */
+   _not_ end up using the phi result for replacement *p.  */
 
-/* { dg-final { scan-tree-dump-not "= prephitmp" "pre" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-not "= prephitmp" "pre" } } */
 /* { dg-final { cleanup-tree-dump "pre" } } */
Index: testsuite/gcc.dg/tree-ssa/ssa-pre-26.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ssa-pre-26.c	(Revision 0)
+++ testsuite/gcc.dg/tree-ssa/ssa-pre-26.c	(Revision 0)
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-pre-stats" } */
+
+int a,b,c;
+int f (int cond)
+{
+  int t;
+  if (cond)
+    t = a, c = 41;
+  else
+    t = b, c = 42;
+  return t + b;
+}
+
+/* We should be able to eliminate the second load of 'b' by inserting
+   a load into the 't = a' block.  This should happen despite the store
+   to 'c' in both arms and the resulting PHI on the .MEM vuse that the
+   second load uses.  */
+
+/* { dg-final { scan-tree-dump "Eliminated: 1" "pre" } } */
+/* { dg-final { cleanup-tree-dump "pre" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]