[PATCH 4/5][P1] [PR tree-optimization/71437] Use a dominator order walk rather than random for VRP threading



This is patch #4 in the kit for solving PR 71437. It turns VRP's jump threading from a walk over the edges in essentially random order into a dominator-order walk. It became clear very quickly in that work that, with this change, DOM and VRP would end up with duplicated code, so that code was refactored and pushed down into the threading module. As a result the patch looks much larger than it actually is.
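
For reference, the walker both passes now use has this shape; the following is a condensed sketch of the vrp_dom_walker class added by the patch below (the comments are mine, and the DOM walker differs only in its pass-specific members):

  class vrp_dom_walker : public dom_walker
  {
  public:
    vrp_dom_walker (cdi_direction direction,
                    class const_and_copies *const_and_copies,
                    class avail_exprs_stack *avail_exprs_stack)
      : dom_walker (direction, true),
        m_const_and_copies (const_and_copies),
        m_avail_exprs_stack (avail_exprs_stack),
        m_dummy_cond (NULL) {}

    /* Record equivalences from the ASSERT_EXPRs at the start of BB;
       they stay live while BB's dominator children are walked.  */
    virtual edge before_dom_children (basic_block);

    /* Hand BB's outgoing edges to thread_outgoing_edges, then unwind
       the equivalence tables to their state on entry to BB.  */
    virtual void after_dom_children (basic_block);

  private:
    class const_and_copies *m_const_and_copies;
    class avail_exprs_stack *m_avail_exprs_stack;
    gcond *m_dummy_cond;
  };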

As part of using a domwalk for threading in VRP, we also allocate and use an available expression table. Knowing that we always have an expression table allows for some light cleanups in the threading code.

The setup/teardown of data structures for threading gets cleaned up ever so slightly here as well.
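
With the walker in place, the setup and teardown in identify_jump_threads reduces to allocating the two unwindable tables, walking from the entry block, and deleting the tables afterward (condensed from the patch below):

  const_and_copies *equiv_stack = new const_and_copies ();

  hash_table<expr_elt_hasher> *avail_exprs
    = new hash_table<expr_elt_hasher> (1024);
  avail_exprs_stack *avail_exprs_stack
    = new class avail_exprs_stack (avail_exprs);

  vrp_dom_walker walker (CDI_DOMINATORS, equiv_stack, avail_exprs_stack);
  walker.walk (cfun->cfg->x_entry_block_ptr);

  delete equiv_stack;
  delete avail_exprs_stack;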


Bootstrapped and regression tested on top of patches #1-#3 of this kit on x86_64-linux-gnu and ppc64le-linux-gnu. I'll be installing it momentarily.

Jeff

	PR tree-optimization/71437
	* tree-ssa-dom.c (dom_opt_dom_walker): Remove thread_across_edge
	member function.  Implementation moved into after_dom_children
	member function and into the threader's thread_outgoing_edges
	function.
	(dom_opt_dom_walker::after_dom_children): Simplify by moving
	some code into new thread_outgoing_edges.
	* tree-ssa-threadedge.c (thread_across_edge): Make static and simplify
	definition.  Simplify marker handling by pushing and popping the
	markers here rather than in the callers.  Assume we always have the
	available expression and const/copies tables.
	(thread_outgoing_edges): New function extracted from tree-ssa-dom.c
	and tree-vrp.c.
	* tree-ssa-threadedge.h (thread_outgoing_edges): Declare.
	* tree-vrp.c (equiv_stack): No longer file scoped.
	(vrp_dom_walker): New class.
	(vrp_dom_walker::before_dom_children): New member function.
	(vrp_dom_walker::after_dom_children): Likewise.
	(identify_jump_threads): Set up a domwalker.  Use it rather than
	walking edges in a random order by hand.  Simplify setup/finalization.
	(finalize_jump_threads): Remove.
	(vrp_finalize): Do not call identify_jump_threads here.
	(execute_vrp): Do it here instead and call thread_through_all_blocks
	here too.

diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c
index 32468e9..c6ffc38 100644
--- a/gcc/tree-ssa-dom.c
+++ b/gcc/tree-ssa-dom.c
@@ -350,7 +350,6 @@ public:
   virtual void after_dom_children (basic_block);
 
 private:
-  void thread_across_edge (edge);
 
   /* Unwindable equivalences, both const/copy and expression varieties.  */
   class const_and_copies *m_const_and_copies;
@@ -816,39 +815,6 @@ record_temporary_equivalences (edge e,
     }
 }
 
-/* Wrapper for common code to attempt to thread an edge.  For example,
-   it handles lazily building the dummy condition and the bookkeeping
-   when jump threading is successful.  */
-
-void
-dom_opt_dom_walker::thread_across_edge (edge e)
-{
-  if (! m_dummy_cond)
-    m_dummy_cond =
-        gimple_build_cond (NE_EXPR,
-                           integer_zero_node, integer_zero_node,
-                           NULL, NULL);
-
-  /* Push a marker on both stacks so we can unwind the tables back to their
-     current state.  */
-  m_avail_exprs_stack->push_marker ();
-  m_const_and_copies->push_marker ();
-
-  /* With all the edge equivalences in the tables, go ahead and attempt
-     to thread through E->dest.  */
-  ::thread_across_edge (m_dummy_cond, e,
-		        m_const_and_copies, m_avail_exprs_stack,
-		        simplify_stmt_for_jump_threading);
-
-  /* And restore the various tables to their state before
-     we threaded this edge.
-
-     XXX The code in tree-ssa-threadedge.c will restore the state of
-     the const_and_copies table.  We we just have to restore the expression
-     table.  */
-  m_avail_exprs_stack->pop_to_marker ();
-}
-
 /* PHI nodes can create equivalences too.
 
    Ignoring any alternatives which are the same as the result, if
@@ -1224,38 +1190,13 @@ dom_opt_dom_walker::before_dom_children (basic_block bb)
 void
 dom_opt_dom_walker::after_dom_children (basic_block bb)
 {
-  gimple *last;
-
-  /* If we have an outgoing edge to a block with multiple incoming and
-     outgoing edges, then we may be able to thread the edge, i.e., we
-     may be able to statically determine which of the outgoing edges
-     will be traversed when the incoming edge from BB is traversed.  */
-  if (single_succ_p (bb)
-      && (single_succ_edge (bb)->flags & EDGE_ABNORMAL) == 0
-      && potentially_threadable_block (single_succ (bb)))
-    {
-      thread_across_edge (single_succ_edge (bb));
-    }
-  else if ((last = last_stmt (bb))
-	   && gimple_code (last) == GIMPLE_COND
-	   && EDGE_COUNT (bb->succs) == 2
-	   && (EDGE_SUCC (bb, 0)->flags & EDGE_ABNORMAL) == 0
-	   && (EDGE_SUCC (bb, 1)->flags & EDGE_ABNORMAL) == 0)
-    {
-      edge true_edge, false_edge;
-
-      extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
-
-      /* Only try to thread the edge if it reaches a target block with
-	 more than one predecessor and more than one successor.  */
-      if (potentially_threadable_block (true_edge->dest))
-	thread_across_edge (true_edge);
-
-      /* Similarly for the ELSE arm.  */
-      if (potentially_threadable_block (false_edge->dest))
-	thread_across_edge (false_edge);
+  if (! m_dummy_cond)
+    m_dummy_cond = gimple_build_cond (NE_EXPR, integer_zero_node,
+				      integer_zero_node, NULL, NULL);
 
-    }
+  thread_outgoing_edges (bb, m_dummy_cond, m_const_and_copies,
+			 m_avail_exprs_stack,
+			 simplify_stmt_for_jump_threading);
 
   /* These remove expressions local to BB from the tables.  */
   m_avail_exprs_stack->pop_to_marker ();
diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index 7f70b40..536c471 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -1100,16 +1100,18 @@ thread_through_normal_block (edge e,
 
    SIMPLIFY is a pass-specific function used to simplify statements.  */
 
-void
+static void
 thread_across_edge (gcond *dummy_cond,
 		    edge e,
 		    class const_and_copies *const_and_copies,
 		    class avail_exprs_stack *avail_exprs_stack,
-		    tree (*simplify) (gimple *, gimple *,
-				      class avail_exprs_stack *, basic_block))
+		    pfn_simplify simplify)
 {
   bitmap visited = BITMAP_ALLOC (NULL);
 
+  const_and_copies->push_marker ();
+  avail_exprs_stack->push_marker ();
+
   stmt_count = 0;
 
   vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
@@ -1132,6 +1134,7 @@ thread_across_edge (gcond *dummy_cond,
       propagate_threaded_block_debug_into (path->last ()->e->dest,
 					   e->dest);
       const_and_copies->pop_to_marker ();
+      avail_exprs_stack->pop_to_marker ();
       BITMAP_FREE (visited);
       register_jump_thread (path);
       return;
@@ -1156,6 +1159,7 @@ thread_across_edge (gcond *dummy_cond,
 	{
 	  BITMAP_FREE (visited);
 	  const_and_copies->pop_to_marker ();
+          avail_exprs_stack->pop_to_marker ();
 	  return;
 	}
     }
@@ -1182,6 +1186,7 @@ thread_across_edge (gcond *dummy_cond,
       if (taken_edge->flags & EDGE_ABNORMAL)
 	{
 	  const_and_copies->pop_to_marker ();
+          avail_exprs_stack->pop_to_marker ();
 	  BITMAP_FREE (visited);
 	  return;
 	}
@@ -1196,8 +1201,7 @@ thread_across_edge (gcond *dummy_cond,
 	/* Push a fresh marker so we can unwind the equivalences created
 	   for each of E->dest's successors.  */
 	const_and_copies->push_marker ();
-	if (avail_exprs_stack)
-	  avail_exprs_stack->push_marker ();
+	avail_exprs_stack->push_marker ();
 
 	/* Avoid threading to any block we have already visited.  */
 	bitmap_clear (visited);
@@ -1240,12 +1244,71 @@ thread_across_edge (gcond *dummy_cond,
 	  delete_jump_thread_path (path);
 
 	/* And unwind the equivalence table.  */
-	if (avail_exprs_stack)
-	  avail_exprs_stack->pop_to_marker ();
+	avail_exprs_stack->pop_to_marker ();
 	const_and_copies->pop_to_marker ();
       }
     BITMAP_FREE (visited);
   }
 
   const_and_copies->pop_to_marker ();
+  avail_exprs_stack->pop_to_marker ();
+}
+
+/* Examine the outgoing edges from BB and conditionally
+   try to thread them.
+
+   DUMMY_COND is a shared cond_expr used by condition simplification as scratch,
+   to avoid allocating memory.
+
+   CONST_AND_COPIES is used to undo temporary equivalences created during the
+   walk of E->dest.
+
+   The available expression table is referenced via AVAIL_EXPRS_STACK.
+
+   SIMPLIFY is a pass-specific function used to simplify statements.  */
+
+void
+thread_outgoing_edges (basic_block bb, gcond *dummy_cond,
+		       class const_and_copies *const_and_copies,
+		       class avail_exprs_stack *avail_exprs_stack,
+		       tree (*simplify) (gimple *, gimple *,
+					 class avail_exprs_stack *,
+					 basic_block))
+{
+  int flags = (EDGE_IGNORE | EDGE_COMPLEX | EDGE_ABNORMAL);
+  gimple *last;
+
+  /* If we have an outgoing edge to a block with multiple incoming and
+     outgoing edges, then we may be able to thread the edge, i.e., we
+     may be able to statically determine which of the outgoing edges
+     will be traversed when the incoming edge from BB is traversed.  */
+  if (single_succ_p (bb)
+      && (single_succ_edge (bb)->flags & flags) == 0
+      && potentially_threadable_block (single_succ (bb)))
+    {
+      thread_across_edge (dummy_cond, single_succ_edge (bb),
+			  const_and_copies, avail_exprs_stack,
+			  simplify);
+    }
+  else if ((last = last_stmt (bb))
+	   && gimple_code (last) == GIMPLE_COND
+	   && EDGE_COUNT (bb->succs) == 2
+	   && (EDGE_SUCC (bb, 0)->flags & flags) == 0
+	   && (EDGE_SUCC (bb, 1)->flags & flags) == 0)
+    {
+      edge true_edge, false_edge;
+
+      extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
+
+      /* Only try to thread the edge if it reaches a target block with
+	 more than one predecessor and more than one successor.  */
+      if (potentially_threadable_block (true_edge->dest))
+	thread_across_edge (dummy_cond, true_edge,
+			    const_and_copies, avail_exprs_stack, simplify);
+
+      /* Similarly for the ELSE arm.  */
+      if (potentially_threadable_block (false_edge->dest))
+	thread_across_edge (dummy_cond, false_edge,
+			    const_and_copies, avail_exprs_stack, simplify);
+    }
 }
diff --git a/gcc/tree-ssa-threadedge.h b/gcc/tree-ssa-threadedge.h
index 3516f14..49dfa9c 100644
--- a/gcc/tree-ssa-threadedge.h
+++ b/gcc/tree-ssa-threadedge.h
@@ -30,10 +30,10 @@ extern void threadedge_initialize_values (void);
 extern void threadedge_finalize_values (void);
 extern bool potentially_threadable_block (basic_block);
 extern void propagate_threaded_block_debug_into (basic_block, basic_block);
-extern void thread_across_edge (gcond *, edge,
-				const_and_copies *,
-				avail_exprs_stack *,
-				tree (*) (gimple *, gimple *,
-					  avail_exprs_stack *, basic_block));
+extern void thread_outgoing_edges (basic_block, gcond *,
+				   const_and_copies *,
+				   avail_exprs_stack *,
+				   tree (*) (gimple *, gimple *,
+					     avail_exprs_stack *, basic_block));
 
 #endif /* GCC_TREE_SSA_THREADEDGE_H */
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 82ef742..4a09a57 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -10746,9 +10746,6 @@ vrp_fold_stmt (gimple_stmt_iterator *si)
   return simplify_stmt_using_ranges (si);
 }
 
-/* Unwindable const/copy equivalences.  */
-const_and_copies *equiv_stack;
-
 /* Return the LHS of any ASSERT_EXPR where OP appears as the first
    argument to the ASSERT_EXPR and in which the ASSERT_EXPR dominates
    BB.  If no such ASSERT_EXPR is found, return OP.  */
@@ -10879,6 +10876,74 @@ simplify_stmt_for_jump_threading (gimple *stmt, gimple *within_stmt,
   return NULL_TREE;
 }
 
+class vrp_dom_walker : public dom_walker
+{
+public:
+  vrp_dom_walker (cdi_direction direction,
+		  class const_and_copies *const_and_copies,
+		  class avail_exprs_stack *avail_exprs_stack)
+    : dom_walker (direction, true),
+      m_const_and_copies (const_and_copies),
+      m_avail_exprs_stack (avail_exprs_stack),
+      m_dummy_cond (NULL) {}
+
+  virtual edge before_dom_children (basic_block);
+  virtual void after_dom_children (basic_block);
+
+private:
+  class const_and_copies *m_const_and_copies;
+  class avail_exprs_stack *m_avail_exprs_stack;
+
+  gcond *m_dummy_cond;
+};
+
+/* Called before processing dominator children of BB.  We want to look
+   at ASSERT_EXPRs and record information from them in the appropriate
+   tables.
+
+   We could look at other statements here, but doing so is unlikely
+   to significantly increase the number of jump threads we discover.  */
+
+edge
+vrp_dom_walker::before_dom_children (basic_block bb)
+{
+  gimple_stmt_iterator gsi;
+
+  for (gsi = gsi_start_nondebug_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple *stmt = gsi_stmt (gsi);
+      if (gimple_assign_single_p (stmt)
+         && TREE_CODE (gimple_assign_rhs1 (stmt)) == ASSERT_EXPR)
+	{
+	  tree lhs = gimple_assign_lhs (stmt);
+	  tree rhs1 = gimple_assign_rhs1 (stmt);
+	  m_const_and_copies->record_const_or_copy (lhs,
+						    TREE_OPERAND (rhs1, 0));
+	  continue;
+	}
+      break;
+    }
+  return NULL;
+}
+
+/* Called after processing dominator children of BB.  This is where we
+   actually call into the threader.  */
+void
+vrp_dom_walker::after_dom_children (basic_block bb)
+{
+  if (!m_dummy_cond)
+    m_dummy_cond = gimple_build_cond (NE_EXPR,
+				      integer_zero_node, integer_zero_node,
+				      NULL, NULL);
+
+  thread_outgoing_edges (bb, m_dummy_cond, m_const_and_copies,
+			 m_avail_exprs_stack,
+			 simplify_stmt_for_jump_threading);
+
+  m_avail_exprs_stack->pop_to_marker ();
+  m_const_and_copies->pop_to_marker ();
+}
+
 /* Blocks which have more than one predecessor and more than
    one successor present jump threading opportunities, i.e.,
    when the block is reached from a specific predecessor, we
@@ -10902,8 +10967,6 @@ simplify_stmt_for_jump_threading (gimple *stmt, gimple *within_stmt,
 static void
 identify_jump_threads (void)
 {
-  basic_block bb;
-  gcond *dummy;
   int i;
   edge e;
 
@@ -10926,69 +10989,15 @@ identify_jump_threads (void)
 
   /* Allocate our unwinder stack to unwind any temporary equivalences
      that might be recorded.  */
-  equiv_stack = new const_and_copies ();
-
-  /* To avoid lots of silly node creation, we create a single
-     conditional and just modify it in-place when attempting to
-     thread jumps.  */
-  dummy = gimple_build_cond (EQ_EXPR,
-			     integer_zero_node, integer_zero_node,
-			     NULL, NULL);
-
-  /* Walk through all the blocks finding those which present a
-     potential jump threading opportunity.  We could set this up
-     as a dominator walker and record data during the walk, but
-     I doubt it's worth the effort for the classes of jump
-     threading opportunities we are trying to identify at this
-     point in compilation.  */
-  FOR_EACH_BB_FN (bb, cfun)
-    {
-      gimple *last;
+  const_and_copies *equiv_stack = new const_and_copies ();
 
-      /* If the generic jump threading code does not find this block
-	 interesting, then there is nothing to do.  */
-      if (! potentially_threadable_block (bb))
-	continue;
+  hash_table<expr_elt_hasher> *avail_exprs
+    = new hash_table<expr_elt_hasher> (1024);
+  avail_exprs_stack *avail_exprs_stack
+    = new class avail_exprs_stack (avail_exprs);
 
-      last = last_stmt (bb);
-
-      /* We're basically looking for a switch or any kind of conditional with
-	 integral or pointer type arguments.  Note the type of the second
-	 argument will be the same as the first argument, so no need to
-	 check it explicitly. 
-
-	 We also handle the case where there are no statements in the
-	 block.  This come up with forwarder blocks that are not
-	 optimized away because they lead to a loop header.  But we do
-	 want to thread through them as we can sometimes thread to the
-	 loop exit which is obviously profitable.  */
-      if (!last
-	  || gimple_code (last) == GIMPLE_SWITCH
-	  || (gimple_code (last) == GIMPLE_COND
-      	      && TREE_CODE (gimple_cond_lhs (last)) == SSA_NAME
-	      && (INTEGRAL_TYPE_P (TREE_TYPE (gimple_cond_lhs (last)))
-		  || POINTER_TYPE_P (TREE_TYPE (gimple_cond_lhs (last))))
-	      && (TREE_CODE (gimple_cond_rhs (last)) == SSA_NAME
-		  || is_gimple_min_invariant (gimple_cond_rhs (last)))))
-	{
-	  edge_iterator ei;
-
-	  /* We've got a block with multiple predecessors and multiple
-	     successors which also ends in a suitable conditional or
-	     switch statement.  For each predecessor, see if we can thread
-	     it to a specific successor.  */
-	  FOR_EACH_EDGE (e, ei, bb->preds)
-	    {
-	      /* Do not thread across edges marked to ignoreor abnormal
-		 edges in the CFG.  */
-	      if (e->flags & (EDGE_IGNORE | EDGE_COMPLEX))
-		continue;
-
-	      thread_across_edge (dummy, e, equiv_stack, NULL,
-				  simplify_stmt_for_jump_threading);
-	    }
-	}
-    }
+  vrp_dom_walker walker (CDI_DOMINATORS, equiv_stack, avail_exprs_stack);
+  walker.walk (cfun->cfg->x_entry_block_ptr);
 
   /* Clear EDGE_IGNORE.  */
   FOR_EACH_VEC_ELT (to_remove_edges, i, e)
@@ -10997,19 +11006,8 @@ identify_jump_threads (void)
   /* We do not actually update the CFG or SSA graphs at this point as
      ASSERT_EXPRs are still in the IL and cfg cleanup code does not yet
      handle ASSERT_EXPRs gracefully.  */
-}
-
-/* We identified all the jump threading opportunities earlier, but could
-   not transform the CFG at that time.  This routine transforms the
-   CFG and arranges for the dominator tree to be rebuilt if necessary.
-
-   Note the SSA graph update will occur during the normal TODO
-   processing by the pass manager.  */
-static void
-finalize_jump_threads (void)
-{
-  thread_through_all_blocks (false);
   delete equiv_stack;
+  delete avail_exprs_stack;
 }
 
 /* Free VRP lattice.  */
@@ -11075,10 +11073,6 @@ vrp_finalize (bool warn_array_bounds_p)
 
   if (warn_array_bounds && warn_array_bounds_p)
     check_all_array_refs ();
-
-  /* We must identify jump threading opportunities before we release
-     the datastructures built by VRP.  */
-  identify_jump_threads ();
 }
 
 /* evrp_dom_walker visits the basic blocks in the dominance order and set
@@ -11670,6 +11664,11 @@ execute_vrp (bool warn_array_bounds_p)
   vrp_initialize ();
   ssa_propagate (vrp_visit_stmt, vrp_visit_phi_node);
   vrp_finalize (warn_array_bounds_p);
+
+  /* We must identify jump threading opportunities before we release
+     the datastructures built by VRP.  */
+  identify_jump_threads ();
+
   vrp_free_lattice ();
 
   free_numbers_of_iterations_estimates (cfun);
@@ -11686,7 +11685,13 @@ execute_vrp (bool warn_array_bounds_p)
      duplication and CFG manipulation.  */
   update_ssa (TODO_update_ssa);
 
-  finalize_jump_threads ();
+  /* We identified all the jump threading opportunities earlier, but could
+     not transform the CFG at that time.  This routine transforms the
+     CFG and arranges for the dominator tree to be rebuilt if necessary.
+
+     Note the SSA graph update will occur during the normal TODO
+     processing by the pass manager.  */
+  thread_through_all_blocks (false);
 
   /* Remove dead edges from SWITCH_EXPR optimization.  This leaves the
      CFG in a broken state and requires a cfg_cleanup run.  */
