This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][RFC] Fix CFG cleanup compile-time hog, PR85964


On Tue, 29 May 2018, Richard Biener wrote:

> 
> The following fixes the situation where the initial sweep over the
> CFG to remove trivially dead code regions causes excessive compile-time
> because of using remove_edge_and_dominated_blocks and thus
> iterate_fix_dominators.
> 
> The good thing is that I added cleanup_control_flow_pre doing this
> initial sweep in PRE order.  So we can simply remove the entry edges
> into the dead code regions and use the visited bitmap kept by the PRE
> walk to remove unreachable blocks afterwards.
> 
> For dominators we then re-compute them from scratch which is way faster
> for the testcase (a reduced one gets CFG cleanup time down from
> 19s to 0.16s).  The testcase still runs into a backwards jump-threading
> scalability issue.
> 
> Note the patch will be slower for the case of not many removed edges
> but it looks hard to find a good cut-off upfront.
> 
> Note we unfortunately cannot merge this with the unreachable block
> removal because of the intermediate code to insert loop entry
> forwarders which needs dominators to identify natural loop backedges.
> 
> Any opinions?

This is a less intrusive variant also factoring in the unreachable
block removal at the start of CFG cleanup.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

I'll go ahead with this tomorrow unless there are any comments.

Richard.

2018-06-04  Richard Biener  <rguenther@suse.de>

	* tree-cfgcleanup.c (cleanup_control_flow_pre): For edge
	removal pretend DOM info isn't available so we do not update
	it and only remove edges, not dominated blocks.  Actually free
	DOM info in case we removed something.  Remove unreachable blocks.
	(mfb_keep_latches): Work with either DOM info or marked backedges.
	(cleanup_tree_cfg_noloop): Do not remove unreachable blocks
	first.  Mark backedges if DOM info isn't available.
	(Re-)compute DOM info after cleanup_control_flow_pre.

Index: gcc/tree-cfgcleanup.c
===================================================================
--- gcc/tree-cfgcleanup.c	(revision 261145)
+++ gcc/tree-cfgcleanup.c	(working copy)
@@ -684,8 +684,8 @@ want_merge_blocks_p (basic_block bb1, ba
 }
 
 
-/* Tries to cleanup cfg in basic block BB.  Returns true if anything
-   changes.  */
+/* Tries to cleanup cfg in basic block BB by merging blocks.  Returns
+   true if anything changes.  */
 
 static bool
 cleanup_tree_cfg_bb (basic_block bb)
@@ -725,6 +725,12 @@ cleanup_control_flow_pre ()
 {
   bool retval = false;
 
+  /* We want remove_edge_and_dominated_blocks to only remove edges,
+     not dominated blocks which it does when dom info isn't available.
+     Pretend so.  */
+  dom_state saved_state = dom_info_state (CDI_DOMINATORS);
+  set_dom_info_availability (CDI_DOMINATORS, DOM_NONE);
+
   auto_vec<edge_iterator, 20> stack (n_basic_blocks_for_fn (cfun) + 1);
   auto_sbitmap visited (last_basic_block_for_fn (cfun));
   bitmap_clear (visited);
@@ -741,6 +747,8 @@ cleanup_control_flow_pre ()
 	  && ! bitmap_bit_p (visited, dest->index))
 	{
 	  bitmap_set_bit (visited, dest->index);
+	  /* We only possibly remove edges from DEST here, leaving
+	     possibly unreachable code in the IL.  */
 	  retval |= cleanup_control_flow_bb (dest);
 	  if (EDGE_COUNT (dest->succs) > 0)
 	    stack.quick_push (ei_start (dest->succs));
@@ -754,13 +762,35 @@ cleanup_control_flow_pre ()
 	}
     }
 
+  set_dom_info_availability (CDI_DOMINATORS, saved_state);
+
+  /* We are deleting BBs in non-reverse dominator order, make sure
+     insert_debug_temps_for_defs is prepared for that.  */
+  if (retval)
+    free_dominance_info (CDI_DOMINATORS);
+
+  /* Remove all now (and previously) unreachable blocks.  */
+  for (int i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); ++i)
+    {
+      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
+      if (bb && !bitmap_bit_p (visited, bb->index))
+	{
+	  if (!retval)
+	    free_dominance_info (CDI_DOMINATORS);
+	  delete_basic_block (bb);
+	  retval = true;
+	}
+    }
+
   return retval;
 }
 
 static bool
 mfb_keep_latches (edge e)
 {
-  return ! dominated_by_p (CDI_DOMINATORS, e->src, e->dest);
+  return !((dom_info_available_p (CDI_DOMINATORS)
+	    && dominated_by_p (CDI_DOMINATORS, e->src, e->dest))
+	   || (e->flags & EDGE_DFS_BACK));
 }
 
 /* Remove unreachable blocks and other miscellaneous clean up work.
@@ -769,23 +799,8 @@ mfb_keep_latches (edge e)
 static bool
 cleanup_tree_cfg_noloop (void)
 {
-  bool changed;
-
   timevar_push (TV_TREE_CLEANUP_CFG);
 
-  /* If dominance information is available, there cannot be any unreachable
-     blocks.  */
-  if (!dom_info_available_p (CDI_DOMINATORS))
-    {
-      changed = delete_unreachable_blocks ();
-      calculate_dominance_info (CDI_DOMINATORS);
-    }
-  else
-    {
-      checking_verify_dominators (CDI_DOMINATORS);
-      changed = false;
-    }
-
   /* Ensure that we have single entries into loop headers.  Otherwise
      if one of the entries is becoming a latch due to CFG cleanup
      (from formerly being part of an irreducible region) then we mess
@@ -795,6 +810,10 @@ cleanup_tree_cfg_noloop (void)
      we need to capture the dominance state before the pending transform.  */
   if (current_loops)
     {
+      /* This needs backedges or dominators.  */
+      if (!dom_info_available_p (CDI_DOMINATORS))
+	mark_dfs_back_edges ();
+
       loop_p loop;
       unsigned i;
       FOR_EACH_VEC_ELT (*get_loops (cfun), i, loop)
@@ -816,7 +835,9 @@ cleanup_tree_cfg_noloop (void)
 		    any_abnormal = true;
 		    break;
 		  }
-		if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
+		if ((dom_info_available_p (CDI_DOMINATORS)
+		     && dominated_by_p (CDI_DOMINATORS, e->src, bb))
+		    || (e->flags & EDGE_DFS_BACK))
 		  {
 		    found_latch = true;
 		    continue;
@@ -850,12 +871,19 @@ cleanup_tree_cfg_noloop (void)
   /* Start by iterating over all basic blocks in PRE order looking for
      edge removal opportunities.  Do this first because incoming SSA form
      may be invalid and we want to avoid performing SSA related tasks such
-     as propgating out a PHI node during BB merging in that state.  */
-  changed |= cleanup_control_flow_pre ();
+     as propgating out a PHI node during BB merging in that state.  This
+     also gets rid of unreachable blocks.  */
+  bool changed = cleanup_control_flow_pre ();
 
   /* After doing the above SSA form should be valid (or an update SSA
      should be required).  */
 
+  /* Compute dominator info which we need for the iterative process below.  */
+  if (!dom_info_available_p (CDI_DOMINATORS))
+    calculate_dominance_info (CDI_DOMINATORS);
+  else
+    checking_verify_dominators (CDI_DOMINATORS);
+
   /* During forwarder block cleanup, we may redirect edges out of
      SWITCH_EXPRs, which can get expensive.  So we want to enable
      recording of edge to CASE_LABEL_EXPR.  */
@@ -884,6 +912,10 @@ cleanup_tree_cfg_noloop (void)
       if (!bb)
 	continue;
 
+      /* BB merging done by cleanup_tree_cfg_bb can end up propagating
+	 out single-argument PHIs which in turn can expose
+	 cleanup_control_flow_bb opportunities so we have to repeat
+	 that here.  */
       changed |= cleanup_control_flow_bb (bb);
       changed |= cleanup_tree_cfg_bb (bb);
     }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]