[PATCH v2] bb-reorder: Improve compgotos pass (PR71785)

Segher Boessenkool segher@kernel.crashing.org
Tue Nov 1 16:27:00 GMT 2016


For code like the testcase in PR71785 GCC factors all the indirect branches
to a single dispatcher that then everything jumps to.  This is because
having many indirect branches with each many jump targets does not scale
in large parts of the compiler.  Very late in the pass pipeline (right
before peephole2) the indirect branches are then unfactored again, by
the duplicate_computed_gotos pass.

This pass works by replacing branches to such a common dispatcher by a
copy of the dispatcher.  For code like this testcase this does not work
so well: most cases do a single addition instruction right before the
dispatcher, but not all, and we end up with only two indirect jumps: the
one without the addition, and the one with the addition in its own basic
block, and now everything else jumps _there_.

This patch solves this problem by simply running the core of the
duplicate_computed_gotos pass again, as long as it does any work.  The
patch looks much bigger than it is, because I factored out two routines
to simplify the control flow.

Tested on powerpc64-linux {-m32,-m64}, and on the testcase, and on a version
of the testcase that has 2000 cases instead of 4.  Is this okay for trunk?


Segher


2016-10-30  Segher Boessenkool  <segher@kernel.crashing.org>

	PR rtl-optimization/71785
	* bb-reorder.c (duplicate_computed_gotos_find_candidates): New
	function, factored out from pass_duplicate_computed_gotos::execute.
	(duplicate_computed_gotos_do_duplicate): Ditto.  Don't use BB_VISITED.
	(pass_duplicate_computed_gotos::execute): Rewrite.  Rerun the pass as
	long as it makes changes.

---
 gcc/bb-reorder.c | 219 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 123 insertions(+), 96 deletions(-)

diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c
index 85bc569..f93d684 100644
--- a/gcc/bb-reorder.c
+++ b/gcc/bb-reorder.c
@@ -2593,6 +2593,102 @@ make_pass_reorder_blocks (gcc::context *ctxt)
    which can seriously pessimize code with many computed jumps in the source
    code, such as interpreters.  See e.g. PR15242.  */
 
+/* Look for blocks that end in a computed jump in function FUN, and see if
+   such blocks are suitable for unfactoring.  If a block is a candidate for
+   unfactoring, mark it in the CANDIDATES.  A block bigger than MAX_SIZE is
+   not suitable.  */
+static void
+duplicate_computed_gotos_find_candidates (function *fun, bitmap candidates,
+					  int max_size)
+{
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, fun)
+    {
+      /* Obviously the block has to end in a computed jump.  */
+      if (!computed_jump_p (BB_END (bb)))
+	continue;
+
+      /* Only consider blocks that can be duplicated.  */
+      if (CROSSING_JUMP_P (BB_END (bb))
+	  || !can_duplicate_block_p (bb))
+	continue;
+
+      /* Make sure that the block is small enough.  */
+      int size = 0;
+      rtx_insn *insn;
+      FOR_BB_INSNS (bb, insn)
+	if (INSN_P (insn))
+	  {
+	    size += get_attr_min_length (insn);
+	    if (size > max_size)
+	       break;
+	  }
+      if (size > max_size)
+	continue;
+
+      /* Final check: there must not be any incoming abnormal edges.  */
+      int all_flags = 0;
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, bb->preds)
+	all_flags |= e->flags;
+      if (all_flags & EDGE_COMPLEX)
+	continue;
+
+      bitmap_set_bit (candidates, bb->index);
+    }
+}
+
+/* For every jump in FUN to a block in CANDIDATES try to unfactor that block
+   (i.e. duplicate it and jump to the copy instead).  Return whether any
+   change is made.  */
+static bool
+duplicate_computed_gotos_do_duplicate (function *fun, bitmap candidates)
+{
+  bool changed = false;
+
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, fun)
+    {
+      /* BB must not already be a copy.  We cannot access CANDIDATES with
+	 its index if it is.  */
+      if (get_bb_original (bb))
+	continue;
+
+      /* BB must have one outgoing edge.  That edge must not lead to
+	 the exit block or the next block.
+	 The destination must have more than one predecessor.  */
+      if (!single_succ_p (bb)
+	  || single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (fun)
+	  || single_succ (bb) == bb->next_bb
+	  || single_pred_p (single_succ (bb)))
+	continue;
+
+      /* The successor block has to be a duplication candidate.  */
+      if (!bitmap_bit_p (candidates, single_succ (bb)->index))
+	continue;
+
+      /* Don't duplicate a partition crossing edge, which requires difficult
+         fixup.  */
+      if (JUMP_P (BB_END (bb)) && CROSSING_JUMP_P (BB_END (bb)))
+	continue;
+
+      basic_block new_bb = duplicate_block (single_succ (bb),
+					    single_succ_edge (bb), bb);
+      new_bb->aux = bb->aux;
+      bb->aux = new_bb;
+      changed = true;
+    }
+
+  /* Duplicating blocks above will redirect edges and may cause hot
+     blocks previously reached by both hot and cold blocks to become
+     dominated only by cold blocks.  */
+  if (changed)
+    fixup_partitions ();
+
+  return changed;
+}
+
 namespace {
 
 const pass_data pass_data_duplicate_computed_gotos =
@@ -2634,125 +2730,56 @@ pass_duplicate_computed_gotos::gate (function *fun)
 unsigned int
 pass_duplicate_computed_gotos::execute (function *fun)
 {
-  basic_block bb, new_bb;
-  bitmap candidates;
-  int max_size;
-  bool changed = false;
-
   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
     return 0;
 
-  clear_bb_flags ();
-  cfg_layout_initialize (0);
-
   /* We are estimating the length of uncond jump insn only once
      since the code for getting the insn length always returns
      the minimal length now.  */
   if (uncond_jump_length == 0)
     uncond_jump_length = get_uncond_jump_length ();
 
-  max_size
+  int max_size
     = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
-  candidates = BITMAP_ALLOC (NULL);
 
-  /* Look for blocks that end in a computed jump, and see if such blocks
-     are suitable for unfactoring.  If a block is a candidate for unfactoring,
-     mark it in the candidates.  */
-  FOR_EACH_BB_FN (bb, fun)
+  bitmap candidates = BITMAP_ALLOC (NULL);
+
+  cfg_layout_initialize (0);
+
+  int tries = n_basic_blocks_for_fn (fun);
+  for (;;)
     {
-      rtx_insn *insn;
-      edge e;
-      edge_iterator ei;
-      int size, all_flags;
+      /* Make sure we don't iterate infinitely, that would be a bug.  */
+      tries--;
+      gcc_assert (tries != 0);
 
-      /* Build the reorder chain for the original order of blocks.  */
-      if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
-	bb->aux = bb->next_bb;
+      duplicate_computed_gotos_find_candidates (fun, candidates, max_size);
 
-      /* Obviously the block has to end in a computed jump.  */
-      if (!computed_jump_p (BB_END (bb)))
-	continue;
+      basic_block bb;
+      FOR_EACH_BB_FN (bb, fun)
+	{
+	  /* Build the reorder chain for the original order of blocks.  */
+	  if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
+	    bb->aux = bb->next_bb;
+	}
 
-      /* Only consider blocks that can be duplicated.  */
-      if (CROSSING_JUMP_P (BB_END (bb))
-	  || !can_duplicate_block_p (bb))
-	continue;
+      if (bitmap_empty_p (candidates))
+	break;
 
-      /* Make sure that the block is small enough.  */
-      size = 0;
-      FOR_BB_INSNS (bb, insn)
-	if (INSN_P (insn))
-	  {
-	    size += get_attr_min_length (insn);
-	    if (size > max_size)
-	       break;
-	  }
-      if (size > max_size)
-	continue;
+      bool changed = duplicate_computed_gotos_do_duplicate (fun, candidates);
+      if (!changed)
+	break;
 
-      /* Final check: there must not be any incoming abnormal edges.  */
-      all_flags = 0;
-      FOR_EACH_EDGE (e, ei, bb->preds)
-	all_flags |= e->flags;
-      if (all_flags & EDGE_COMPLEX)
-	continue;
-
-      bitmap_set_bit (candidates, bb->index);
-    }
-
-  /* Nothing to do if there is no computed jump here.  */
-  if (bitmap_empty_p (candidates))
-    goto done;
-
-  /* Duplicate computed gotos.  */
-  FOR_EACH_BB_FN (bb, fun)
-    {
-      if (bb->flags & BB_VISITED)
-	continue;
-
-      bb->flags |= BB_VISITED;
-
-      /* BB must have one outgoing edge.  That edge must not lead to
-	 the exit block or the next block.
-	 The destination must have more than one predecessor.  */
-      if (!single_succ_p (bb)
-	  || single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (fun)
-	  || single_succ (bb) == bb->next_bb
-	  || single_pred_p (single_succ (bb)))
-	continue;
-
-      /* The successor block has to be a duplication candidate.  */
-      if (!bitmap_bit_p (candidates, single_succ (bb)->index))
-	continue;
-
-      /* Don't duplicate a partition crossing edge, which requires difficult
-         fixup.  */
-      if (JUMP_P (BB_END (bb)) && CROSSING_JUMP_P (BB_END (bb)))
-	continue;
-
-      new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
-      new_bb->aux = bb->aux;
-      bb->aux = new_bb;
-      new_bb->flags |= BB_VISITED;
-      changed = true;
-    }
-
- done:
-  if (changed)
-    {
-      /* Duplicating blocks above will redirect edges and may cause hot
-	 blocks previously reached by both hot and cold blocks to become
-	 dominated only by cold blocks.  */
-      fixup_partitions ();
+      relink_block_chain (true);
 
       /* Merge the duplicated blocks into predecessors, when possible.  */
-      cfg_layout_finalize ();
       cleanup_cfg (0);
     }
-  else
-    cfg_layout_finalize ();
+
+  cfg_layout_finalize ();
 
   BITMAP_FREE (candidates);
+
   return 0;
 }
 
-- 
1.9.3



More information about the Gcc-patches mailing list