PowerPC shrink-wrap support 3 of 3

Alan Modra amodra@gmail.com
Wed Oct 26 13:03:00 GMT 2011


On Sun, Oct 16, 2011 at 02:51:01PM -0400, David Edelsohn wrote:
> The patch is okay, although I am not thrilled about the need to change
> the register allocation order.

Committed revision 180522.  It turns out that shrink-wrapping isn't as
effective as it used to be with the 20110915 based sources I was using
originally.  povray Ray_In_Bound no longer gets the benefit of shrink
wrap, likely due to some cfg optimization.  We end up with a simple
block that just does r3=1 then jumps to last_bb being reached from
blocks that need a prologue as well as blocks that don't.  That's
enough to kill our current shrink wrap implementation.  What we need
is something to duplicate these tail blocks..

Patch here for comment.  I haven't yet run benchmarks, but this passes
bootstrap and regression test (on rev 180286, current virgin mainline
fails bootstrap on powerpc-linux).

	* function.c (thread_prologue_and_epilogue_insns): Delete
	shadowing variables.  Don't do prologue register clobber tests
	when shrink wrapping already failed.  Compute tail block
	candidates for duplicating exit path.  Remove these from
	antic set.  Duplicate tails when reached from both blocks
	needing a prologue/epilogue and blocks not needing such.

Index: gcc/function.c
===================================================================
--- gcc/function.c	(revision 180467)
+++ gcc/function.c	(working copy)
@@ -5697,11 +5697,11 @@ thread_prologue_and_epilogue_insns (void
       HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
       HARD_REG_SET set_up_by_prologue;
       rtx p_insn;
-
       VEC(basic_block, heap) *vec;
       basic_block bb;
       bitmap_head bb_antic_flags;
       bitmap_head bb_on_list;
+      bitmap_head bb_tail;
 
       if (dump_file)
 	fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
@@ -5732,6 +5732,7 @@ thread_prologue_and_epilogue_insns (void
 
       bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
       bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
+      bitmap_initialize (&bb_tail, &bitmap_default_obstack);
 
       /* Find the set of basic blocks that require a stack frame.  */
 
@@ -5774,19 +5775,22 @@ thread_prologue_and_epilogue_insns (void
 		}
 	}
 
+      /* Save a copy of blocks that really need a prologue.  */
+      bitmap_copy (&bb_antic_flags, &bb_flags);
+
       /* For every basic block that needs a prologue, mark all blocks
 	 reachable from it, so as to ensure they are also seen as
 	 requiring a prologue.  */
       while (!VEC_empty (basic_block, vec))
 	{
 	  basic_block tmp_bb = VEC_pop (basic_block, vec);
-	  edge e;
-	  edge_iterator ei;
+
 	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
 	    if (e->dest != EXIT_BLOCK_PTR
 		&& bitmap_set_bit (&bb_flags, e->dest->index))
 	      VEC_quick_push (basic_block, vec, e->dest);
 	}
+
       /* If the last basic block contains only a label, we'll be able
 	 to convert jumps to it to (potentially conditional) return
 	 insns later.  This means we don't necessarily need a prologue
@@ -5799,14 +5803,29 @@ thread_prologue_and_epilogue_insns (void
 	    goto fail_shrinkwrap;
 	}
 
+      /* Find the set of basic blocks that need no prologue and only
+	 go to the exit.  */
+      bitmap_set_bit (&bb_tail, EXIT_BLOCK_PTR->index);
+      VEC_quick_push (basic_block, vec, EXIT_BLOCK_PTR);
+      while (!VEC_empty (basic_block, vec))
+	{
+	  basic_block tmp_bb = VEC_pop (basic_block, vec);
+
+	  FOR_EACH_EDGE (e, ei, tmp_bb->preds)
+	    if (single_succ_p (e->src)
+		&& !bitmap_bit_p (&bb_antic_flags, e->src->index)
+		&& bitmap_set_bit (&bb_tail, e->src->index))
+	      VEC_quick_push (basic_block, vec, e->src);
+	}
+
       /* Now walk backwards from every block that is marked as needing
-	 a prologue to compute the bb_antic_flags bitmap.  */
-      bitmap_copy (&bb_antic_flags, &bb_flags);
+	 a prologue to compute the bb_antic_flags bitmap.  Exclude
+	 tail blocks; They can be duplicated to be used on paths not
+	 needing a prologue.  */
+      bitmap_and_compl (&bb_antic_flags, &bb_flags, &bb_tail);
       FOR_EACH_BB (bb)
 	{
-	  edge e;
-	  edge_iterator ei;
-	  if (!bitmap_bit_p (&bb_flags, bb->index))
+	  if (!bitmap_bit_p (&bb_antic_flags, bb->index))
 	    continue;
 	  FOR_EACH_EDGE (e, ei, bb->preds)
 	    if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
@@ -5816,8 +5835,6 @@ thread_prologue_and_epilogue_insns (void
       while (!VEC_empty (basic_block, vec))
 	{
 	  basic_block tmp_bb = VEC_pop (basic_block, vec);
-	  edge e;
-	  edge_iterator ei;
 	  bool all_set = true;
 
 	  bitmap_clear_bit (&bb_on_list, tmp_bb->index);
@@ -5862,28 +5879,172 @@ thread_prologue_and_epilogue_insns (void
 		}
 	  }
 
-      /* Test whether the prologue is known to clobber any register
-	 (other than FP or SP) which are live on the edge.  */
-      CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
-      if (frame_pointer_needed)
-	CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
-      CLEAR_HARD_REG_SET (live_on_edge);
-      reg_set_to_hard_reg_set (&live_on_edge,
-			       df_get_live_in (entry_edge->dest));
-      if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
+      if (entry_edge != orig_entry_edge)
 	{
-	  entry_edge = orig_entry_edge;
-	  if (dump_file)
-	    fprintf (dump_file, "Shrink-wrapping aborted due to clobber.\n");
+	  /* Test whether the prologue is known to clobber any register
+	     (other than FP or SP) which are live on the edge.  */
+	  CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
+	  if (frame_pointer_needed)
+	    CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
+	  CLEAR_HARD_REG_SET (live_on_edge);
+	  reg_set_to_hard_reg_set (&live_on_edge,
+				   df_get_live_in (entry_edge->dest));
+	  if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
+	    {
+	      entry_edge = orig_entry_edge;
+	      if (dump_file)
+		fprintf (dump_file,
+			 "Shrink-wrapping aborted due to clobber.\n");
+	    }
 	}
-      else if (entry_edge != orig_entry_edge)
+      if (entry_edge != orig_entry_edge)
 	{
 	  crtl->shrink_wrapped = true;
 	  if (dump_file)
 	    fprintf (dump_file, "Performing shrink-wrapping.\n");
+
+	  /* Find tail blocks reachable from both blocks needing a
+	     prologue and blocks not needing a prologue.  */
+	  bitmap_clear_bit (&bb_tail, EXIT_BLOCK_PTR->index);
+	  if (!bitmap_empty_p (&bb_tail))
+	    FOR_EACH_BB (bb)
+	      {
+		bool some_pro, some_no_pro;
+		if (!bitmap_bit_p (&bb_tail, bb->index))
+		  continue;
+		some_pro = some_no_pro = false;
+		FOR_EACH_EDGE (e, ei, bb->preds)
+		  {
+		    if (bitmap_bit_p (&bb_flags, e->src->index))
+		      some_pro = true;
+		    else
+		      some_no_pro = true;
+		  }
+		if (some_pro && some_no_pro)
+		  VEC_quick_push (basic_block, vec, bb);
+		else
+		  bitmap_clear_bit (&bb_tail, bb->index);
+	      }
+	  /* Find the head of each tail.  */
+	  while (!VEC_empty (basic_block, vec))
+	    {
+	      basic_block tbb = VEC_pop (basic_block, vec);
+
+	      if (!bitmap_bit_p (&bb_tail, tbb->index))
+		continue;
+
+	      while (single_succ_p (tbb))
+		{
+		  tbb = single_succ (tbb);
+		  bitmap_clear_bit (&bb_tail, tbb->index);
+		}
+	    }
+	  /* Now duplicate the tails.  */
+	  if (!bitmap_empty_p (&bb_tail))
+	    FOR_EACH_BB_REVERSE (bb)
+	      {
+		basic_block copy_bb, next_bb, fall_bb;
+		edge fall_into;
+		rtx insert_point, last;
+
+		if (!bitmap_clear_bit (&bb_tail, bb->index))
+		  continue;
+
+		copy_bb = NULL;
+		next_bb = single_succ (bb);
+		fall_into = find_fallthru_edge (bb->preds);
+		fall_bb = (fall_into ? fall_into->src : NULL);
+
+		if (fall_into
+		    && !bitmap_bit_p (&bb_flags, fall_bb->index))
+		  e = fall_into;
+		else
+		  {
+		    FOR_EACH_EDGE (e, ei, bb->preds)
+		      if (!bitmap_bit_p (&bb_flags, e->src->index))
+			break;
+		    gcc_assert (e);
+		  }
+
+		/* Create a copy of BB, instructions and all, for
+		   use on paths that don't need a prologue.  We know
+		   BB has a single successor, so there is no need to
+		   copy a jump at the end of BB.  */
+		start_sequence ();
+		last = BB_END (bb);
+		if (simplejump_p (last))
+		  last = PREV_INSN (last);
+		duplicate_insn_chain (BB_HEAD (bb), last);
+		copy_bb = split_edge (e);
+		emit_insn_after (get_insns (), BB_END (copy_bb));
+		end_sequence ();
+		df_set_bb_dirty (copy_bb);
+		force_nonfallthru_and_redirect (single_succ_edge (copy_bb),
+						EXIT_BLOCK_PTR,
+						simple_return_rtx);
+		insert_point = BB_END (copy_bb);
+		gcc_assert (JUMP_P (insert_point));
+
+		/* If there was a fall-thru edge into bb, then
+		   creating copy_bb may have required inserting a
+		   jump block.  Set bb_flags for the jump block.  */
+		if (fall_bb
+		    && bitmap_bit_p (&bb_flags, fall_bb->index))
+		  FOR_EACH_EDGE (e, ei, fall_bb->succs)
+		    if (e->dest != EXIT_BLOCK_PTR)
+		      bitmap_set_bit (&bb_flags, e->dest->index);
+
+		/* Redirect all the paths that need no prologue into
+		   copy_bb.  */
+		for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); )
+		  if (!bitmap_bit_p (&bb_flags, e->src->index))
+		    {
+		      redirect_edge_and_branch_force (e, copy_bb);
+		      continue;
+		    }
+		  else
+		    ei_next (&ei);
+
+		while (next_bb != EXIT_BLOCK_PTR)
+		  {
+		    basic_block tbb = next_bb;
+		    next_bb = single_succ (tbb);
+		    e = split_block (copy_bb, PREV_INSN (insert_point));
+		    copy_bb = e->dest;
+		    start_sequence ();
+		    last = BB_END (tbb);
+		    if (simplejump_p (last))
+		      last = PREV_INSN (last);
+		    duplicate_insn_chain (BB_HEAD (tbb), last);
+		    emit_insn_before (get_insns (), insert_point);
+		    end_sequence ();
+
+		    for (ei = ei_start (tbb->preds); (e = ei_safe_edge (ei)); )
+		      if (!bitmap_bit_p (&bb_flags, e->src->index))
+			{
+			  redirect_edge_and_branch_force (e, copy_bb);
+			  continue;
+			}
+		      else
+			ei_next (&ei);
+		  }
+		/* Our tail may just end in a sibling call, in which
+		   case we don't want the simple_return jump added by
+		   force_nonfallthru_and_redirect.  */
+		if (CALL_P (PREV_INSN (insert_point))
+		    && SIBLING_CALL_P (PREV_INSN (insert_point)))
+		  {
+		    delete_insn (insert_point);
+		    e = single_succ_edge (copy_bb);
+		    e->flags = EDGE_SIBCALL | EDGE_ABNORMAL;
+		  }
+		if (bitmap_empty_p (&bb_tail))
+		  break;
+	      }
 	}
 
     fail_shrinkwrap:
+      bitmap_clear (&bb_tail);
       bitmap_clear (&bb_antic_flags);
       bitmap_clear (&bb_on_list);
       VEC_free (basic_block, heap, vec);

-- 
Alan Modra
Australia Development Lab, IBM



More information about the Gcc-patches mailing list