[committed][PR rtl-optimization/87761] Limited iteration in regcprop to pick up secondary opportunities

Jakub Jelinek jakub@redhat.com
Wed Mar 27 14:51:00 GMT 2019


On Sun, Mar 24, 2019 at 09:20:07AM -0600, Jeff Law wrote:
> However, I'm increasingly of the opinion that MIPS targets need to drop
> off the priority platform list.  Given the trajectory I see for MIPS
> based processors in industry, it's really hard to justify spending this
> much time on them, particularly for low priority code quality issues.

Besides what has been discussed on IRC for the PR89826 fix — namely that we
really need a df_analyze before processing the first block, because otherwise
we can't rely on the REG_UNUSED notes in the IL — I see some other issues, but
I admit I don't know much about df or regcprop.

1) the df_analyze () after every (successful) processing of a basic block
is IMHO way too expensive; I would be very surprised if df_analyze () isn't
quadratic in the number of basic blocks, and so one could construct testcases
with millions of basic blocks and at least one regcprop change in each bb
and get cubic complexity (correct me if I'm wrong, and I'm aware of the
95% of bbs you said won't have any changes at all)

2) another thing I've noticed today: we queue up the debug insn changes,
but if we iterate a second time for any bb, we overwrite and thus lose any
of the queued debug insn changes from the first iteration, so those changes
aren't applied (wrong-debug?)

3) as mentioned on IRC, the order of tests in the copyprop_hardreg_forward_1
conditional doesn't go from the cheapest to the most expensive one

So, do we want something like the following untested patch?  (Only tested
that the PR89826 testcase is fixed on ARM; the df_analyze right after
df_set_flags is all that is needed to cure that.)  I've outlined
two hunks of code into separate functions, so that I can call them from two
different spots.  And I don't see why, in the debug processing loop, we
should test the visited bitmap — isn't it guaranteed to be set on all
the bbs after going through the first FOR_EACH_BB_FN loop?
On the other hand, I thought it might be beneficial not to do the debug
FOR_EACH_BB_FN loop(s) if there aren't any debug insn changes queued (not
sure how often that would happen; if not often enough, that part can be
dropped).

--- gcc/regcprop.c.jj	2019-03-25 11:02:55.826998948 +0100
+++ gcc/regcprop.c	2019-03-27 15:15:23.505651565 +0100
@@ -800,9 +800,9 @@ copyprop_hardreg_forward_1 (basic_block
 
       /* Detect obviously dead sets (via REG_UNUSED notes) and remove them.  */
       if (set
-	  && !may_trap_p (set)
-	  && !RTX_FRAME_RELATED_P (insn)
 	  && INSN_P (insn)
+	  && !RTX_FRAME_RELATED_P (insn)
+	  && !may_trap_p (set)
 	  && find_reg_note (insn, REG_UNUSED, SET_DEST (set))
 	  && !side_effects_p (SET_SRC (set))
 	  && !side_effects_p (SET_DEST (set)))
@@ -1282,6 +1282,66 @@ public:
 
 }; // class pass_cprop_hardreg
 
+static bool
+cprop_hardreg_bb (basic_block bb, struct value_data *all_vd, sbitmap visited)
+{
+  bitmap_set_bit (visited, bb->index);
+
+  /* If a block has a single predecessor, that we've already
+     processed, begin with the value data that was live at
+     the end of the predecessor block.  */
+  /* ??? Ought to use more intelligent queuing of blocks.  */
+  if (single_pred_p (bb)
+      && bitmap_bit_p (visited, single_pred (bb)->index)
+      && ! (single_pred_edge (bb)->flags & (EDGE_ABNORMAL_CALL | EDGE_EH)))
+    {
+      all_vd[bb->index] = all_vd[single_pred (bb)->index];
+      if (all_vd[bb->index].n_debug_insn_changes)
+	{
+	  unsigned int regno;
+
+	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+	    {
+	      if (all_vd[bb->index].e[regno].debug_insn_changes)
+		{
+		  all_vd[bb->index].e[regno].debug_insn_changes = NULL;
+		  if (--all_vd[bb->index].n_debug_insn_changes == 0)
+		    break;
+		}
+	    }
+	}
+    }
+  else
+    init_value_data (all_vd + bb->index);
+
+  return copyprop_hardreg_forward_1 (bb, all_vd + bb->index);
+}
+
+static void
+cprop_hardreg_debug (function *fun, struct value_data *all_vd)
+{
+  basic_block bb;
+
+  FOR_EACH_BB_FN (bb, fun)
+    if (all_vd[bb->index].n_debug_insn_changes)
+      {
+	unsigned int regno;
+	bitmap live;
+
+	live = df_get_live_out (bb);
+	for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+	  if (all_vd[bb->index].e[regno].debug_insn_changes)
+	    {
+	      if (REGNO_REG_SET_P (live, regno))
+		apply_debug_insn_changes (all_vd + bb->index, regno);
+	      if (all_vd[bb->index].n_debug_insn_changes == 0)
+		break;
+	    }
+      }
+
+  queued_debug_insn_change_pool.release ();
+}
+
 unsigned int
 pass_cprop_hardreg::execute (function *fun)
 {
@@ -1293,6 +1353,9 @@ pass_cprop_hardreg::execute (function *f
   auto_sbitmap visited (last_basic_block_for_fn (fun));
   bitmap_clear (visited);
 
+  auto_vec<int> redo;
+  bool any_debug_changes = false;
+
   df_note_add_problem ();
 
   /* It is tempting to set DF_LR_RUN_DCE, but DCE may choose to delete
@@ -1305,71 +1368,42 @@ pass_cprop_hardreg::execute (function *f
      data structures appropriately.  */
   df_set_flags (DF_DEFER_INSN_RESCAN);
 
+  df_analyze ();
+
   FOR_EACH_BB_FN (bb, fun)
     {
-      bitmap_set_bit (visited, bb->index);
-
-      for (int pass = 0; pass < 2; pass++)
-	{
-	  /* If a block has a single predecessor, that we've already
-	     processed, begin with the value data that was live at
-	    the end of the predecessor block.  */
-	  /* ??? Ought to use more intelligent queuing of blocks.  */
-	  if (single_pred_p (bb)
-	      && bitmap_bit_p (visited, single_pred (bb)->index)
-	      && ! (single_pred_edge (bb)->flags & (EDGE_ABNORMAL_CALL | EDGE_EH)))
-	    {
-	      all_vd[bb->index] = all_vd[single_pred (bb)->index];
-	      if (all_vd[bb->index].n_debug_insn_changes)
-		{
-		  unsigned int regno;
-
-		  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-		    {
-		      if (all_vd[bb->index].e[regno].debug_insn_changes)
-			{
-			  all_vd[bb->index].e[regno].debug_insn_changes = NULL;
-			  if (--all_vd[bb->index].n_debug_insn_changes == 0)
-			    break;
-			}
-		    }
-		}
-	    }
-	  else
-	    init_value_data (all_vd + bb->index);
-
-	  /* If we were unable to propagate, then break the loop.  */
-	  if (!copyprop_hardreg_forward_1 (bb, all_vd + bb->index))
-	    break;
-	  df_analyze ();
-	}
+      if (cprop_hardreg_bb (bb, all_vd, visited))
+	redo.safe_push (bb->index);
+      if (all_vd[bb->index].n_debug_insn_changes)
+	any_debug_changes = true;
     }
 
   /* We must call df_analyze here unconditionally to ensure that the
      REG_UNUSED and REG_DEAD notes are consistent with and without -g.  */
   df_analyze ();
 
-  if (MAY_HAVE_DEBUG_BIND_INSNS)
+  if (MAY_HAVE_DEBUG_BIND_INSNS && any_debug_changes)
+    cprop_hardreg_debug (fun, all_vd);
+
+  /* Second pass if we've changed anything, only for the bbs where we have
+     changed anything though.  */
+  if (!redo.is_empty ())
     {
-      FOR_EACH_BB_FN (bb, fun)
-	if (bitmap_bit_p (visited, bb->index)
-	    && all_vd[bb->index].n_debug_insn_changes)
-	  {
-	    unsigned int regno;
-	    bitmap live;
+      unsigned int i;
+      int index;
 
-	    live = df_get_live_out (bb);
-	    for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
-	      if (all_vd[bb->index].e[regno].debug_insn_changes)
-		{
-		  if (REGNO_REG_SET_P (live, regno))
-		    apply_debug_insn_changes (all_vd + bb->index, regno);
-		  if (all_vd[bb->index].n_debug_insn_changes == 0)
-		    break;
-		}
-	  }
+      any_debug_changes = false;
+      FOR_EACH_VEC_ELT (redo, i, index)
+	{
+	  bb = BASIC_BLOCK_FOR_FN (fun, index);
+	  cprop_hardreg_bb (bb, all_vd, visited);
+	  if (all_vd[bb->index].n_debug_insn_changes)
+	    any_debug_changes = true;
+	}
 
-      queued_debug_insn_change_pool.release ();
+      df_analyze ();
+      if (MAY_HAVE_DEBUG_BIND_INSNS && any_debug_changes)
+	cprop_hardreg_debug (fun, all_vd);
     }
 
   free (all_vd);


	Jakub



More information about the Gcc-patches mailing list