[RFC] Don't move cold code out of loop by checking bb count

Xiong Hu Luo luoxhu@linux.ibm.com
Mon Aug 2 05:05:01 GMT 2021


There was a patch trying to avoid move cold block out of loop:

https://gcc.gnu.org/pipermail/gcc/2014-November/215551.html

Richard suggested to "never hoist anything from a bb with lower execution
frequency to a bb with higher one in LIM invariantness_dom_walker
before_dom_children".

This patch does this profile count check in both gimple LIM
move_computations_worker and RTL loop-invariant.c find_invariants_bb,
if the loop bb is colder than loop preheader, don't hoist it out of
loop.

Also, the profile count in loop split pass should be corrected to avoid
lim2 and lim4 mismatch behavior, currently, the new loop preheader generated
by loop_version is set to "[count: 0]:", then lim4 after lsplt pass will
move statement out of loop unexpectely when lim2 didn't move it.  This
change could fix regression on 544.nab_r from -1.55% to +0.46%.

SPEC2017 performance evaluation shows 1% performance improvement for
intrate GEOMEAN and no obvious regression for others.  Especially,
500.perlbench_r +7.52% (Perf shows function S_regtry of perlbench is
largely improved.), and 548.exchange2_r+1.98%, 526.blender_r +1.00%
on P8LE.

Regression and bootstrap tested pass on P8LE, any comments?  Thanks.

gcc/ChangeLog:

	* loop-invariant.c (find_invariants_bb): Check profile count
	before motion.
	(find_invariants_body): Add argument.
	* tree-ssa-loop-im.c (move_computations_worker): Check profile
	count before motion.
	(execute_sm): Likewise.
	(execute_sm_exit): Check pointer validness.
	* tree-ssa-loop-split.c (split_loop): Correct probability.
	(do_split_loop_on_cond): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/recip-3.c: Adjust.
---
 gcc/loop-invariant.c                    |  10 +-
 gcc/testsuite/gcc.dg/tree-ssa/recip-3.c |   2 +-
 gcc/tree-ssa-loop-im.c                  | 164 +++++++++++++++++++++++-
 gcc/tree-ssa-loop-split.c               |  14 +-
 4 files changed, 177 insertions(+), 13 deletions(-)

diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c
index bdc7b59dd5f..7b5d64d11f9 100644
--- a/gcc/loop-invariant.c
+++ b/gcc/loop-invariant.c
@@ -1183,9 +1183,14 @@ find_invariants_insn (rtx_insn *insn, bool always_reached, bool always_executed)
    call.  */
 
 static void
-find_invariants_bb (basic_block bb, bool always_reached, bool always_executed)
+find_invariants_bb (class loop *loop, basic_block bb, bool always_reached,
+		    bool always_executed)
 {
   rtx_insn *insn;
+  basic_block preheader = loop_preheader_edge (loop)->src;
+
+  if (preheader->count > bb->count)
+    return;
 
   FOR_BB_INSNS (bb, insn)
     {
@@ -1214,8 +1219,7 @@ find_invariants_body (class loop *loop, basic_block *body,
   unsigned i;
 
   for (i = 0; i < loop->num_nodes; i++)
-    find_invariants_bb (body[i],
-			bitmap_bit_p (always_reached, i),
+    find_invariants_bb (loop, body[i], bitmap_bit_p (always_reached, i),
 			bitmap_bit_p (always_executed, i));
 }
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/recip-3.c b/gcc/testsuite/gcc.dg/tree-ssa/recip-3.c
index 638bf38db8c..641c91e719e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/recip-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/recip-3.c
@@ -23,4 +23,4 @@ float h ()
 	F[0] += E / d;
 }
 
-/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
+/* { dg-final { scan-tree-dump-times " / " 5 "recip" } } */
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 7de47edbcb3..2bfb5e8ec15 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -1147,6 +1147,61 @@ move_computations_worker (basic_block bb)
 	  continue;
 	}
 
+      edge e = loop_preheader_edge (level);
+      if (e->src->count > bb->count)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "PHI node NOT moved to %d from %d:\n",
+		       e->src->index, bb->index);
+	      print_gimple_stmt (dump_file, stmt, 0);
+	      fprintf (dump_file, "(cost %u) out of loop %d.\n\n", cost,
+		       level->num);
+	    }
+	  gsi_next (&bsi);
+	  continue;
+	}
+      else
+	{
+	  unsigned i;
+	  bool skip_phi_move = false;
+	  for (i = 0; i < gimple_phi_num_args (stmt); i++)
+	    {
+	      tree def = PHI_ARG_DEF (stmt, i);
+
+	      if (TREE_CODE (def) != SSA_NAME)
+		continue;
+
+	      gimple *def_stmt = SSA_NAME_DEF_STMT (def);
+
+	      if (!gimple_bb (def_stmt))
+		continue;
+
+	      if (!dominated_by_p (CDI_DOMINATORS, e->src,
+				   gimple_bb (def_stmt)))
+		{
+		  if (dump_file && (dump_flags & TDF_DETAILS))
+		    {
+		      fprintf (dump_file,
+			       "PHI node NOT moved to %d [local count:%d] from "
+			       "%d [local count:%d]:\n",
+			       e->src->index, e->src->count.value (), bb->index,
+			       bb->count.value ());
+		      print_gimple_stmt (dump_file, stmt, 0);
+		      fprintf (dump_file, "(cost %u) out of loop %d.\n\n", cost,
+			       level->num);
+		    }
+		  skip_phi_move = true;
+		  break;
+		}
+	    }
+	  if (skip_phi_move)
+	    {
+	      gsi_next (&bsi);
+	      continue;
+	    }
+	}
+
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
 	  fprintf (dump_file, "Moving PHI node\n");
@@ -1184,14 +1239,13 @@ move_computations_worker (basic_block bb)
 	  tree lhs = gimple_assign_lhs (new_stmt);
 	  SSA_NAME_RANGE_INFO (lhs) = NULL;
 	}
-      gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
+      gsi_insert_on_edge (e, new_stmt);
       remove_phi_node (&bsi, false);
     }
 
   for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
     {
       edge e;
-
       gimple *stmt = gsi_stmt (bsi);
 
       lim_data = get_lim_data (stmt);
@@ -1214,7 +1268,90 @@ move_computations_worker (basic_block bb)
       /* We do not really want to move conditionals out of the loop; we just
 	 placed it here to force its operands to be moved if necessary.  */
       if (gimple_code (stmt) == GIMPLE_COND)
-	continue;
+	{
+	  gsi_next (&bsi);
+	  continue;
+	}
+
+      e = loop_preheader_edge (level);
+      if (e->src->count > bb->count)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file,
+		       "stmt: Statement NOT moved to %d [local count:%d] from "
+		       "%d [local count:%d]:\n",
+		       e->src->index, e->src->count.value (), bb->index,
+		       bb->count.value ());
+	      print_gimple_stmt (dump_file, stmt, 0);
+	      fprintf (dump_file, "(cost %u) out of loop %d.\n\n", cost,
+		       level->num);
+	    }
+	  gsi_next (&bsi);
+	  continue;
+	}
+      else
+	{
+	  if (is_gimple_assign (stmt))
+	    {
+	      tree rhs1 = gimple_assign_rhs1 (stmt);
+	      tree rhs2 = gimple_assign_rhs2 (stmt);
+	      if (TREE_CODE (rhs1) == MEM_REF)
+		{
+		  rhs2 = TREE_OPERAND (rhs1, 1);
+		  rhs1 = TREE_OPERAND (rhs1, 0);
+		}
+	      gimple *stmt1 = NULL, *stmt2 = NULL;
+	      basic_block def_bb;
+	      if (rhs1 && TREE_CODE (rhs1) == SSA_NAME)
+		{
+		  stmt1 = SSA_NAME_DEF_STMT (rhs1);
+		  def_bb = gimple_bb (stmt1);
+		  if (stmt1
+		      && def_bb
+		      && (def_bb == bb
+			  || !dominated_by_p (CDI_DOMINATORS, e->src, def_bb)))
+		    {
+		      if (dump_file && (dump_flags & TDF_DETAILS))
+			{
+			  fprintf (dump_file,
+				   "stmt1: Statement NOT moved to %d [local "
+				   "count:%d] from %d [local count:%d]:\n",
+				   e->src->index, e->src->count.value (),
+				   bb->index, bb->count.value ());
+			  print_gimple_stmt (dump_file, stmt, 0);
+			  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
+				   cost, level->num);
+			}
+		      gsi_next (&bsi);
+		      continue;
+		    }
+		}
+	      if (rhs2 && TREE_CODE (rhs2) == SSA_NAME)
+		{
+		  stmt2 = SSA_NAME_DEF_STMT (rhs2);
+		  def_bb = gimple_bb (stmt2);
+		  if (stmt2 && def_bb
+		      && (def_bb == bb
+			  || !dominated_by_p (CDI_DOMINATORS, e->src, def_bb)))
+		    {
+		      if (dump_file && (dump_flags & TDF_DETAILS))
+			{
+			  fprintf (dump_file,
+				   "stmt2: Statement NOT moved to %d [local "
+				   "count:%d] from %d [local count:%d]:\n",
+				   e->src->index, e->src->count.value (),
+				   bb->index, bb->count.value ());
+			  print_gimple_stmt (dump_file, stmt, 0);
+			  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
+				   cost, level->num);
+			}
+		      gsi_next (&bsi);
+		      continue;
+		    }
+		}
+	    }
+	}
 
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
@@ -1224,7 +1361,6 @@ move_computations_worker (basic_block bb)
 		   cost, level->num);
 	}
 
-      e = loop_preheader_edge (level);
       gcc_assert (!gimple_vdef (stmt));
       if (gimple_vuse (stmt))
 	{
@@ -2094,6 +2230,19 @@ execute_sm (class loop *loop, im_mem_ref *ref,
   bool multi_threaded_model_p = false;
   gimple_stmt_iterator gsi;
   sm_aux *aux = new sm_aux;
+  basic_block bb = gimple_bb (first_mem_ref_loc (loop, ref)->stmt);
+
+  edge e = loop_preheader_edge (loop);
+  if (e->src->count > bb->count)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file, "Don't execute store motion of ");
+	  print_generic_expr (dump_file, ref->mem.ref);
+	  fprintf (dump_file, " from loop %d\n", loop->num);
+	}
+      return;
+    }
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -2202,7 +2351,12 @@ execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
 	}
       else
 	{
-	  sm_aux *aux = *aux_map.get (ref);
+	  sm_aux **paux = aux_map.get (ref);
+	  sm_aux *aux;
+	  if (paux)
+	    aux = *paux;
+	  else
+	    continue;
 	  if (!aux->store_flag || kind == sm_ord)
 	    {
 	      gassign *store;
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 3a09bbc39e5..4cae82936b9 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -577,14 +577,17 @@ split_loop (class loop *loop1)
 	if (!initial_true)
 	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); 
 
+	edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE
+			   ? EDGE_SUCC (bbs[i], 0)
+			   : EDGE_SUCC (bbs[i], 1);
 	/* Now version the loop, placing loop2 after loop1 connecting
 	   them, and fix up SSA form for that.  */
 	initialize_original_copy_tables ();
 	basic_block cond_bb;
 
 	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
-					   profile_probability::always (),
-					   profile_probability::always (),
+					   true_edge->probability,
+					   true_edge->probability.invert (),
 					   profile_probability::always (),
 					   profile_probability::always (),
 					   true);
@@ -1486,8 +1489,8 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
   initialize_original_copy_tables ();
 
   struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
-				     profile_probability::always (),
-				     profile_probability::never (),
+				     invar_branch->probability,
+				     invar_branch->probability.invert (),
 				     profile_probability::always (),
 				     profile_probability::always (),
 				     true);
@@ -1530,6 +1533,9 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
   to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
   to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
 
+  to_loop1->probability = invar_branch->probability.invert ();
+  to_loop2->probability = invar_branch->probability;
+
   /* Due to introduction of a control flow edge from loop1 latch to loop2
      pre-header, we should update PHIs in loop2 to reflect this connection
      between loop1 and loop2.  */
-- 
2.27.0.90.geebb51ba8c



More information about the Gcc-patches mailing list