[PATCH] tree-optimization/102436 - restore loop store motion

Richard Biener <rguenther@suse.de>
Fri Nov 19 08:35:13 GMT 2021


This restores a case of conditional store motion that we failed to
handle after the store-motion rewrite.  We can recognize the special
case where all stores in the loop happen in a single conditionally
executed block, which ensures the stores are not re-ordered by being
executed in different loop iterations.  Handling this case separately
avoids complicating the already complex main path.
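
For illustration, here is a source-level sketch of the kind of loop
this covers (modeled on the new ssa-lim-18.c testcase) and roughly what
the flag-guarded store motion produces.  This is not the pass's GIMPLE;
the names p_lsm and p_stored are made up for the example.

  unsigned p;

  void
  foo (float *q)
  {
    for (int i = 0; i < 256; ++i)
      if (p)
        {
          /* All stores sit in this one conditionally executed block.  */
          unsigned a = p;
          *q++ = 1.f;
          p = a + 1;
        }
  }

  /* After store motion, conceptually: the load of p is hoisted to the
     preheader, the in-loop store becomes an update of a register copy,
     and a flag guards a single ordered materialization on the exit.  */
  void
  foo_sm (float *q)
  {
    unsigned p_lsm = p;
    _Bool p_stored = 0;
    for (int i = 0; i < 256; ++i)
      if (p_lsm)
        {
          *q++ = 1.f;
          p_lsm = p_lsm + 1;
          p_stored = 1;
        }
    if (p_stored)
      p = p_lsm;
  }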

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-11-18  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/102436
	* tree-ssa-loop-im.c (execute_sm_if_changed): Add mode
	to just create the if structure and return the then block.
	(execute_sm): Add flag to indicate the var will re-use
	another flag var.
	(hoist_memory_references): Support a single conditional
	block with all stores as special case.

	* gcc.dg/torture/20211118-1.c: New testcase.
	* gcc.dg/tree-ssa/ssa-lim-18.c: Likewise.
---
 gcc/testsuite/gcc.dg/torture/20211118-1.c  |  27 ++++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-18.c |  19 +++
 gcc/tree-ssa-loop-im.c                     | 162 +++++++++++++++++++--
 3 files changed, 199 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/20211118-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-18.c

diff --git a/gcc/testsuite/gcc.dg/torture/20211118-1.c b/gcc/testsuite/gcc.dg/torture/20211118-1.c
new file mode 100644
index 00000000000..67ef68420df
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/20211118-1.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+
+unsigned p[3];
+void __attribute__((noipa))
+bar (float *q, int n, int m)
+{
+  for (int i = 0; i < m; ++i)
+    {
+      if (i == n)
+        {
+          unsigned a = p[1];
+          p[1] = a + 1;
+          *q = 1.;
+        }
+      q++;
+    }
+}
+
+int main()
+{
+#if __SIZEOF_FLOAT__ == __SIZEOF_INT__
+  bar ((float *)p, 1, 3);
+  if (((float *)p)[1] != 1.)
+    __builtin_abort ();
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-18.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-18.c
new file mode 100644
index 00000000000..da19806b712
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-18.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fstrict-aliasing -fdump-tree-lim2-details" } */
+
+unsigned p;
+
+void foo (float *q)
+{
+  for (int i = 0; i < 256; ++i)
+    {
+      if (p)
+        {
+          unsigned a = p;
+          *(q++) = 1.;
+          p = a + 1;
+        }
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Executing store motion" 1 "lim2" } } */
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 8a81acae115..682406d26c1 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -1911,10 +1911,13 @@ first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
        }
      }
      if (lsm_flag)	<--
-       MEM = lsm;	<--
+       MEM = lsm;	<-- (X)
+
+  In case MEM and TMP_VAR are NULL, the function will return the then
+  block so the caller can insert (X) and other related stmts.
 */
 
-static void
+static basic_block
 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
 		       edge preheader, hash_set <basic_block> *flag_bbs,
 		       edge &append_cond_position, edge &last_cond_fallthru)
@@ -2009,10 +2012,13 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
 			    NULL_TREE, NULL_TREE);
   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
 
-  gsi = gsi_start_bb (then_bb);
   /* Insert actual store.  */
-  stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
-  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+  if (mem)
+    {
+      gsi = gsi_start_bb (then_bb);
+      stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+    }
 
   edge e1 = single_succ_edge (new_bb);
   edge e2 = make_edge (new_bb, then_bb,
@@ -2060,6 +2066,8 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
 	      update_stmt (phi);
 	    }
       }
+
+  return then_bb;
 }
 
 /* When REF is set on the location, set flag indicating the store.  */
@@ -2117,7 +2125,8 @@ struct sm_aux
 
 static void
 execute_sm (class loop *loop, im_mem_ref *ref,
-	    hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt)
+	    hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
+	    bool use_other_flag_var)
 {
   gassign *load;
   struct fmt_data fmt_data;
@@ -2146,7 +2155,7 @@ execute_sm (class loop *loop, im_mem_ref *ref,
 	  || (! flag_store_data_races && ! always_stored)))
     multi_threaded_model_p = true;
 
-  if (multi_threaded_model_p)
+  if (multi_threaded_model_p && !use_other_flag_var)
     aux->store_flag
       = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
   else
@@ -2182,7 +2191,7 @@ execute_sm (class loop *loop, im_mem_ref *ref,
   lim_data->tgt_loop = loop;
   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
 
-  if (multi_threaded_model_p)
+  if (aux->store_flag)
     {
       load = gimple_build_assign (aux->store_flag, boolean_false_node);
       lim_data = init_lim_data (load);
@@ -2555,6 +2564,140 @@ hoist_memory_references (class loop *loop, bitmap mem_refs,
   unsigned  i;
   bitmap_iterator bi;
 
+  /* There's a special case we can use ordered re-materialization for
+     conditionally excuted stores which is when all stores in the loop
+     happen in the same basic-block.  In that case we know we'll reach
+     all stores and thus can simply process that BB and emit a single
+     conditional block of ordered materializations.  See PR102436.  */
+  basic_block single_store_bb = NULL;
+  EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
+			    0, i, bi)
+    {
+      bool fail = false;
+      ref = memory_accesses.refs_list[i];
+      for (auto loc : ref->accesses_in_loop)
+	if (!gimple_vdef (loc.stmt))
+	  ;
+	else if (!single_store_bb)
+	  {
+	    single_store_bb = gimple_bb (loc.stmt);
+	    bool conditional = false;
+	    for (edge e : exits)
+	      if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
+		{
+		  /* Conditional as seen from e.  */
+		  conditional = true;
+		  break;
+		}
+	    if (!conditional)
+	      {
+		fail = true;
+		break;
+	      }
+	  }
+	else if (single_store_bb != gimple_bb (loc.stmt))
+	  {
+	    fail = true;
+	    break;
+	  }
+      if (fail)
+	{
+	  single_store_bb = NULL;
+	  break;
+	}
+    }
+  if (single_store_bb)
+    {
+      /* Analyze the single block with stores.  */
+      auto_bitmap fully_visited;
+      auto_bitmap refs_not_supported;
+      auto_bitmap refs_not_in_seq;
+      auto_vec<seq_entry> seq;
+      bitmap_copy (refs_not_in_seq, mem_refs);
+      int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
+				 seq, refs_not_in_seq, refs_not_supported,
+				 false, fully_visited);
+      if (res != 1)
+	{
+	  /* Unhandled refs can still fail this.  */
+	  bitmap_clear (mem_refs);
+	  return;
+	}
+
+      /* We cannot handle sm_other since we neither remember the
+	 stored location nor the value at the point we execute them.  */
+      for (unsigned i = 0; i < seq.length (); ++i)
+	{
+	  unsigned new_i;
+	  if (seq[i].second == sm_other
+	      && seq[i].from != NULL_TREE)
+	    seq[i].from = NULL_TREE;
+	  else if ((seq[i].second == sm_ord
+		    || (seq[i].second == sm_other
+			&& seq[i].from != NULL_TREE))
+		   && !sm_seq_push_down (seq, i, &new_i))
+	    {
+	      bitmap_set_bit (refs_not_supported, seq[new_i].first);
+	      seq[new_i].second = sm_other;
+	      seq[new_i].from = NULL_TREE;
+	    }
+	}
+      bitmap_and_compl_into (mem_refs, refs_not_supported);
+      if (bitmap_empty_p (mem_refs))
+	return;
+
+      /* Prune seq.  */
+      while (seq.last ().second == sm_other
+	     && seq.last ().from == NULL_TREE)
+	seq.pop ();
+
+      hash_map<im_mem_ref *, sm_aux *> aux_map;
+
+      /* Execute SM but delay the store materialization for ordered
+	 sequences on exit.  */
+      bool first_p = true;
+      EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
+	{
+	  ref = memory_accesses.refs_list[i];
+	  execute_sm (loop, ref, aux_map, true, !first_p);
+	  first_p = false;
+	}
+
+      /* Get at the single flag variable we eventually produced.  */
+      im_mem_ref *ref
+	= memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)];
+      sm_aux *aux = *aux_map.get (ref);
+
+      /* Materialize ordered store sequences on exits.  */
+      edge e;
+      FOR_EACH_VEC_ELT (exits, i, e)
+	{
+	  edge append_cond_position = NULL;
+	  edge last_cond_fallthru = NULL;
+	  edge insert_e = e;
+	  /* Construct the single flag variable control flow and insert
+	     the ordered seq of stores in the then block.  With
+	     -fstore-data-races we can do the stores unconditionally.  */
+	  if (aux->store_flag)
+	    insert_e
+	      = single_pred_edge
+		  (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
+					  aux->store_flag,
+					  loop_preheader_edge (loop),
+					  &aux->flag_bbs, append_cond_position,
+					  last_cond_fallthru));
+	  execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
+			   append_cond_position, last_cond_fallthru);
+	  gsi_commit_one_edge_insert (insert_e, NULL);
+	}
+
+      for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
+	   iter != aux_map.end (); ++iter)
+	delete (*iter).second;
+
+      return;
+    }
+
   /* To address PR57359 before actually applying store-motion check
      the candidates found for validity with regards to reordering
      relative to other stores which we until here disambiguated using
@@ -2693,7 +2836,8 @@ hoist_memory_references (class loop *loop, bitmap mem_refs,
   EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
     {
       ref = memory_accesses.refs_list[i];
-      execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i));
+      execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
+		  false);
     }
 
   /* Materialize ordered store sequences on exits.  */
-- 
2.31.1

