Use modref summary to DSE calls to non-pure functions

Jan Hubicka hubicka@kam.mff.cuni.cz
Fri Nov 12 11:39:16 GMT 2021


Hi,
this is updated patch.  It moves the summary walk checking if we can
possibly suceed on dse to summary->finalize member function so it is done
once per summary and refactors dse_optimize_call to be called from
dse_optimize_stmt after early checks.

I did not try to handle the special case of parm_offset_known but we can
do it incrementally.  I think initializing range with offset being
polyin64_int_min and max_size unkonwn as suggested is going to work.
I am bit worried this is in bits so we have 2^61 range instead of 2^64
but I guess once can not offset pointer back and forth in valid program?

Bootstrapped/regtested x86_64-linux, ltobootstrap in progress, OK if it
succeeds?

gcc/ChangeLog:

	* ipa-modref.c (modref_summary::modref_summary): Clear new flags.
	(modref_summary::dump): Dump try_dse.
	(modref_summary::finalize): Add FUN attribute; compute try-dse.
	(analyze_function): Update.
	(read_section): Update.
	(update_signature): Update.
	(pass_ipa_modref::execute): Update.
	* ipa-modref.h (struct modref_summary):
	* tree-ssa-alias.c (ao_ref_init_from_ptr_and_range): Export.
	* tree-ssa-alias.h (ao_ref_init_from_ptr_and_range): Declare.
	* tree-ssa-dse.c: Include cgraph.h, ipa-modref-tree.h and
	ipa-modref.h
	(dse_optimize_call): New function.
	(dse_optimize_stmt): Use it.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/modref-dse-1.c: New test.
	* gcc.dg/tree-ssa/modref-dse-2.c: New test.

diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index c6efacb0e20..ea6a27ae767 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -276,7 +276,8 @@ static GTY(()) fast_function_summary <modref_summary_lto *, va_gc>
 
 modref_summary::modref_summary ()
   : loads (NULL), stores (NULL), retslot_flags (0), static_chain_flags (0),
-    writes_errno (false), side_effects (false)
+    writes_errno (false), side_effects (false), global_memory_read (false),
+    global_memory_written (false), try_dse (false)
 {
 }
 
@@ -605,6 +606,8 @@ modref_summary::dump (FILE *out)
     fprintf (out, "  Global memory read\n");
   if (global_memory_written)
     fprintf (out, "  Global memory written\n");
+  if (try_dse)
+    fprintf (out, "  Try dse\n");
   if (arg_flags.length ())
     {
       for (unsigned int i = 0; i < arg_flags.length (); i++)
@@ -661,12 +664,56 @@ modref_summary_lto::dump (FILE *out)
 }
 
 /* Called after summary is produced and before it is used by local analysis.
-   Can be called multiple times in case summary needs to update signature.  */
+   Can be called multiple times in case summary needs to update signature.
+   FUN is decl of function summary is attached to.  */
 void
-modref_summary::finalize ()
+modref_summary::finalize (tree fun)
 {
   global_memory_read = !loads || loads->global_access_p ();
   global_memory_written = !stores || stores->global_access_p ();
+
+  /* We can do DSE if we know function has no side effects and
+     we can analyse all stores.  Disable dse if there are too many
+     stores to try.  */
+  if (side_effects || global_memory_written || writes_errno)
+    try_dse = false;
+  else
+    {
+      try_dse = true;
+      size_t i, j, k;
+      int num_tests = 0, max_tests
+       	= opt_for_fn (fun, param_modref_max_tests);
+      modref_base_node <alias_set_type> *base_node;
+      modref_ref_node <alias_set_type> *ref_node;
+      modref_access_node *access_node;
+      FOR_EACH_VEC_SAFE_ELT (stores->bases, i, base_node)
+	{
+	  if (base_node->every_ref)
+	    {
+	      try_dse = false;
+	      break;
+	    }
+	  FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node)
+	    {
+	      if (base_node->every_ref)
+		{
+		  try_dse = false;
+		  break;
+		}
+	      FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node)
+		if (num_tests++ > max_tests
+		    || !access_node->parm_offset_known)
+		  {
+		    try_dse = false;
+		    break;
+		  }
+	      if (!try_dse)
+		break;
+	    }
+	  if (!try_dse)
+	    break;
+	}
+    }
 }
 
 /* Get function summary for FUNC if it exists, return NULL otherwise.  */
@@ -2803,7 +2850,7 @@ analyze_function (function *f, bool ipa)
       summary = NULL;
     }
   else if (summary)
-    summary->finalize ();
+    summary->finalize (current_function_decl);
   if (summary_lto && !summary_lto->useful_p (ecf_flags))
     {
       summaries_lto->remove (fnode);
@@ -3524,7 +3571,7 @@ read_section (struct lto_file_decl_data *file_data, const char *data,
 	    }
 	}
       if (flag_ltrans)
-	modref_sum->finalize ();
+	modref_sum->finalize (node->decl);
       if (dump_file)
 	{
 	  fprintf (dump_file, "Read modref for %s\n",
@@ -3682,7 +3729,7 @@ update_signature (struct cgraph_node *node)
 	r_lto->dump (dump_file);
     }
   if (r)
-    r->finalize ();
+    r->finalize (node->decl);
   return;
 }
 
@@ -4907,7 +4954,7 @@ pass_ipa_modref::execute (function *)
 	for (struct cgraph_node *cur = component_node; cur;
 	     cur = ((struct ipa_dfs_info *) cur->aux)->next_cycle)
 	  if (modref_summary *sum = optimization_summaries->get (cur))
-	    sum->finalize ();
+	    sum->finalize (cur->decl);
       if (dump_file)
 	modref_propagate_dump_scc (component_node);
     }
diff --git a/gcc/ipa-modref.h b/gcc/ipa-modref.h
index 9a8d565d770..43353ad2ef4 100644
--- a/gcc/ipa-modref.h
+++ b/gcc/ipa-modref.h
@@ -38,13 +38,14 @@ struct GTY(()) modref_summary
   /* Flags coputed by finalize method.  */
   unsigned global_memory_read : 1;
   unsigned global_memory_written : 1;
+  unsigned try_dse : 1;
 
 
   modref_summary ();
   ~modref_summary ();
   void dump (FILE *);
   bool useful_p (int ecf_flags, bool check_flags = true);
-  void finalize ();
+  void finalize (tree);
 };
 
 modref_summary *get_modref_function_summary (cgraph_node *func);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c
new file mode 100644
index 00000000000..1f80cc57c57
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse1-details"  } */
+volatile int *ptr;
+struct a {
+	int a,b,c;
+} a;
+__attribute__((noinline))
+static int init (struct a*a)
+{
+	a->a=0;
+	a->b=1;
+}
+__attribute__((noinline))
+static int use (struct a*a)
+{
+	if (a->c != 3)
+		*ptr=5;
+}
+
+void
+main(void)
+{
+	struct a a;
+	init (&a);
+	a.c=3;
+	use (&a);
+}
+/* { dg-final { scan-tree-dump "Deleted dead store: init" "dse1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c
new file mode 100644
index 00000000000..e41d065a5e3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse2-details"  } */
+volatile int *ptr;
+struct a {
+	int a,b,c;
+} a;
+__attribute__((noinline))
+static int init (struct a*a)
+{
+	a->a=0;
+	a->b=1;
+	a->c=1;
+}
+__attribute__((noinline))
+static int use (struct a*a)
+{
+	if (a->c != 3)
+		*ptr=5;
+}
+
+void
+main(void)
+{
+	struct a a;
+	init (&a);
+	a.c=3;
+	use (&a);
+}
+/* Only DSE2 is tracking live bytes needed to figure out that store to c is
+   also dead above.  */
+/* { dg-final { scan-tree-dump "Deleted dead store: init" "dse2" } } */
diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
index 17ff6bb582c..2965902912f 100644
--- a/gcc/tree-ssa-alias.c
+++ b/gcc/tree-ssa-alias.c
@@ -782,7 +782,7 @@ ao_ref_alias_ptr_type (ao_ref *ref)
    The access is assumed to be only to or after of the pointer target adjusted
    by the offset, not before it (even in the case RANGE_KNOWN is false).  */
 
-static void
+void
 ao_ref_init_from_ptr_and_range (ao_ref *ref, tree ptr,
 				bool range_known,
 				poly_int64 offset,
diff --git a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h
index 275dea10397..64d4865f58d 100644
--- a/gcc/tree-ssa-alias.h
+++ b/gcc/tree-ssa-alias.h
@@ -111,6 +111,9 @@ ao_ref::max_size_known_p () const
 /* In tree-ssa-alias.c  */
 extern void ao_ref_init (ao_ref *, tree);
 extern void ao_ref_init_from_ptr_and_size (ao_ref *, tree, tree);
+extern void ao_ref_init_from_ptr_and_range (ao_ref *, tree, bool,
+					    poly_int64, poly_int64,
+					    poly_int64);
 extern tree ao_ref_base (ao_ref *);
 extern alias_set_type ao_ref_alias_set (ao_ref *);
 extern alias_set_type ao_ref_base_alias_set (ao_ref *);
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 27287fe88ee..0e8c4ed1435 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -40,6 +40,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimplify.h"
 #include "tree-eh.h"
 #include "cfganal.h"
+#include "cgraph.h"
+#include "ipa-modref-tree.h"
+#include "ipa-modref.h"
 
 /* This file implements dead store elimination.
 
@@ -1027,6 +1030,101 @@ delete_dead_or_redundant_assignment (gimple_stmt_iterator *gsi, const char *type
   release_defs (stmt);
 }
 
+/* Try to prove, using modref summary, that all memory written to by a call is
+   dead and remove it.  Assume that if return value is written to memory
+   it is already proved to be dead.  */
+
+static bool
+dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes)
+{
+  gcall *stmt = dyn_cast <gcall *> (gsi_stmt (*gsi));
+
+  if (!stmt)
+    return false;
+
+  tree callee = gimple_call_fndecl (stmt);
+
+  if (!callee)
+    return false;
+
+  /* Pure/const functions are optimized by normal DCE
+     or handled as store above.  */
+  int flags = gimple_call_flags (stmt);
+  if ((flags & (ECF_PURE|ECF_CONST|ECF_NOVOPS))
+      && !(flags & (ECF_LOOPING_CONST_OR_PURE)))
+    return false;
+
+  cgraph_node *node = cgraph_node::get (callee);
+  if (!node)
+    return false;
+
+  if (stmt_could_throw_p (cfun, stmt)
+      && !cfun->can_delete_dead_exceptions)
+    return false;
+
+  /* If return value is used the call is not dead.  */
+  tree lhs = gimple_call_lhs (stmt);
+  if (lhs && TREE_CODE (lhs) == SSA_NAME)
+    {
+      imm_use_iterator ui;
+      gimple *use_stmt;
+      FOR_EACH_IMM_USE_STMT (use_stmt, ui, lhs)
+	if (!is_gimple_debug (use_stmt))
+	  return false;
+    }
+
+  /* Verify that there are no side-effects except for return value
+     and memory writes tracked by modref.  */
+  modref_summary *summary = get_modref_function_summary (node);
+  if (!summary || !summary->try_dse)
+    return false;
+
+  modref_base_node <alias_set_type> *base_node;
+  modref_ref_node <alias_set_type> *ref_node;
+  modref_access_node *access_node;
+  size_t i, j, k;
+  bool by_clobber_p = false;
+
+  /* Walk all memory writes and verify that they are dead.  */
+  FOR_EACH_VEC_SAFE_ELT (summary->stores->bases, i, base_node)
+    FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node)
+      FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node)
+	{
+	  gcc_checking_assert (access_node->parm_offset_known);
+
+	  tree arg;
+	  if (access_node->parm_index == MODREF_STATIC_CHAIN_PARM)
+	    arg = gimple_call_chain (stmt);
+	  else
+	    arg = gimple_call_arg (stmt, access_node->parm_index);
+
+	  ao_ref ref;
+	  poly_offset_int off = (poly_offset_int)access_node->offset
+		+ ((poly_offset_int)access_node->parm_offset
+		   << LOG2_BITS_PER_UNIT);
+	  poly_int64 off2;
+	  if (!off.to_shwi (&off2))
+	    return false;
+	  ao_ref_init_from_ptr_and_range
+		 (&ref, arg, true, off2, access_node->size,
+		  access_node->max_size);
+	  ref.ref_alias_set = ref_node->ref;
+	  ref.base_alias_set = base_node->base;
+
+	  bool byte_tracking_enabled
+	      = setup_live_bytes_from_ref (&ref, live_bytes);
+	  enum dse_store_status store_status;
+
+	  store_status = dse_classify_store (&ref, stmt,
+					     byte_tracking_enabled,
+					     live_bytes, &by_clobber_p);
+	  if (store_status != DSE_STORE_DEAD)
+	    return false;
+	}
+  delete_dead_or_redundant_assignment (gsi, "dead", need_eh_cleanup);
+  return true;
+}
+
 /* Attempt to eliminate dead stores in the statement referenced by BSI.
 
    A dead store is a store into a memory location which will later be
@@ -1050,8 +1148,13 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
     return;
 
   ao_ref ref;
+  /* If this is not a store we can still remove dead call using
+     modref summary.  */
   if (!initialize_ao_ref_for_dse (stmt, &ref))
-    return;
+    {
+      dse_optimize_call (gsi, live_bytes);
+      return;
+    }
 
   /* We know we have virtual definitions.  We can handle assignments and
      some builtin calls.  */
@@ -1162,6 +1265,9 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
 	  || (stmt_could_throw_p (fun, stmt)
 	      && !fun->can_delete_dead_exceptions)))
     {
+      /* See if we can remove complete call.  */
+      if (dse_optimize_call (gsi, live_bytes))
+	return;
       /* Make sure we do not remove a return slot we cannot reconstruct
 	 later.  */
       if (gimple_call_return_slot_opt_p (as_a <gcall *>(stmt))


More information about the Gcc-patches mailing list