Use modref summary to DSE calls to non-pure functions

Jan Hubicka hubicka@kam.mff.cuni.cz
Wed Nov 10 12:43:16 GMT 2021


Hi,
this patch implements DSE using modref summaries: if a function has no side effects
besides storing to memory pointed to by its arguments, and if we can prove those stores
to be dead, we can optimize the call out. So we handle for example:

volatile int *ptr;
struct a {
	int a,b,c;
} a;
__attribute__((noinline))
static int init (struct a*a)
{
	a->a=0;
	a->b=1;
}
__attribute__((noinline))
static int use (struct a*a)
{
	if (a->c != 3)
		*ptr=5;
}

void
main(void)
{
	struct a a;
	init (&a);
	a.c=3;
	use (&a);
}

And optimize out call to init (&a).

We work quite hard to inline such constructors and this patch is only
effective if inlining did not happen (for whatever reason).  Still, we
optimize about 26 calls building tramp3d and about 70 calls during
bootstrap (mostly ctors of poly_int). During bootstrap most removal
happens early and we would inline the ctors unless we decide to optimize
for size. 1 call per cc1* binary is removed late during LTO build.

This is more frequent in codebases with higher abstraction penalty, with
-Os or with profile feedback in sections optimized for size. I also hope
we will be able to CSE such calls and that would make DSE more
important.

Bootstrapped/regtested x86_64-linux, OK?

gcc/ChangeLog:

	* tree-ssa-alias.c (ao_ref_init_from_ptr_and_range): Export.
	* tree-ssa-alias.h (ao_ref_init_from_ptr_and_range): Declare.
	* tree-ssa-dse.c (dse_optimize_stmt): Rename to ...
	(dse_optimize_store): ... this;
	(dse_optimize_call): New function.
	(pass_dse::execute): Use dse_optimize_call and update
	call to dse_optimize_store.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/modref-dse-1.c: New test.
	* gcc.dg/tree-ssa/modref-dse-2.c: New test.

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c
new file mode 100644
index 00000000000..e78693b349a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse1"  } */
+volatile int *ptr;
+struct a {
+	int a,b,c;
+} a;
+__attribute__((noinline))
+static int init (struct a*a)
+{
+	a->a=0;
+	a->b=1;
+}
+__attribute__((noinline))
+static int use (struct a*a)
+{
+	if (a->c != 3)
+		*ptr=5;
+}
+
+void
+main(void)
+{
+	struct a a;
+	init (&a);
+	a.c=3;
+	use (&a);
+}
+/* { dg-final { scan-tree-dump "Deleted dead store: init" "dse1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c
new file mode 100644
index 00000000000..99c8ceb8127
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse2 -fno-ipa-sra -fno-ipa-cp"  } */
+volatile int *ptr;
+struct a {
+	int a,b,c;
+} a;
+__attribute__((noinline))
+static int init (struct a*a)
+{
+	a->a=0;
+	a->b=1;
+	a->c=1;
+}
+__attribute__((noinline))
+static int use (struct a*a)
+{
+	if (a->c != 3)
+		*ptr=5;
+}
+
+void
+main(void)
+{
+	struct a a;
+	init (&a);
+	a.c=3;
+	use (&a);
+}
+/* Only DSE2 is tracking live bytes needed to figure out that store to c is
+   also dead above.  */
+/* { dg-final { scan-tree-dump "Deleted dead store: init" "dse2" } } */
diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
index eabf6805f2b..affb5d40d4b 100644
--- a/gcc/tree-ssa-alias.c
+++ b/gcc/tree-ssa-alias.c
@@ -782,7 +782,7 @@ ao_ref_alias_ptr_type (ao_ref *ref)
    The access is assumed to be only to or after of the pointer target adjusted
    by the offset, not before it (even in the case RANGE_KNOWN is false).  */
 
-static void
+void
 ao_ref_init_from_ptr_and_range (ao_ref *ref, tree ptr,
 				bool range_known,
 				poly_int64 offset,
diff --git a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h
index 275dea10397..c2e28a74999 100644
--- a/gcc/tree-ssa-alias.h
+++ b/gcc/tree-ssa-alias.h
@@ -111,6 +111,8 @@ ao_ref::max_size_known_p () const
 /* In tree-ssa-alias.c  */
 extern void ao_ref_init (ao_ref *, tree);
 extern void ao_ref_init_from_ptr_and_size (ao_ref *, tree, tree);
+void ao_ref_init_from_ptr_and_range (ao_ref *, tree, bool,
+				     poly_int64, poly_int64, poly_int64);
 extern tree ao_ref_base (ao_ref *);
 extern alias_set_type ao_ref_alias_set (ao_ref *);
 extern alias_set_type ao_ref_base_alias_set (ao_ref *);
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 27287fe88ee..1fec9100011 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -40,6 +40,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimplify.h"
 #include "tree-eh.h"
 #include "cfganal.h"
+#include "cgraph.h"
+#include "ipa-modref-tree.h"
+#include "ipa-modref.h"
 
 /* This file implements dead store elimination.
 
@@ -1039,7 +1042,8 @@ delete_dead_or_redundant_assignment (gimple_stmt_iterator *gsi, const char *type
    post dominates the first store, then the first store is dead.  */
 
 static void
-dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
+dse_optimize_store (function *fun, gimple_stmt_iterator *gsi,
+		    sbitmap live_bytes)
 {
   gimple *stmt = gsi_stmt (*gsi);
 
@@ -1182,6 +1186,128 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
     delete_dead_or_redundant_assignment (gsi, "dead", need_eh_cleanup);
 }
 
+/* Try to prove, using modref summary, that all memory written to by a call is
+   dead and remove it.  */
+
+static bool
+dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes)
+{
+  gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
+  tree callee = gimple_call_fndecl (stmt);
+
+  if (!callee)
+    return false;
+
+  /* Pure/const functions are optimized by normal DCE
+     or handled as store above.  */
+  int flags = gimple_call_flags (stmt);
+  if ((flags & (ECF_PURE|ECF_CONST|ECF_NOVOPS))
+      && !(flags & (ECF_LOOPING_CONST_OR_PURE)))
+    return false;
+
+  cgraph_node *node = cgraph_node::get (callee);
+  if (!node)
+    return false;
+
+  if (stmt_could_throw_p (cfun, stmt)
+      && !cfun->can_delete_dead_exceptions)
+    return false;
+
+  /* If return value is used the call is not dead.  */
+  tree lhs = gimple_call_lhs (stmt);
+  if (lhs && TREE_CODE (lhs) == SSA_NAME)
+    {
+      imm_use_iterator ui;
+      gimple *use_stmt;
+      FOR_EACH_IMM_USE_STMT (use_stmt, ui, lhs)
+	if (!is_gimple_debug (use_stmt))
+	  return false;
+    }
+
+  /* Verify that there are no side-effects except for return value
+     and memory writes tracked by modref.  */
+  modref_summary *summary = get_modref_function_summary (node);
+  if (!summary || summary->writes_errno
+      || summary->side_effects
+      || summary->global_memory_written_p ())
+    return false;
+
+  modref_base_node <alias_set_type> *base_node;
+  modref_ref_node <alias_set_type> *ref_node;
+  modref_access_node *access_node;
+  size_t i, j, k;
+  bool by_clobber_p = false;
+  int num_tests = 0, max_tests = param_modref_max_tests;
+
+  /* Unlike alias oracle we can not skip subtrees based on TBAA check.
+     Count the size of the whole tree to verify that we will not need too many
+     tests.  */
+  FOR_EACH_VEC_SAFE_ELT (summary->stores->bases, i, base_node)
+    FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node)
+      FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node)
+	if (num_tests++ > max_tests)
+	  return false;
+
+  /* Walk all memory writes and verify that they are dead.  */
+  FOR_EACH_VEC_SAFE_ELT (summary->stores->bases, i, base_node)
+    FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node)
+      FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node)
+	{
+	  /* ??? If offset is unknown it may be negative.  Not sure
+	     how to construct ref here.  */
+	  if (!access_node->parm_offset_known)
+	    return false;
+
+	  tree arg;
+	  if (access_node->parm_index == MODREF_STATIC_CHAIN_PARM)
+	    arg = gimple_call_chain (stmt);
+	  else
+	    arg = gimple_call_arg (stmt, access_node->parm_index);
+
+	  ao_ref ref;
+	  poly_offset_int off = (poly_offset_int)access_node->offset
+		+ ((poly_offset_int)access_node->parm_offset
+		   << LOG2_BITS_PER_UNIT);
+	  poly_int64 off2;
+	  if (!off.to_shwi (&off2))
+	    return false;
+	  ao_ref_init_from_ptr_and_range
+		 (&ref, arg, true, off2, access_node->size,
+		  access_node->max_size);
+	  ref.ref_alias_set = ref_node->ref;
+	  ref.base_alias_set = base_node->base;
+
+	  bool byte_tracking_enabled
+	      = setup_live_bytes_from_ref (&ref, live_bytes);
+	  enum dse_store_status store_status;
+
+	  store_status = dse_classify_store (&ref, stmt,
+					     byte_tracking_enabled,
+					     live_bytes, &by_clobber_p);
+	  if (store_status != DSE_STORE_DEAD)
+	    return false;
+	}
+  /* Also check the value stored by the call itself.  */
+  if (gimple_store_p (stmt))
+    {
+      ao_ref ref;
+
+      if (!initialize_ao_ref_for_dse (stmt, &ref))
+	gcc_unreachable ();
+      bool byte_tracking_enabled
+	  = setup_live_bytes_from_ref (&ref, live_bytes);
+      enum dse_store_status store_status;
+
+      store_status = dse_classify_store (&ref, stmt,
+					 byte_tracking_enabled,
+					 live_bytes, &by_clobber_p);
+      if (store_status != DSE_STORE_DEAD)
+	return false;
+    }
+  delete_dead_or_redundant_assignment (gsi, "dead", need_eh_cleanup);
+  return true;
+}
+
 namespace {
 
 const pass_data pass_data_dse =
@@ -1235,7 +1363,14 @@ pass_dse::execute (function *fun)
 	  gimple *stmt = gsi_stmt (gsi);
 
 	  if (gimple_vdef (stmt))
-	    dse_optimize_stmt (fun, &gsi, live_bytes);
+	    {
+	      gcall *call = dyn_cast <gcall *> (stmt);
+
+	      if (call && dse_optimize_call (&gsi, live_bytes))
+		/* We removed a dead call.  */;
+	      else
+		dse_optimize_store (fun, &gsi, live_bytes);
+	    }
 	  else if (def_operand_p
 		     def_p = single_ssa_def_operand (stmt, SSA_OP_DEF))
 	    {


More information about the Gcc-patches mailing list