[RFA] [tree-optimization/80576] Handle non-constant sizes in DSE

Jeff Law law@redhat.com
Fri Aug 16 20:41:00 GMT 2019


On 8/16/19 12:09 PM, Marc Glisse wrote:
> On Fri, 16 Aug 2019, Jeff Law wrote:
> 
>> This patch improves our ability to detect dead stores by handling cases
>> where the size memcpy, memset, strncpy, etc call is not constant.  This
>> addresses some, but not all, of the issues in 80576.
>>
>> The key here is when the size is not constant we can make conservative
>> decisions that still give us a chance to analyze the code for dead
>> stores.
>>
>> Remember that for dead store elimination, we're trying to prove that
>> given two stores, the second store overwrites (partially or fully) the
>> same memory locations as the first store.  That makes the first store
>> either partially or fully dead.
>>
>> When we encounter the first store, we set up a bitmap of bytes written
>> by that store (live_bytes).  We then look at subsequent stores and clear
>> the appropriate entries in the bitmap.
>>
>> If the first store has a nonconstant length argument we can use the
>> range of the length argument (max) and the size of the destination
>> object to make a conservative estimation of how many bytes are written.
>>
>> For the second store the conservative thing to do for a non-constant
>> length is to use the minimum of the range of the length argument.
> 
> So I guess it won't handle things like
> 
> void f(char*p,int n){
>   __builtin_memset(p,3,n);
>   __builtin_memset(p,7,n);
> }
> 
> where we know nothing about the length, except that it is the same? Or
> do you look at symbolic ranges?
Nope.  I think ao_ref can represent that, so it'd just be a matter of
recording "n" as the length, then verifying that the second call's
length is "n" as well.  That makes the first call dead.  We'd have to
bypass the byte tracking in that case, but I think that's trivial
because we already have a means to do that when the sizes are too large.

> 
>> This doesn't come up a lot in practice.  But it also happens to put some
>> of the infrastructure in place to handle strcpy and strcpy_chk which are
>> needed to fully resolve 80576.
>>
>> Bootstrapped and regression tested on x86, x86_64, ppc64le, ppc64,
>> ppc32, aarch64, sparc, s390x and probably others.  Also verified that
>> the tests work on the various *-elf targets in my tester.
>>
>> OK for the trunk?
> 
> ENOPATCH
Opps.    Attached.

Jeff


-------------- next part --------------
	PR tree-optimizatoin/80576
	* tree-ssa-dse.c: Include builtins.h and gimple-fold.h.
	(objsize_from_type): New function.
	(initialize_ao_ref_for_dse): Add new argument.  Handle non
	constant sizes for currently supported builtin calls.
	(clear_bytes_written_by): Pass new argument to
	initialize_ao_ref_for_dse.
	(dse_optimize_redundant_stores): Likewise.
	(dse_dom_walker::dse_optimize_stmt): Likewise.  Do not trim
	calls if the size is not constant.

	* gcc.dg/tree-ssa/ssa-dse-39.c: New test.
	* gcc.dg/tree-ssa/ssa-dse-40.c: New test.

diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 5b7c4fc6d1a..ae03980f792 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -36,6 +36,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "alias.h"
 #include "tree-ssa-loop.h"
+#include "builtins.h"
+#include "gimple-fold.h"
 
 /* This file implements dead store elimination.
 
@@ -91,16 +93,47 @@ enum dse_store_status
   DSE_STORE_DEAD
 };
 
+/* OBJECT is an ADDR_EXPR.  If its underlying type is an array,
+   return the size of the array in bytes.  */
+
+static tree
+objsize_from_type (tree object)
+{
+  if (TREE_CODE (object) != ADDR_EXPR)
+    return NULL_TREE;
+
+  tree type = TREE_TYPE (object);
+  if (POINTER_TYPE_P (type))
+    type = TREE_TYPE (type);
+
+  type = TYPE_MAIN_VARIANT (type);
+
+  if (TREE_CODE (type) == ARRAY_TYPE && !array_at_struct_end_p (object))
+    {
+      tree t = TYPE_SIZE_UNIT (type);
+      if (t && TREE_CODE (t) == INTEGER_CST && !integer_zerop (t))
+	return t;
+    }
+
+  return NULL_TREE;
+}
+
 /* STMT is a statement that may write into memory.  Analyze it and
    initialize WRITE to describe how STMT affects memory.
 
+   If MAXLEN is true, then we are computing how many bytes this write
+   might perform.  When false we are computing the minimum number of
+   bytes this write may perform.  This only matters for calls like
+   strcpy where the number of bytes written is determined by the length
+   of the input string operand.
+
    Return TRUE if the the statement was analyzed, FALSE otherwise.
 
    It is always safe to return FALSE.  But typically better optimziation
    can be achieved by analyzing more statements.  */
 
 static bool
-initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write)
+initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write, bool maxlen)
 {
   /* It's advantageous to handle certain mem* functions.  */
   if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
@@ -117,8 +150,34 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write)
 	case BUILT_IN_STRNCPY_CHK:
 	  {
 	    tree size = gimple_call_arg (stmt, 2);
-	    tree ptr = gimple_call_arg (stmt, 0);
-	    ao_ref_init_from_ptr_and_size (write, ptr, size);
+	    tree dest = gimple_call_arg (stmt, 0);
+
+	    if (TREE_CODE (size) != INTEGER_CST)
+	      {
+		wide_int minbound, maxbound;
+		value_range_kind rng = get_range_info (size, &minbound, &maxbound);
+		if (rng == VR_RANGE)
+		  size = wide_int_to_tree (TREE_TYPE (size),
+					   maxlen ? maxbound : minbound);
+	      }
+
+	    /* Constrain the maximum length to the size of the destination
+	       object.  This is primarily useful when we have a nonconstant
+	       range which might be somewhat pessimistic.  */
+	    if (maxlen)
+	      {
+		tree type_size = objsize_from_type (dest);
+		if (TREE_CODE (size) != INTEGER_CST)
+		  size = type_size;
+		else if (type_size)
+		  size = fold_build2 (MIN_EXPR, TREE_TYPE (dest),
+				      size, type_size);
+	      }
+
+	    if (size == NULL_TREE || TREE_CODE (size) != INTEGER_CST)
+	      return false;
+
+	    ao_ref_init_from_ptr_and_size (write, dest, size);
 	    return true;
 	  }
 
@@ -219,7 +278,7 @@ static void
 clear_bytes_written_by (sbitmap live_bytes, gimple *stmt, ao_ref *ref)
 {
   ao_ref write;
-  if (!initialize_ao_ref_for_dse (stmt, &write))
+  if (!initialize_ao_ref_for_dse (stmt, &write, false))
     return;
 
   /* Verify we have the same base memory address, the write
@@ -641,7 +700,7 @@ dse_optimize_redundant_stores (gimple *stmt)
 	{
 	  ao_ref write;
 
-	  if (!initialize_ao_ref_for_dse (use_stmt, &write))
+	  if (!initialize_ao_ref_for_dse (use_stmt, &write, false))
 	    BREAK_FROM_IMM_USE_STMT (ui)
 
 	  if (valid_ao_ref_for_dse (&write)
@@ -956,7 +1015,7 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi)
     return;
 
   ao_ref ref;
-  if (!initialize_ao_ref_for_dse (stmt, &ref))
+  if (!initialize_ao_ref_for_dse (stmt, &ref, true))
     return;
 
   /* We know we have virtual definitions.  We can handle assignments and
@@ -1002,7 +1061,13 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi)
 	    if (store_status == DSE_STORE_LIVE)
 	      return;
 
-	    if (store_status == DSE_STORE_MAYBE_PARTIAL_DEAD)
+	    /* If this store did not have a constant size, then do not
+	       try to trim the partially redundant store.
+
+	       We could possible compute a new size at runtime which
+	       could be faster because we avoid memory traffic.  */
+	    if (TREE_CODE (size) == INTEGER_CST
+		&& store_status == DSE_STORE_MAYBE_PARTIAL_DEAD)
 	      {
 		maybe_trim_memstar_call (&ref, m_live_bytes, stmt);
 		return;
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-39.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-39.c
new file mode 100644
index 00000000000..0b0fa618844
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-39.c
@@ -0,0 +1,44 @@
+/* { dg-options "-O2 -fdump-tree-dse-details" } */
+
+extern void frob (char *);
+
+void g (char *s, int x)
+{
+  char a[8];
+  __builtin_strncpy (a, s, x);
+  __builtin_memset (a, 0, sizeof a); 
+  frob (a);
+}
+
+void h (char *s, int x)
+{
+  char a[8];
+  __builtin_memset (a, 0, x); 
+  __builtin_strncpy (a, s, sizeof a);
+  frob (a);
+}
+
+void i (char *s, int x)
+{
+  char a[8];
+  __builtin_strncpy (a, s, sizeof a);
+  __builtin_memset (a, 0, x); 
+  frob (a);
+}
+
+void j (char *s, int x)
+{
+  char a[8];
+  __builtin_memset (a, 0, sizeof a); 
+  __builtin_strncpy (a, s, x);
+  frob (a);
+}
+
+/* We can remove the dead stores/calls in the first two functions
+   since in both cases the second call wipes the entire object.
+
+   There's nothing we can do in the last two functions since we
+   know nothing about the extent of the second store.  */
+/* { dg-final { scan-tree-dump-times "Deleted dead call" 2 "dse1" } } */
+/* { dg-final { scan-tree-dump-not "Trimming statement " "dse1" } } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-40.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-40.c
new file mode 100644
index 00000000000..0f431b8089f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-40.c
@@ -0,0 +1,63 @@
+/* { dg-options "-O2 -fdump-tree-dse-details" } */
+
+extern void frob (char *);
+
+void g (char *s, int x)
+{
+  char a[8];
+  __builtin_strncpy (a, s, sizeof a);
+  __builtin_memset (a, 0, x ? 8 : 10); 
+  frob (a);
+}
+
+void h (char *s, int x)
+{
+  char a[8];
+  __builtin_memset (a, 0, sizeof a); 
+  __builtin_strncpy (a, s, x ? 8 : 10);
+  frob (a);
+}
+
+void i (char *s, int x)
+{
+  char a[8];
+  __builtin_strncpy (a, s, sizeof a);
+  __builtin_memset (a, 0, x ? 6 : 8); 
+  frob (a);
+}
+
+void j (char *s, int x)
+{
+  char a[8];
+  __builtin_memset (a, 0, sizeof a); 
+  __builtin_strncpy (a, s, x ? 6 : 8);
+  frob (a);
+}
+
+void k (char *s, int x)
+{
+  char a[8];
+  __builtin_strncpy (a, s, sizeof a);
+  __builtin_memset (&a[6], 0, x ? 2 : 4); 
+  frob (a);
+}
+
+void l (char *s, int x)
+{
+  char a[8];
+  __builtin_memset (a, 0, sizeof a); 
+  __builtin_strncpy (&a[6], s, x ? 2 : 4);
+  frob (a);
+}
+
+/* We can remove the dead stores/calls in the first two functions
+   since in both cases the second call wipes the entire object.
+
+   In the 3rd and 4th case we can trim up to 6 bytes off the
+   head of the store. 
+
+   In the last two tests we can trim 2 bytes from the tail.  */
+
+/* { dg-final { scan-tree-dump-times "Deleted dead call" 2 "dse1" } } */
+/* { dg-final { scan-tree-dump-times "Trimming statement"  4 "dse1" } } */
+


More information about the Gcc-patches mailing list