This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC] [P2] [PR tree-optimization/33562] Lowering more complex assignments.


On 02/17/2016 03:48 AM, Richard Biener wrote:

I instrumented a bootstrap -- the improved DSE finds ~20k additional DSE
opportunities during a GCC bootstrap that could not be found by the current
DSE.  Yes, 20k additional statements deleted by tree DSE.  Yow!

Well, DCE also can do quite some DSE and it runs after DSE - did that 20k
more DSE affect the overall end-result?
I haven't looked at that yet. I just got the instrumentation data last night.


Of those additional opportunities > 99% are for sizes of 64 bytes or
smaller.  Thus we can pack those into 1 or 2 bitmap elements, depending on
the starting offset.  So the bitmap side will be efficient with no real
searching if we choose our PARAM value wisely.

So then please use a uint64_t or even uint32_t mask please.  Which means
a fixed size SBITMAP (32 bits) if you like to use the bitmap interface.
I actually prefer the standard bitmap interface as it seamlessly handles differences in the starting offset for the writes.


Can you share your work-in-progress patch?
Easy 'nuff. This will bootstrap and regression test. Was planning to spend today generating some additional testcodes from new cases it catches and looking at impacts on code generation.

I'm particularly interested in any impact on the zero-sized object clobbers. I'd like to remove the bits which filter those out.

It feels like there's some refactoring that ought to happen in this code. Both in terms of the mostly duplicated test that a particular ref is "interesting" and with mostly duplicated code to extract a ref from a mem* or assignment.

jeff

commit d49afd895524df98c5e53280b1c77f4b61a45ba3
Author: Jeff Law <law@tor.usersys.redhat.com>
Date:   Tue Feb 16 13:44:20 2016 -0500

    Checkpoint
    
    CHeckpoint
    
    Another checkpoint
    
    Checkpoint

diff --git a/gcc/params.def b/gcc/params.def
index c0494fa..5aa146b 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -520,6 +520,11 @@ DEFPARAM(PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND,
 	 "If number of candidates in the set is smaller, we always try to remove unused ivs during its optimization.",
 	 10, 0, 0)
 
+DEFPARAM(PARAM_DSE_MAX_OBJECT_SIZE,
+ 	 "dse-max-object-size",
+	 "Maximum size (in bytes) of objects tracked by dead store elimination.",
+	 64, 0, 0)
+
 DEFPARAM(PARAM_SCEV_MAX_EXPR_SIZE,
  	 "scev-max-expr-size",
 	 "Bound on size of expressions used in the scalar evolutions analyzer.",
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/complex-4.c b/gcc/testsuite/gcc.dg/tree-ssa/complex-4.c
index 87a2638..3155741 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/complex-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/complex-4.c
@@ -10,4 +10,4 @@ int f(void)
   return g(&t);
 }
 
-/* { dg-final { scan-tree-dump-times "__complex__" 0 "optimized" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "__complex__" 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/complex-5.c b/gcc/testsuite/gcc.dg/tree-ssa/complex-5.c
index e2cd403..e6d027f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/complex-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/complex-5.c
@@ -8,4 +8,4 @@ int f(void)
  __imag__ t = 2;
 }
 
-/* { dg-final { scan-tree-dump-times "__complex__" 0 "optimized" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "__complex__" 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-9.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-9.c
index 594c20c..ae48ddd 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-9.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-9.c
@@ -11,4 +11,4 @@ foo ()
 }
 
 /* We should eliminate the first assignment.  */
-/* { dg-final { scan-tree-dump-times "VDEF" 2 "dse1" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "VDEF" 2 "dse1" } } */
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 372a0be..97a091b 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-dfa.h"
 #include "domwalk.h"
 #include "tree-cfgcleanup.h"
+#include "params.h"
 
 /* This file implements dead store elimination.
 
@@ -68,6 +69,58 @@ along with GCC; see the file COPYING3.  If not see
    remove their dead edges eventually.  */
 static bitmap need_eh_cleanup;
 
+/* Clear any bytes written by STMT from the bitmap LIVE_BYTES.  The base
+   address written by STMT must match the one found in REF, which must
+   have its base address previously initialized.
+
+   This routine must be conservative.  If we don't know the offset or
+   actual size written, assume nothing was written.  */
+
+static void
+clear_bytes_written_by (bitmap live_bytes, gimple *stmt, ao_ref *ref)
+{
+  ao_ref write;
+  write.base = NULL;
+
+  /* It's advantageous to handle certain mem* functions.  */
+  if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
+    {
+      switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
+	{
+	  case BUILT_IN_MEMCPY:
+	  case BUILT_IN_MEMMOVE:
+	  case BUILT_IN_MEMSET:
+	    {
+	      tree size = NULL_TREE;
+	      if (gimple_call_num_args (stmt) == 3)
+		size = gimple_call_arg (stmt, 2);
+	      tree ptr = gimple_call_arg (stmt, 0);
+	      ao_ref_init_from_ptr_and_size (&write, ptr, size);
+	      ao_ref_base (&write);
+	    }
+	  default:
+	    break;
+	}
+    }
+  else if (is_gimple_assign (stmt))
+    {
+      ao_ref_init (&write, gimple_assign_lhs (stmt));
+      ao_ref_base (&write);
+    }
+
+  /* Verify we have the same base memory address and that the write
+     has a known size.  If so, then clear the appropriate bytes in
+     the LIVE_BYTES bitmap.  */
+  if (write.base
+      && write.base == ref->base
+      && write.size == write.max_size
+      && (write.size % BITS_PER_UNIT) == 0
+      && (write.offset % BITS_PER_UNIT) == 0
+      && write.max_size != -1)
+    bitmap_clear_range (live_bytes,
+			write.offset / BITS_PER_UNIT,
+			write.size / BITS_PER_UNIT);
+}
 
 /* A helper of dse_optimize_stmt.
    Given a GIMPLE_ASSIGN in STMT that writes to REF, find a candidate
@@ -79,9 +132,33 @@ dse_possible_dead_store_p (ao_ref *ref, gimple *stmt, gimple **use_stmt)
 {
   gimple *temp;
   unsigned cnt = 0;
+  bitmap live_bytes = NULL;
 
   *use_stmt = NULL;
 
+  /* REF is a memory write.  Go ahead and get its base, size, extent
+     information and encode the bytes written into LIVE_BYTES.  We can
+     handle any case where we have a known base and maximum size.
+
+     However, experimentation has shown that bit level tracking is not
+     useful in practice, so we only track at the byte level.
+
+     Furthermore, experimentation has shown that 99% of the cases
+     that require byte tracking are 64 bytes or less.  Tracking 64
+     bytes also happens to fit nicely into a bitmap element.  */
+  if (ao_ref_base (ref)
+      && ref->max_size
+      && (ref->offset % BITS_PER_UNIT) == 0
+      && (ref->max_size % BITS_PER_UNIT) == 0
+      && ref->max_size <= PARAM_VALUE (PARAM_DSE_MAX_OBJECT_SIZE)
+      && ref->max_size != -1)
+    {
+      live_bytes = BITMAP_ALLOC (NULL);
+      bitmap_set_range (live_bytes,
+			ref->offset / BITS_PER_UNIT,
+			ref->max_size / BITS_PER_UNIT);
+    }
+
   /* Find the first dominated statement that clobbers (part of) the
      memory stmt stores to with no intermediate statement that may use
      part of the memory stmt stores.  That is, find a store that may
@@ -177,11 +254,18 @@ dse_possible_dead_store_p (ao_ref *ref, gimple *stmt, gimple **use_stmt)
 	  temp = stmt;
 	  break;
 	}
+
+      if (live_bytes && temp)
+	clear_bytes_written_by (live_bytes, temp, ref);
     }
-  /* Continue walking until we reach a kill.  */
-  while (!stmt_kills_ref_p (temp, ref));
+  /* Continue walking until we reach a full kill as a single statement
+     or there are no more live bytes.  */
+  while (!stmt_kills_ref_p (temp, ref)
+	 && !(live_bytes && bitmap_empty_p (live_bytes)));
 
   *use_stmt = temp;
+  if (live_bytes)
+    BITMAP_FREE (live_bytes);
 
   return true;
 }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]