[gcc r11-11490] cfgexpand: Workaround CSE of ADDR_EXPRs in VAR_DECL partitioning [PR113372]

Jakub Jelinek jakub@gcc.gnu.org
Thu Jun 20 13:21:51 GMT 2024


https://gcc.gnu.org/g:c845244dd7afae95689b8dd02a62c18932441583

commit r11-11490-gc845244dd7afae95689b8dd02a62c18932441583
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Tue Jan 16 11:49:34 2024 +0100

    cfgexpand: Workaround CSE of ADDR_EXPRs in VAR_DECL partitioning [PR113372]
    
    The following patch adds a quick workaround to bugs in VAR_DECL
    partitioning.
    The problem is that there is no dependency between ADDR_EXPRs of local
    decls and CLOBBERs of those vars, so VN can CSE uses of ADDR_EXPRs
    (including ivopts integral variants thereof), which can break
    add_scope_conflicts discovery of what variables are actually used
    in certain region.
    E.g. we can have
      ivtmp.40_3 = (unsigned long) &MEM <unsigned long[100]> [(void *)&bitint.6 + 8B];
    ...
      uses of ivtmp.40_3
    ...
      bitint.6 ={v} {CLOBBER(eos)};
    ...
      ivtmp.28_43 = (unsigned long) &MEM <unsigned long[100]> [(void *)&bitint.6 + 8B];
    ...
      uses of ivtmp.28_43
    before VN (such as dom3), which the add_scope_conflicts code identifies as 2
    independent uses of bitint.6 variable (which is correct), but then VN
    determines ivtmp.28_43 is the same as ivtmp.40_3 and just uses ivtmp.40_3
    even in the second region; at that point add_scope_conflict thinks the
    bitint.6 variable is not used in that region anymore.
    
    The following patch does a simple single def-stmt check for such ADDR_EXPRs
    (rather than say trying to do a full propagation of what SSA_NAMEs can
    contain ADDR_EXPRs of local variables), which seems to workaround all 4 PRs.
    
    In addition to this patch I've used the attached one to gather statistics
    on the total size of all variable partitions in a function and seems besides
    the new testcases nothing is really affected compared to no patch (I've
    actually just modified the patch to == OMP_SCAN instead of == ADDR_EXPR, so
    it looks the same except that it never triggers).  The comparison wasn't
    perfect because I've only gathered BITS_PER_WORD, main_input_filename (did
    some replacement of build directories and /tmp/ccXXXXXX names of LTO to make
    it more similar between the two bootstraps/regtests), current_function_name
    and the total size of all variable partitions if any, because I didn't
    record e.g. the optimization options and so e.g. torture tests which iterate
    over options could have different partition sizes even in one compiler when
    BITS_PER_WORD, main_input_filename and current_function_name are all equal.
    So had to write an awk script to check if the first triple in the second
    build appeared in the first one and the quadruple in the second build
    appeared in the first one too, otherwise print result and that only
    triggered in the new tests.
    Also, the cc1plus binary according to objdump -dr is identical between the
    two builds except for the ADDR_EXPR vs. OMP_SCAN constant in the two spots.
    
    2024-01-16  Jakub Jelinek  <jakub@redhat.com>
    
            PR tree-optimization/113372
            PR middle-end/90348
            PR middle-end/110115
            PR middle-end/111422
            * cfgexpand.c (add_scope_conflicts_2): New function.
            (add_scope_conflicts_1): Use it.
    
            * gcc.c-torture/execute/pr90348.c: New test.
            * gcc.c-torture/execute/pr110115.c: New test.
            * gcc.c-torture/execute/pr111422.c: New test.
    
    (cherry picked from commit 1251d3957de04dc9b023a23c09400217e13deadb)

Diff:
---
 gcc/cfgexpand.c                                | 30 +++++++++++++++--
 gcc/testsuite/gcc.c-torture/execute/pr110115.c | 45 ++++++++++++++++++++++++++
 gcc/testsuite/gcc.c-torture/execute/pr111422.c | 39 ++++++++++++++++++++++
 gcc/testsuite/gcc.c-torture/execute/pr90348.c  | 38 ++++++++++++++++++++++
 4 files changed, 150 insertions(+), 2 deletions(-)

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index d3768a6134ba..8b11ae424b56 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -571,6 +571,26 @@ visit_conflict (gimple *, tree op, tree, void *data)
   return false;
 }
 
+/* Helper function for add_scope_conflicts_1.  For USE on
+   a stmt, if it is a SSA_NAME and in its SSA_NAME_DEF_STMT is known to be
+   based on some ADDR_EXPR, invoke VISIT on that ADDR_EXPR.  */
+
+static inline void
+add_scope_conflicts_2 (tree use, bitmap work,
+		       walk_stmt_load_store_addr_fn visit)
+{
+  if (TREE_CODE (use) == SSA_NAME
+      && (POINTER_TYPE_P (TREE_TYPE (use))
+	  || INTEGRAL_TYPE_P (TREE_TYPE (use))))
+    {
+      gimple *g = SSA_NAME_DEF_STMT (use);
+      if (is_gimple_assign (g))
+	if (tree op = gimple_assign_rhs1 (g))
+	  if (TREE_CODE (op) == ADDR_EXPR)
+	    visit (g, TREE_OPERAND (op, 0), op, work);
+    }
+}
+
 /* Helper routine for add_scope_conflicts, calculating the active partitions
    at the end of BB, leaving the result in WORK.  We're called to generate
    conflicts when FOR_CONFLICT is true, otherwise we're just tracking
@@ -583,6 +603,8 @@ add_scope_conflicts_1 (basic_block bb, bitmap work, bool for_conflict)
   edge_iterator ei;
   gimple_stmt_iterator gsi;
   walk_stmt_load_store_addr_fn visit;
+  use_operand_p use_p;
+  ssa_op_iter iter;
 
   bitmap_clear (work);
   FOR_EACH_EDGE (e, ei, bb->preds)
@@ -593,7 +615,10 @@ add_scope_conflicts_1 (basic_block bb, bitmap work, bool for_conflict)
   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     {
       gimple *stmt = gsi_stmt (gsi);
+      gphi *phi = as_a <gphi *> (stmt);
       walk_stmt_load_store_addr_ops (stmt, work, NULL, NULL, visit);
+      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
+	add_scope_conflicts_2 (USE_FROM_PTR (use_p), work, visit);
     }
   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     {
@@ -613,8 +638,7 @@ add_scope_conflicts_1 (basic_block bb, bitmap work, bool for_conflict)
 	}
       else if (!is_gimple_debug (stmt))
 	{
-	  if (for_conflict
-	      && visit == visit_op)
+	  if (for_conflict && visit == visit_op)
 	    {
 	      /* If this is the first real instruction in this BB we need
 	         to add conflicts for everything live at this point now.
@@ -634,6 +658,8 @@ add_scope_conflicts_1 (basic_block bb, bitmap work, bool for_conflict)
 	      visit = visit_conflict;
 	    }
 	  walk_stmt_load_store_addr_ops (stmt, work, visit, visit, visit);
+	  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
+	    add_scope_conflicts_2 (USE_FROM_PTR (use_p), work, visit);
 	}
     }
 }
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr110115.c b/gcc/testsuite/gcc.c-torture/execute/pr110115.c
new file mode 100644
index 000000000000..02dec54f9b23
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr110115.c
@@ -0,0 +1,45 @@
+/* PR middle-end/110115 */
+
+int a;
+signed char b;
+
+static int
+foo (signed char *e, int f)
+{
+  int d;
+  for (d = 0; d < f; d++)
+    e[d] = 0;
+  return d;
+}
+
+int
+bar (signed char e, int f)
+{
+  signed char h[20];
+  int i = foo (h, f);
+  return i;
+}
+
+int
+baz ()
+{
+  switch (a)
+    {
+    case 'f':
+      return 0;
+    default:
+      return ~0;
+    }
+}
+
+int
+main ()
+{
+  {
+    signed char *k[3];
+    int d;
+    for (d = 0; bar (8, 15) - 15 + d < 1; d++)
+      k[baz () + 1] = &b;
+    *k[0] = -*k[0];
+  }
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111422.c b/gcc/testsuite/gcc.c-torture/execute/pr111422.c
new file mode 100644
index 000000000000..f5920dd90934
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111422.c
@@ -0,0 +1,39 @@
+/* PR middle-end/111422 */
+
+int a, b;
+int *c = &b;
+unsigned d;
+signed char e;
+int f = 1;
+
+int
+foo (int k, signed char *l)
+{
+  if (k < 6)
+    return a;
+  l[0] = l[1] = l[k - 1] = 8;
+  return 0;
+}
+
+int
+bar (int k)
+{
+  signed char g[11];
+  int h = foo (k, g);
+  return h;
+}
+
+int
+main ()
+{
+  for (; b < 8; b = b + 1)
+    ;
+  int j;
+  int *n[8];
+  for (j = 0; 18446744073709551608ULL + bar (*c) + *c + j < 2; j++)
+    n[j] = &f;
+  for (; e <= 4; e++)
+    d = *n[0] == f;
+  if (d != 1)
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr90348.c b/gcc/testsuite/gcc.c-torture/execute/pr90348.c
new file mode 100644
index 000000000000..341cedd2435d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr90348.c
@@ -0,0 +1,38 @@
+/* PR middle-end/90348 */
+
+void __attribute__ ((noipa))
+set_one (unsigned char *ptr)
+{
+  *ptr = 1;
+}
+
+void __attribute__ ((noipa))
+check_zero (unsigned char const *in, unsigned int len)
+{
+  for (unsigned int i = 0; i < len; ++i)
+    if (in[i] != 0)
+      __builtin_abort ();
+}
+
+static void
+set_one_on_stack (void)
+{
+  unsigned char buf[1];
+  set_one (buf);
+}
+
+int
+main ()
+{
+  for (int i = 0; i <= 4; ++i)
+    {
+      unsigned char in[4];
+      for (int j = 0; j < i; ++j)
+	{
+	  in[j] = 0;
+	  set_one_on_stack ();
+	}
+      check_zero (in, i);
+    }
+  return 0;
+}


More information about the Gcc-cvs mailing list