This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Optimiza aggregate a = b = c = {} (PR c/78408, take 2)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Richard Biener <rguenther at suse dot de>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Thu, 15 Dec 2016 17:41:26 +0100
- Subject: [PATCH] Optimiza aggregate a = b = c = {} (PR c/78408, take 2)
- Authentication-results: sourceware.org; auth=none
- References: <20161125193219.GP3541@tucnak.redhat.com> <alpine.LSU.2.11.1611281044020.5294@t29.fhfr.qr> <20161213113648.GP3541@tucnak.redhat.com> <alpine.LSU.2.11.1612141317250.5294@t29.fhfr.qr>
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
On Wed, Dec 14, 2016 at 01:27:56PM +0100, Richard Biener wrote:
> Ah, ok. But then the size of the memset shouldn't be compared
> against the get_ref_base_and_extend size from src2 but to the
> size of the access of SRC/DEST (clearly looking at the "size" of
> the ADDR_EXPR argument is somewhat bogus).
> And as you compare src and dest
> with operand_equal_p there is no need to reject ssize != max_size
> either (you'd of course not see memset (&a[i].x, 0, 400) because
> &a[i].x is not invariant, you'd need to lookup the SSA def for a pointer
> here).
>
> You can get at the size of an actual access simpler than by
> the full-blown get_ref_base_and_extent (just outline a
> get_ref_size () from the head part of it.
>
> I still think that using get_addr_base_and_unit_offset on the
> address is better and passing decomposed (base, offset, size)
> triplets to optimize_memcpy would also save you the
> MEM[(char * {ref-all})&b] = MEM[(char * {ref-all})&a]; special-case.
Here is an updated patch that does that (i.e. always work with base, offset,
length triplets) and drops the alias check for dest vs. src overlap.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2016-12-15 Jakub Jelinek <jakub@redhat.com>
PR c/78408
* tree-ssa-ccp.c: Include tree-dfa.h.
(optimize_memcpy): New function.
(pass_fold_builtins::execute): Use it. Remove useless conditional
break after BUILT_IN_VA_*.
* gcc.dg/pr78408-1.c: New test.
* gcc.dg/pr78408-2.c: New test.
--- gcc/tree-ssa-ccp.c.jj 2016-11-28 16:19:11.000000000 +0100
+++ gcc/tree-ssa-ccp.c 2016-12-15 15:09:03.180993745 +0100
@@ -143,6 +143,7 @@ along with GCC; see the file COPYING3.
#include "stor-layout.h"
#include "optabs-query.h"
#include "tree-ssa-ccp.h"
+#include "tree-dfa.h"
/* Possible lattice values. */
typedef enum
@@ -2933,6 +2934,113 @@ optimize_atomic_bit_test_and (gimple_stm
release_ssa_name (lhs);
}
+/* Optimize
+ a = {};
+ b = a;
+ into
+ a = {};
+ b = {};
+ Similarly for memset (&a, ..., sizeof (a)); instead of a = {};
+ and/or memcpy (&b, &a, sizeof (a)); instead of b = a; */
+
+static void
+optimize_memcpy (gimple_stmt_iterator *gsip, tree dest, tree src, tree len)
+{
+ gimple *stmt = gsi_stmt (*gsip);
+ if (gimple_has_volatile_ops (stmt)
+ || TREE_THIS_VOLATILE (dest)
+ || TREE_THIS_VOLATILE (src))
+ return;
+
+ tree vuse = gimple_vuse (stmt);
+ if (vuse == NULL)
+ return;
+
+ gimple *defstmt = SSA_NAME_DEF_STMT (vuse);
+ tree src2 = NULL_TREE, len2 = NULL_TREE;
+ HOST_WIDE_INT offset, offset2;
+ tree val = integer_zero_node;
+ if (gimple_store_p (defstmt)
+ && gimple_assign_single_p (defstmt)
+ && TREE_CODE (gimple_assign_rhs1 (defstmt)) == CONSTRUCTOR
+ && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (defstmt)) == 0
+ && !gimple_clobber_p (defstmt))
+ src2 = gimple_assign_lhs (defstmt);
+ else if (gimple_call_builtin_p (defstmt, BUILT_IN_MEMSET)
+ && TREE_CODE (gimple_call_arg (defstmt, 0)) == ADDR_EXPR
+ && TREE_CODE (gimple_call_arg (defstmt, 1)) == INTEGER_CST)
+ {
+ src2 = TREE_OPERAND (gimple_call_arg (defstmt, 0), 0);
+ len2 = gimple_call_arg (defstmt, 2);
+ val = gimple_call_arg (defstmt, 1);
+ if (!integer_zerop (val) && is_gimple_assign (stmt))
+ src2 = NULL_TREE;
+ }
+
+ if (src2 == NULL_TREE)
+ return;
+
+ if (len == NULL_TREE)
+ len = (TREE_CODE (src) == COMPONENT_REF
+ ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1))
+ : TYPE_SIZE_UNIT (TREE_TYPE (src)));
+ if (len2 == NULL_TREE)
+ len2 = (TREE_CODE (src2) == COMPONENT_REF
+ ? DECL_SIZE_UNIT (TREE_OPERAND (src2, 1))
+ : TYPE_SIZE_UNIT (TREE_TYPE (src2)));
+ if (len == NULL_TREE
+ || TREE_CODE (len) != INTEGER_CST
+ || len2 == NULL_TREE
+ || TREE_CODE (len2) != INTEGER_CST)
+ return;
+
+ src = get_addr_base_and_unit_offset (src, &offset);
+ src2 = get_addr_base_and_unit_offset (src2, &offset2);
+ if (src == NULL_TREE
+ || src2 == NULL_TREE
+ || offset < offset2)
+ return;
+
+ if (!operand_equal_p (src, src2, 0))
+ return;
+
+ /* [ src + offset2, src + offset2 + len2 - 1 ] is set to val.
+ Make sure that
+ [ src + offset, src + offset + len - 1 ] is a subset of that. */
+ if (wi::to_widest (len) + (offset - offset2) > wi::to_widest (len2))
+ return;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Simplified\n ");
+ print_gimple_stmt (dump_file, stmt, 0, dump_flags);
+ fprintf (dump_file, "after previous\n ");
+ print_gimple_stmt (dump_file, defstmt, 0, dump_flags);
+ }
+
+ if (is_gimple_assign (stmt))
+ {
+ tree ctor = build_constructor (TREE_TYPE (dest), NULL);
+ gimple_assign_set_rhs_from_tree (gsip, ctor);
+ update_stmt (stmt);
+ }
+ else
+ {
+ gcall *call = as_a <gcall *> (stmt);
+ tree fndecl = builtin_decl_implicit (BUILT_IN_MEMSET);
+ gimple_call_set_fndecl (call, fndecl);
+ gimple_call_set_fntype (call, TREE_TYPE (fndecl));
+ gimple_call_set_arg (call, 1, val);
+ update_stmt (stmt);
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "into\n ");
+ print_gimple_stmt (dump_file, stmt, 0, dump_flags);
+ }
+}
+
/* A simple pass that attempts to fold all builtin functions. This pass
is run after we've propagated as many constants as we can. */
@@ -2999,6 +3107,9 @@ pass_fold_builtins::execute (function *f
continue;
}
}
+ else if (gimple_assign_load_p (stmt) && gimple_store_p (stmt))
+ optimize_memcpy (&i, gimple_assign_lhs (stmt),
+ gimple_assign_rhs1 (stmt), NULL_TREE);
gsi_next (&i);
continue;
}
@@ -3114,14 +3225,25 @@ pass_fold_builtins::execute (function *f
false, false);
break;
+ case BUILT_IN_MEMCPY:
+ if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
+ && TREE_CODE (gimple_call_arg (stmt, 0)) == ADDR_EXPR
+ && TREE_CODE (gimple_call_arg (stmt, 1)) == ADDR_EXPR
+ && TREE_CODE (gimple_call_arg (stmt, 2)) == INTEGER_CST)
+ {
+ tree dest = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
+ tree src = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
+ tree len = gimple_call_arg (stmt, 2);
+ optimize_memcpy (&i, dest, src, len);
+ }
+ break;
+
case BUILT_IN_VA_START:
case BUILT_IN_VA_END:
case BUILT_IN_VA_COPY:
/* These shouldn't be folded before pass_stdarg. */
result = optimize_stdarg_builtin (stmt);
- if (result)
- break;
- /* FALLTHRU */
+ break;
default:;
}
--- gcc/testsuite/gcc.dg/pr78408-1.c.jj 2016-12-15 13:54:08.942121305 +0100
+++ gcc/testsuite/gcc.dg/pr78408-1.c 2016-12-15 15:13:04.900774924 +0100
@@ -0,0 +1,88 @@
+/* PR c/78408 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fab1-details" } */
+/* { dg-final { scan-tree-dump-times "after previous" 17 "fab1" } } */
+
+struct S { char a[32]; };
+struct T { char a[65536]; };
+void bar (int, struct S *, struct S *, struct T *, struct T *);
+void baz (char *, char *);
+
+void
+f1 (void)
+{
+ struct S a, b;
+ struct T c, d;
+ a = b = (struct S) {};
+ c = d = (struct T) {};
+ bar (1, &a, &b, &c, &d);
+}
+
+void
+f2 (void)
+{
+ struct S a, b;
+ struct T c, d;
+ b = (struct S) {};
+ a = b;
+ d = (struct T) {};
+ c = d;
+ bar (2, &a, &b, &c, &d);
+}
+
+void
+f3 (void)
+{
+ struct S a, b;
+ struct T c, d;
+ __builtin_memset (&b, 0, sizeof (b));
+ a = b;
+ __builtin_memset (&d, 0, sizeof (d));
+ c = d;
+ bar (3, &a, &b, &c, &d);
+}
+
+
+void
+f4 (void)
+{
+ struct S a, b;
+ struct T c, d;
+ b = (struct S) {};
+ __builtin_memcpy (&a, &b, sizeof (b));
+ d = (struct T) {};
+ __builtin_memcpy (&c, &d, sizeof (d));
+ bar (4, &a, &b, &c, &d);
+}
+
+void
+f5 (void)
+{
+ struct S a, b;
+ struct T c, d;
+ __builtin_memset (&b, 0, sizeof (b));
+ __builtin_memcpy (&a, &b, sizeof (b));
+ __builtin_memset (&d, 0, sizeof (d));
+ __builtin_memcpy (&c, &d, sizeof (d));
+ bar (5, &a, &b, &c, &d);
+}
+
+void
+f6 (void)
+{
+ struct S a, b, e, g;
+ struct T c, d, f, h;
+ g = e = a = b = (struct S) {};
+ h = f = c = d = (struct T) {};
+ bar (6, &a, &b, &c, &d);
+ bar (6, &e, &g, &f, &h);
+}
+
+void
+f7 (void)
+{
+ char a[64], b[64];
+ __builtin_memset (a + 13, 2, 27);
+ __builtin_memcpy (b + 4, a + 17, 23);
+ baz (a, b);
+}
--- gcc/testsuite/gcc.dg/pr78408-2.c.jj 2016-12-15 15:13:48.060200199 +0100
+++ gcc/testsuite/gcc.dg/pr78408-2.c 2016-12-15 15:15:50.880564683 +0100
@@ -0,0 +1,39 @@
+/* PR c/78408 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fab1-details" } */
+/* { dg-final { scan-tree-dump-not "after previous" "fab1" } } */
+
+struct S { char a[32]; };
+struct T { char a[65536]; };
+void bar (int, struct S *, struct S *, struct T *, struct T *);
+void baz (char *, char *);
+
+void
+f1 (void)
+{
+ struct S a, b;
+ struct T c, d;
+ __builtin_memset (&b, 2, sizeof (b));
+ a = b;
+ __builtin_memset (&d, 3, sizeof (d));
+ c = d;
+ bar (3, &a, &b, &c, &d);
+}
+
+void
+f2 (void)
+{
+ char a[64], b[64];
+ __builtin_memset (a + 13, 2, 27);
+ __builtin_memcpy (b + 4, a + 17, 24);
+ baz (a, b);
+}
+
+void
+f3 (void)
+{
+ char a[64], b[64];
+ __builtin_memset (a + 13, 2, 27);
+ __builtin_memcpy (b + 4, a + 12, 5);
+ baz (a, b);
+}
Jakub