This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Optimize memcpy + memset (PR fortran/45636)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Richard Guenther <rguenther at suse dot de>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Mon, 11 Oct 2010 22:46:17 +0200
- Subject: [PATCH] Optimize memcpy + memset (PR fortran/45636)
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
This patch optimizes
memcpy (x, "abcd", 4);
memset (x + 4, ' ', 4);
sequences into a single
memcpy (x, "abcd ", 8);
if it is known (or expected) that the memcpy will be implemented using
store_by_pieces (otherwise we'd risk enlarging .rodata too much, e.g.
if there are only few longer string literals in memcpy originally and
adding too many different memset variants after them would mean the string
literals couldn't be shared anymore).
The patch also handles
memcpy (x, "a", 1);
memset (x + 1, 'b', 11);
where the memcpy has been optimized into a store.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2010-10-11 Jakub Jelinek <jakub@redhat.com>
PR fortran/45636
* tree-ssa-forwprop.c: Include expr.h.
(constant_pointer_difference, simplify_builtin_call): New functions.
(tree_ssa_forward_propagate_single_use_vars): Call
simplify_builtin_call on builtin calls.
* gcc.c-torture/execute/pr45636.c: New test.
* gfortran.dg/pr45636.f90: New test.
--- gcc/tree-ssa-forwprop.c.jj 2010-10-07 19:44:48.000000000 +0200
+++ gcc/tree-ssa-forwprop.c 2010-10-11 16:43:03.000000000 +0200
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.
#include "langhooks.h"
#include "flags.h"
#include "gimple.h"
+#include "expr.h"
/* This pass propagates the RHS of assignment statements into use
sites of the LHS of the assignment. It's basically a specialized
@@ -1317,6 +1318,296 @@ simplify_gimple_switch (gimple stmt)
}
}
+/* For pointers p2 and p1 return p2 - p1 if the
+ difference is known and constant, otherwise return NULL. */
+
+static tree
+constant_pointer_difference (tree p1, tree p2)
+{
+ int i, j;
+ tree exps[2][5];
+ tree offs[2][5];
+ int cnt[2];
+
+ for (i = 0; i < 2; i++)
+ {
+ tree p = i ? p1 : p2;
+ tree off = size_zero_node;
+ gimple stmt;
+ enum tree_code code;
+
+ j = 0;
+ do
+ {
+ if (!POINTER_TYPE_P (TREE_TYPE (p)))
+ break;
+ if (TREE_CODE (p) == ADDR_EXPR)
+ {
+ tree q = TREE_OPERAND (p, 0);
+ if (handled_component_p (q)
+ || TREE_CODE (q) == MEM_REF
+ || TREE_CODE (q) == TARGET_MEM_REF)
+ {
+ HOST_WIDE_INT offset, size, maxsize;
+ tree base
+ = get_ref_base_and_extent (q, &offset,
+ &size, &maxsize);
+ if (size == maxsize
+ && offset >= 0
+ && (offset % BITS_PER_UNIT) == 0)
+ {
+ q = base;
+ off = size_binop (PLUS_EXPR, off,
+ size_int (offset / BITS_PER_UNIT));
+ }
+ }
+ if (TREE_CODE (q) == MEM_REF
+ && TREE_CODE (TREE_OPERAND (q, 0)) == SSA_NAME)
+ {
+ p = TREE_OPERAND (q, 0);
+ off = size_binop (PLUS_EXPR, off,
+ fold_convert (sizetype,
+ TREE_OPERAND (q, 1)));
+ }
+ else
+ {
+ exps[i][j] = q;
+ offs[i][j++] = off;
+ break;
+ }
+ }
+ if (TREE_CODE (p) != SSA_NAME)
+ break;
+ exps[i][j] = p;
+ offs[i][j++] = off;
+ if (j == 5)
+ break;
+ stmt = SSA_NAME_DEF_STMT (p);
+ if (!is_gimple_assign (stmt) || gimple_assign_lhs (stmt) != p)
+ break;
+ code = gimple_assign_rhs_code (stmt);
+ if (code == POINTER_PLUS_EXPR)
+ {
+ if (TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST)
+ break;
+ off = size_binop (PLUS_EXPR, off, gimple_assign_rhs2 (stmt));
+ p = gimple_assign_rhs1 (stmt);
+ }
+ else if (code == ADDR_EXPR || code == NOP_EXPR)
+ p = gimple_assign_rhs1 (stmt);
+ else
+ break;
+ }
+ while (1);
+ cnt[i] = j;
+ }
+
+ for (i = 0; i < cnt[0]; i++)
+ for (j = 0; j < cnt[1]; j++)
+ if (exps[0][i] == exps[1][j])
+ return size_binop (MINUS_EXPR, offs[0][i], offs[1][j]);
+
+ return NULL_TREE;
+}
+
+/* *GSI_P is a GIMPLE_CALL to a builtin function.
+ Optimize
+ memcpy (p, "abcd", 4);
+ memset (p + 4, ' ', 3);
+ into
+ memcpy (p, "abcd ", 7);
+ call if the latter can be stored by pieces during expansion. */
+
+static bool
+simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
+{
+ gimple stmt1, stmt2 = gsi_stmt (*gsi_p);
+ tree vuse = gimple_vuse (stmt2);
+ if (vuse == NULL)
+ return false;
+ stmt1 = SSA_NAME_DEF_STMT (vuse);
+
+ switch (DECL_FUNCTION_CODE (callee2))
+ {
+ case BUILT_IN_MEMSET:
+ if (gimple_call_num_args (stmt2) != 3
+ || gimple_call_lhs (stmt2)
+ || CHAR_BIT != 8
+ || BITS_PER_UNIT != 8)
+ break;
+ else
+ {
+ tree callee1;
+ tree ptr1, src1, str1, off1, len1, lhs1;
+ tree ptr2 = gimple_call_arg (stmt2, 0);
+ tree val2 = gimple_call_arg (stmt2, 1);
+ tree len2 = gimple_call_arg (stmt2, 2);
+ tree diff, vdef, new_str_cst;
+ unsigned int ptr1_align;
+ unsigned HOST_WIDE_INT src_len;
+ char *src_buf;
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ bool do_fail;
+
+ if (!host_integerp (val2, 0)
+ || !host_integerp (len2, 1))
+ break;
+ if (is_gimple_call (stmt1))
+ {
+ callee1 = gimple_call_fndecl (stmt1);
+ if (callee1 == NULL_TREE
+ || DECL_BUILT_IN_CLASS (callee1) != BUILT_IN_NORMAL
+ || gimple_call_num_args (stmt1) != 3)
+ break;
+ if (DECL_FUNCTION_CODE (callee1) != BUILT_IN_MEMCPY
+ && DECL_FUNCTION_CODE (callee1) != BUILT_IN_MEMPCPY)
+ break;
+ ptr1 = gimple_call_arg (stmt1, 0);
+ src1 = gimple_call_arg (stmt1, 1);
+ len1 = gimple_call_arg (stmt1, 2);
+ lhs1 = gimple_call_lhs (stmt1);
+ if (!host_integerp (len1, 1))
+ break;
+ str1 = string_constant (src1, &off1);
+ if (str1 == NULL_TREE)
+ break;
+ if (!host_integerp (off1, 1)
+ || compare_tree_int (off1, TREE_STRING_LENGTH (str1) - 1) > 0
+ || compare_tree_int (len1, TREE_STRING_LENGTH (str1)
+ - tree_low_cst (off1, 1)) > 0
+ || TREE_CODE (TREE_TYPE (str1)) != ARRAY_TYPE
+ || TYPE_MODE (TREE_TYPE (TREE_TYPE (str1)))
+ != TYPE_MODE (char_type_node))
+ break;
+ }
+ else if (gimple_assign_single_p (stmt1))
+ {
+ ptr1 = gimple_assign_lhs (stmt1);
+ src1 = gimple_assign_rhs1 (stmt1);
+ if (TREE_CODE (ptr1) != MEM_REF
+ || TYPE_MODE (TREE_TYPE (ptr1)) != TYPE_MODE (char_type_node)
+ || !host_integerp (src1, 0))
+ break;
+ ptr1 = build_fold_addr_expr (ptr1);
+ callee1 = NULL_TREE;
+ len1 = size_one_node;
+ lhs1 = NULL_TREE;
+ off1 = size_zero_node;
+ str1 = NULL_TREE;
+ }
+ else
+ break;
+
+ diff = constant_pointer_difference (ptr1, ptr2);
+ if (diff == NULL && lhs1 != NULL)
+ {
+ diff = constant_pointer_difference (lhs1, ptr2);
+ if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY
+ && diff != NULL)
+ diff = size_binop (PLUS_EXPR, diff,
+ fold_convert (sizetype, len1));
+ }
+ if (diff == NULL
+ || !host_integerp (diff, 1)
+ || tree_int_cst_lt (len1, diff))
+ break;
+
+ src_len = tree_low_cst (diff, 1);
+ src_len += tree_low_cst (len2, 1);
+ if (src_len < (unsigned HOST_WIDE_INT) tree_low_cst (len1, 1))
+ src_len = tree_low_cst (len1, 1);
+ if (src_len > 1024)
+ break;
+
+ do_fail = false;
+ if (lhs1 != NULL
+ && DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY)
+ {
+ if (TREE_CODE (lhs1) != SSA_NAME)
+ break;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs1)
+ {
+ if (USE_STMT (use_p) != stmt2)
+ {
+ do_fail = true;
+ break;
+ }
+ }
+ if (do_fail)
+ break;
+ }
+ vdef = gimple_vdef (stmt1);
+ if (vdef != NULL)
+ {
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, vdef)
+ {
+ if (USE_STMT (use_p) != stmt2)
+ {
+ do_fail = true;
+ break;
+ }
+ }
+ if (do_fail)
+ break;
+ }
+
+ ptr1_align = get_pointer_alignment (ptr1, BIGGEST_ALIGNMENT);
+ src_buf = XALLOCAVEC (char, src_len + 1);
+ if (callee1)
+ memcpy (src_buf,
+ TREE_STRING_POINTER (str1) + tree_low_cst (off1, 1),
+ tree_low_cst (len1, 1));
+ else
+ src_buf[0] = tree_low_cst (src1, 0);
+ memset (src_buf + tree_low_cst (diff, 1),
+ tree_low_cst (val2, 1), tree_low_cst (len2, 1));
+ src_buf[src_len] = '\0';
+ if (strlen (src_buf) != src_len)
+ break;
+ rtl_profile_for_bb (gimple_bb (stmt2));
+ if (!can_store_by_pieces (src_len,
+ builtin_strncpy_read_str,
+ src_buf, ptr1_align, false))
+ break;
+
+ new_str_cst = build_string_literal (src_len, src_buf);
+ if (callee1)
+ {
+ if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY)
+ gimple_call_set_lhs (stmt1, NULL_TREE);
+ gimple_call_set_arg (stmt1, 1, new_str_cst);
+ gimple_call_set_arg (stmt1, 2,
+ build_int_cst (TREE_TYPE (len1), src_len));
+ update_stmt (stmt1);
+ unlink_stmt_vdef (stmt2);
+ gsi_remove (gsi_p, true);
+ release_defs (stmt2);
+ return true;
+ }
+ else
+ {
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt1);
+
+ gimple_call_set_fndecl (stmt2, built_in_decls [BUILT_IN_MEMCPY]);
+ gimple_call_set_arg (stmt2, 0, ptr1);
+ gimple_call_set_arg (stmt2, 1, new_str_cst);
+ gimple_call_set_arg (stmt2, 2,
+ build_int_cst (TREE_TYPE (len2), src_len));
+ unlink_stmt_vdef (stmt1);
+ gsi_remove (&gsi, true);
+ release_defs (stmt1);
+ update_stmt (stmt2);
+ return false;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return false;
+}
+
/* Run bitwise and assignments throug the folder. If the first argument is an
ssa name that is itself a result of a typecast of an ADDR_EXPR to an
integer, feed the ADDR_EXPR to the folder rather than the ssa name.
@@ -1784,6 +2075,14 @@ tree_ssa_forward_propagate_single_use_va
WARN_STRICT_OVERFLOW_CONDITIONAL);
gsi_next (&gsi);
}
+ else if (is_gimple_call (stmt))
+ {
+ tree callee = gimple_call_fndecl (stmt);
+ if (callee == NULL_TREE
+ || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL
+ || !simplify_builtin_call (&gsi, callee))
+ gsi_next (&gsi);
+ }
else
gsi_next (&gsi);
}
--- gcc/Makefile.in.jj 2010-10-11 07:51:09.000000000 +0200
+++ gcc/Makefile.in 2010-10-11 09:15:25.000000000 +0200
@@ -2404,7 +2404,7 @@ tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG
tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
$(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(TIMEVAR_H) \
- langhooks.h $(FLAGS_H) $(GIMPLE_H) tree-pretty-print.h
+ langhooks.h $(FLAGS_H) $(GIMPLE_H) tree-pretty-print.h $(EXPR_H)
tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
$(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(TIMEVAR_H) \
--- gcc/testsuite/gcc.c-torture/execute/pr45636.c.jj 2010-10-11 09:15:25.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr45636.c 2010-10-11 09:15:25.000000000 +0200
@@ -0,0 +1,75 @@
+/* PR fortran/45636 */
+
+typedef __SIZE_TYPE__ size_t;
+void *memcpy (void *__restrict__, const void *__restrict__, size_t);
+void *mempcpy (void *__restrict__, const void *__restrict__, size_t);
+void *memset (void *, int, size_t);
+int memcmp (const void *, const void *, size_t);
+extern void abort (void);
+
+struct A { int i; char c[32]; } a[2];
+
+__attribute__((noinline, noclone)) int
+f1 (char *p, int q, int z)
+{
+ memcpy (p, "abcd", 4);
+ if (q)
+ z = z + 123;
+ else
+ z *= 114;
+ memset (p + 4, ' ', 2);
+ return z;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+ char *p = mempcpy (&a[0].c[13], "123456", 4);
+ memset (p, '7', 3);
+}
+
+__attribute__((noinline, noclone)) void
+f3 (struct A *p)
+{
+ p++;
+ char *q = &p->c[10];
+ memcpy (q + 4, "__1234567" + 2, 7);
+ memset (&p->c[21], '9', 3);
+}
+
+__attribute__((noinline, noclone)) void
+f4 (void)
+{
+ memcpy (&a[0].c[10], "0123456789", 10);
+ memset (&a[0].c[13], ' ', 3);
+}
+
+__attribute__((noinline, noclone)) void
+check (const char *p, const char *str, size_t size)
+{
+ const char *q;
+ for (q = (const char *) &a; q < p; q++)
+ if (*q)
+ abort ();
+ if (memcmp (p, str, size) != 0)
+ abort ();
+ for (q = p + size; q < (const char *) (&a[0] + 2); q++)
+ if (*q)
+ abort ();
+ memset (&a, '\0', sizeof a);
+}
+
+int
+main (void)
+{
+ if (f1 (&a[0].c[7], 1, 2) != 125)
+ abort ();
+ check (&a[0].c[7], "abcd ", 6);
+ f2 ();
+ check (&a[0].c[13], "1234777", 7);
+ f3 (&a[0]);
+ check (&a[1].c[14], "1234567999", 10);
+ f4 ();
+ check (&a[0].c[10], "012 6789", 10);
+ return 0;
+}
--- gcc/testsuite/gfortran.dg/pr45636.f90.jj 2010-10-11 13:19:41.000000000 +0200
+++ gcc/testsuite/gfortran.dg/pr45636.f90 2010-10-11 16:49:19.000000000 +0200
@@ -0,0 +1,14 @@
+! PR fortran/45636
+! { dg-do compile }
+! { dg-options "-O2 -fdump-tree-forwprop2" }
+! PR 45636 - make sure no memset is needed for a short right-hand side.
+program main
+ character(len=2), parameter :: x='a '
+ character(len=1), parameter :: y='b'
+ character(len=4) :: a, b
+ a = x
+ b = y
+ call sub(a, b)
+end program main
+! { dg-final { scan-tree-dump-times "memset" 0 "forwprop2" } }
+! { dg-final { cleanup-tree-dump "forwprop2" } }
Jakub