This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Optimize memcpy + memset (PR fortran/45636)


Hi!

This patch optimizes
memcpy (x, "abcd", 4);
memset (x + 4, ' ', 4);
sequences into a single
memcpy (x, "abcd    ", 8);
if it is known (or expected) that the memcpy will be implemented using
store_by_pieces (otherwise we'd risk enlarging .rodata too much, e.g.
if there are only few longer string literals in memcpy originally and
adding too many different memset variants after them would mean the string
literals couldn't be shared anymore).
The patch also handles
memcpy (x, "a", 1);
memset (x + 1, 'b', 11);
where the memcpy has been optimized into a store.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2010-10-11  Jakub Jelinek  <jakub@redhat.com>

	PR fortran/45636
	* tree-ssa-forwprop.c: Include expr.h.
	(constant_pointer_difference, simplify_builtin_call): New functions.
	(tree_ssa_forward_propagate_single_use_vars): Call
	simplify_builtin_call on builtin calls.

	* gcc.c-torture/execute/pr45636.c: New test.
	* gfortran.dg/pr45636.f90: New test.

--- gcc/tree-ssa-forwprop.c.jj	2010-10-07 19:44:48.000000000 +0200
+++ gcc/tree-ssa-forwprop.c	2010-10-11 16:43:03.000000000 +0200
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  
 #include "langhooks.h"
 #include "flags.h"
 #include "gimple.h"
+#include "expr.h"
 
 /* This pass propagates the RHS of assignment statements into use
    sites of the LHS of the assignment.  It's basically a specialized
@@ -1317,6 +1318,296 @@ simplify_gimple_switch (gimple stmt)
     }
 }
 
+/* For pointers p2 and p1 return p2 - p1 if the
+   difference is known and constant, otherwise return NULL.  */
+
+static tree
+constant_pointer_difference (tree p1, tree p2)
+{
+  int i, j;
+  tree exps[2][5];
+  tree offs[2][5];
+  int cnt[2];
+
+  for (i = 0; i < 2; i++)
+    {
+      tree p = i ? p1 : p2;
+      tree off = size_zero_node;
+      gimple stmt;
+      enum tree_code code;
+
+      j = 0;
+      do
+	{
+	  if (!POINTER_TYPE_P (TREE_TYPE (p)))
+	    break;
+	  if (TREE_CODE (p) == ADDR_EXPR)
+	    {
+	      tree q = TREE_OPERAND (p, 0);
+	      if (handled_component_p (q)
+		  || TREE_CODE (q) == MEM_REF
+		  || TREE_CODE (q) == TARGET_MEM_REF)
+		{
+		  HOST_WIDE_INT offset, size, maxsize;
+		  tree base
+		    = get_ref_base_and_extent (q, &offset,
+					       &size, &maxsize);
+		  if (size == maxsize
+		      && offset >= 0
+		      && (offset % BITS_PER_UNIT) == 0)
+		    {
+		      q = base;
+		      off = size_binop (PLUS_EXPR, off,
+					size_int (offset / BITS_PER_UNIT));
+		    }
+		}
+	      if (TREE_CODE (q) == MEM_REF
+		  && TREE_CODE (TREE_OPERAND (q, 0)) == SSA_NAME)
+		{
+		  p = TREE_OPERAND (q, 0);
+		  off = size_binop (PLUS_EXPR, off,
+				    fold_convert (sizetype,
+						  TREE_OPERAND (q, 1)));
+		}
+	      else
+		{
+		  exps[i][j] = q;
+		  offs[i][j++] = off;
+		  break;
+		}
+	    }
+	  if (TREE_CODE (p) != SSA_NAME)
+	    break;
+	  exps[i][j] = p;
+	  offs[i][j++] = off;
+	  if (j == 5)
+	    break;
+	  stmt = SSA_NAME_DEF_STMT (p);
+	  if (!is_gimple_assign (stmt) || gimple_assign_lhs (stmt) != p)
+	    break;
+	  code = gimple_assign_rhs_code (stmt);
+	  if (code == POINTER_PLUS_EXPR)
+	    {
+	      if (TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST)
+		break;
+	      off = size_binop (PLUS_EXPR, off, gimple_assign_rhs2 (stmt));
+	      p = gimple_assign_rhs1 (stmt);
+	    }
+	  else if (code == ADDR_EXPR || code == NOP_EXPR)
+	    p = gimple_assign_rhs1 (stmt);
+	  else
+	    break;
+	}
+      while (1);
+      cnt[i] = j;
+    }
+
+  for (i = 0; i < cnt[0]; i++)
+    for (j = 0; j < cnt[1]; j++)
+      if (exps[0][i] == exps[1][j])
+	return size_binop (MINUS_EXPR, offs[0][i], offs[1][j]);
+
+  return NULL_TREE;
+}
+
+/* *GSI_P is a GIMPLE_CALL to a builtin function.
+   Optimize
+   memcpy (p, "abcd", 4);
+   memset (p + 4, ' ', 3);
+   into
+   memcpy (p, "abcd   ", 7);
+   call if the latter can be stored by pieces during expansion.  */
+
+static bool
+simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
+{
+  gimple stmt1, stmt2 = gsi_stmt (*gsi_p);
+  tree vuse = gimple_vuse (stmt2);
+  if (vuse == NULL)
+    return false;
+  stmt1 = SSA_NAME_DEF_STMT (vuse);
+
+  switch (DECL_FUNCTION_CODE (callee2))
+    {
+    case BUILT_IN_MEMSET:
+      if (gimple_call_num_args (stmt2) != 3
+	  || gimple_call_lhs (stmt2)
+	  || CHAR_BIT != 8
+	  || BITS_PER_UNIT != 8)
+	break;
+      else
+	{
+	  tree callee1;
+	  tree ptr1, src1, str1, off1, len1, lhs1;
+	  tree ptr2 = gimple_call_arg (stmt2, 0);
+	  tree val2 = gimple_call_arg (stmt2, 1);
+	  tree len2 = gimple_call_arg (stmt2, 2);
+	  tree diff, vdef, new_str_cst;
+	  unsigned int ptr1_align;
+	  unsigned HOST_WIDE_INT src_len;
+	  char *src_buf;
+	  imm_use_iterator imm_iter;
+	  use_operand_p use_p;
+	  bool do_fail;
+
+	  if (!host_integerp (val2, 0)
+	      || !host_integerp (len2, 1))
+	    break;
+	  if (is_gimple_call (stmt1))
+	    {
+	      callee1 = gimple_call_fndecl (stmt1);
+	      if (callee1 == NULL_TREE
+		  || DECL_BUILT_IN_CLASS (callee1) != BUILT_IN_NORMAL
+		  || gimple_call_num_args (stmt1) != 3)
+		break;
+	      if (DECL_FUNCTION_CODE (callee1) != BUILT_IN_MEMCPY
+		  && DECL_FUNCTION_CODE (callee1) != BUILT_IN_MEMPCPY)
+		break;
+	      ptr1 = gimple_call_arg (stmt1, 0);
+	      src1 = gimple_call_arg (stmt1, 1);
+	      len1 = gimple_call_arg (stmt1, 2);
+	      lhs1 = gimple_call_lhs (stmt1);
+	      if (!host_integerp (len1, 1))
+		break;
+	      str1 = string_constant (src1, &off1);
+	      if (str1 == NULL_TREE)
+		break;
+	      if (!host_integerp (off1, 1)
+		  || compare_tree_int (off1, TREE_STRING_LENGTH (str1) - 1) > 0
+		  || compare_tree_int (len1, TREE_STRING_LENGTH (str1)
+					     - tree_low_cst (off1, 1)) > 0
+		  || TREE_CODE (TREE_TYPE (str1)) != ARRAY_TYPE
+		  || TYPE_MODE (TREE_TYPE (TREE_TYPE (str1)))
+		     != TYPE_MODE (char_type_node))
+		break;
+	    }
+	  else if (gimple_assign_single_p (stmt1))
+	    {
+    	      ptr1 = gimple_assign_lhs (stmt1);
+	      src1 = gimple_assign_rhs1 (stmt1);
+	      if (TREE_CODE (ptr1) != MEM_REF
+		  || TYPE_MODE (TREE_TYPE (ptr1)) != TYPE_MODE (char_type_node)
+		  || !host_integerp (src1, 0))
+		break;
+	      ptr1 = build_fold_addr_expr (ptr1);
+	      callee1 = NULL_TREE;
+	      len1 = size_one_node;
+	      lhs1 = NULL_TREE;
+	      off1 = size_zero_node;
+	      str1 = NULL_TREE;
+	    }
+	  else
+	    break;
+
+	  diff = constant_pointer_difference (ptr1, ptr2);
+	  if (diff == NULL && lhs1 != NULL)
+	    {
+	      diff = constant_pointer_difference (lhs1, ptr2);
+	      if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY
+		  && diff != NULL)
+		diff = size_binop (PLUS_EXPR, diff,
+				   fold_convert (sizetype, len1));
+	    }
+	  if (diff == NULL
+	      || !host_integerp (diff, 1)
+	      || tree_int_cst_lt (len1, diff))
+	    break;
+
+	  src_len = tree_low_cst (diff, 1);
+	  src_len += tree_low_cst (len2, 1);
+	  if (src_len < (unsigned HOST_WIDE_INT) tree_low_cst (len1, 1))
+	    src_len = tree_low_cst (len1, 1);
+	  if (src_len > 1024)
+	    break;
+
+	  do_fail = false;
+	  if (lhs1 != NULL
+	      && DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY)
+	    {
+	      if (TREE_CODE (lhs1) != SSA_NAME)
+		break;
+	      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs1)
+		{
+		  if (USE_STMT (use_p) != stmt2)
+		    {
+		      do_fail = true;
+		      break;
+		    }
+		}
+	      if (do_fail)
+		break;
+	    }
+	  vdef = gimple_vdef (stmt1);
+	  if (vdef != NULL)
+	    {
+	      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, vdef)
+		{
+		  if (USE_STMT (use_p) != stmt2)
+		    {
+		      do_fail = true;
+		      break;
+		    }
+		}
+	      if (do_fail)
+		break;
+	    }
+
+	  ptr1_align = get_pointer_alignment (ptr1, BIGGEST_ALIGNMENT);
+	  src_buf = XALLOCAVEC (char, src_len + 1);
+	  if (callee1)
+	    memcpy (src_buf,
+		    TREE_STRING_POINTER (str1) + tree_low_cst (off1, 1),
+		    tree_low_cst (len1, 1));
+	  else
+	    src_buf[0] = tree_low_cst (src1, 0);
+	  memset (src_buf + tree_low_cst (diff, 1),
+		  tree_low_cst (val2, 1), tree_low_cst (len2, 1));
+	  src_buf[src_len] = '\0';
+	  if (strlen (src_buf) != src_len)
+	    break;
+	  rtl_profile_for_bb (gimple_bb (stmt2));
+	  if (!can_store_by_pieces (src_len,
+				    builtin_strncpy_read_str,
+				    src_buf, ptr1_align, false))
+	    break;
+
+	  new_str_cst = build_string_literal (src_len, src_buf);
+	  if (callee1)
+	    {
+	      if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_MEMPCPY)
+		gimple_call_set_lhs (stmt1, NULL_TREE);
+	      gimple_call_set_arg (stmt1, 1, new_str_cst);
+	      gimple_call_set_arg (stmt1, 2,
+				   build_int_cst (TREE_TYPE (len1), src_len));
+	      update_stmt (stmt1);
+	      unlink_stmt_vdef (stmt2);
+	      gsi_remove (gsi_p, true);
+	      release_defs (stmt2);
+	      return true;
+	    }
+	  else
+	    {
+	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt1);
+
+	      gimple_call_set_fndecl (stmt2, built_in_decls [BUILT_IN_MEMCPY]);
+	      gimple_call_set_arg (stmt2, 0, ptr1);
+	      gimple_call_set_arg (stmt2, 1, new_str_cst);
+	      gimple_call_set_arg (stmt2, 2,
+				   build_int_cst (TREE_TYPE (len2), src_len));
+	      unlink_stmt_vdef (stmt1);
+	      gsi_remove (&gsi, true);
+	      release_defs (stmt1);
+	      update_stmt (stmt2);
+	      return false;
+	    }
+	}
+      break;
+    default:
+      break;
+    }
+  return false;
+}
+
 /* Run bitwise and assignments throug the folder.  If the first argument is an
    ssa name that is itself a result of a typecast of an ADDR_EXPR to an
    integer, feed the ADDR_EXPR to the folder rather than the ssa name.
@@ -1784,6 +2075,14 @@ tree_ssa_forward_propagate_single_use_va
 					      WARN_STRICT_OVERFLOW_CONDITIONAL);
 	      gsi_next (&gsi);
 	    }
+	  else if (is_gimple_call (stmt))
+	    {
+	      tree callee = gimple_call_fndecl (stmt);
+	      if (callee == NULL_TREE
+		  || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL
+		  || !simplify_builtin_call (&gsi, callee))
+		gsi_next (&gsi);
+	    }
 	  else
 	    gsi_next (&gsi);
 	}
--- gcc/Makefile.in.jj	2010-10-11 07:51:09.000000000 +0200
+++ gcc/Makefile.in	2010-10-11 09:15:25.000000000 +0200
@@ -2404,7 +2404,7 @@ tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG
 tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(TIMEVAR_H) \
-   langhooks.h $(FLAGS_H) $(GIMPLE_H) tree-pretty-print.h
+   langhooks.h $(FLAGS_H) $(GIMPLE_H) tree-pretty-print.h $(EXPR_H)
 tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(TIMEVAR_H) \
--- gcc/testsuite/gcc.c-torture/execute/pr45636.c.jj	2010-10-11 09:15:25.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr45636.c	2010-10-11 09:15:25.000000000 +0200
@@ -0,0 +1,75 @@
+/* PR fortran/45636 */
+
+typedef __SIZE_TYPE__ size_t;
+void *memcpy (void *__restrict__, const void *__restrict__, size_t);
+void *mempcpy (void *__restrict__, const void *__restrict__, size_t);
+void *memset (void *, int, size_t);
+int memcmp (const void *, const void *, size_t);
+extern void abort (void);
+
+struct A { int i; char c[32]; } a[2];
+
+__attribute__((noinline, noclone)) int
+f1 (char *p, int q, int z)
+{
+  memcpy (p, "abcd", 4);
+  if (q)
+    z = z + 123;
+  else
+    z *= 114;
+  memset (p + 4, ' ', 2);
+  return z;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  char *p = mempcpy (&a[0].c[13], "123456", 4);
+  memset (p, '7', 3);
+}
+
+__attribute__((noinline, noclone)) void
+f3 (struct A *p)
+{
+  p++;
+  char *q = &p->c[10];
+  memcpy (q + 4, "__1234567" + 2, 7);
+  memset (&p->c[21], '9', 3);
+}
+
+__attribute__((noinline, noclone)) void
+f4 (void)
+{
+  memcpy (&a[0].c[10], "0123456789", 10);
+  memset (&a[0].c[13], ' ', 3);
+}
+
+__attribute__((noinline, noclone)) void
+check (const char *p, const char *str, size_t size)
+{
+  const char *q;
+  for (q = (const char *) &a; q < p; q++)
+    if (*q)
+      abort ();
+  if (memcmp (p, str, size) != 0)
+    abort ();
+  for (q = p + size; q < (const char *) (&a[0] + 2); q++)
+    if (*q)
+      abort ();
+  memset (&a, '\0', sizeof a);
+}
+
+int
+main (void)
+{
+  if (f1 (&a[0].c[7], 1, 2) != 125)
+    abort ();
+  check (&a[0].c[7], "abcd  ", 6);
+  f2 ();
+  check (&a[0].c[13], "1234777", 7);
+  f3 (&a[0]);
+  check (&a[1].c[14], "1234567999", 10);
+  f4 ();
+  check (&a[0].c[10], "012   6789", 10);
+  return 0;
+}
--- gcc/testsuite/gfortran.dg/pr45636.f90.jj	2010-10-11 13:19:41.000000000 +0200
+++ gcc/testsuite/gfortran.dg/pr45636.f90	2010-10-11 16:49:19.000000000 +0200
@@ -0,0 +1,14 @@
+! PR fortran/45636
+! { dg-do compile }
+! { dg-options "-O2 -fdump-tree-forwprop2" }
+! PR 45636 - make sure no memset is needed for a short right-hand side.
+program main
+  character(len=2), parameter :: x='a '
+  character(len=1), parameter :: y='b'
+  character(len=4) :: a, b
+  a = x
+  b = y
+  call sub(a, b)
+end program main
+! { dg-final { scan-tree-dump-times "memset" 0 "forwprop2" } }
+! { dg-final { cleanup-tree-dump "forwprop2" } }

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]