This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] More aggressive __builtin_memcpy optimizations (PR middle-end/29215)


Hi!

Until now memcpy folding folded just the case of both src and dst
being pointers to VAR_DECLs or their components with both the same size
as length and a bunch of other restrictions.

The following patch will optimize even when only one of src and dst
have such a property, the store is then done using a TYPE_REF_CAN_ALIAS_REF
pointer.  When this fold_builtin_memory_op optimization has been written,
TYPE_REF_CAN_ALIAS_REF could be lost in many places during optimizations,
but things have improved a lot since then.

Alternative to this (or maybe desirable anyway) is readdition of ADDRESSOF
or something similar to it to get rid of unneeded stores to stack variables,
many builtins convert something that forces TREE_ADDRESSABLE to something
that doesn't need to necessarily live in memory.  DSE ATM isn't able to
eliminate the dead stores, and even if it would, we'd allocate space in the
stack frame unnecessarily.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2008-11-14  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/29215
	* builtins.c (SLOW_UNALIGNED_ACCESS): Define if not defined.
	(fold_builtin_memory_op): Handle even the case where just one
	of src and dest is an address of a var decl component, using
	TYPE_REF_CAN_ALIAS_REF pointers.

	* gcc.dg/pr29215.c: New test.

--- gcc/builtins.c.jj	2008-11-10 09:59:14.000000000 +0100
+++ gcc/builtins.c	2008-11-14 11:46:15.000000000 +0100
@@ -51,6 +51,10 @@ along with GCC; see the file COPYING3.  
 #include "value-prof.h"
 #include "diagnostic.h"
 
+#ifndef SLOW_UNALIGNED_ACCESS
+#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
+#endif
+
 #ifndef PAD_VARARGS_DOWN
 #define PAD_VARARGS_DOWN BYTES_BIG_ENDIAN
 #endif
@@ -8780,10 +8784,12 @@ fold_builtin_memory_op (tree dest, tree 
   else
     {
       tree srctype, desttype;
+      int src_align, dest_align;
+
       if (endp == 3)
 	{
-          int src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
-          int dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
+	  src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
+	  dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
 
 	  /* Both DEST and SRC must be pointer types. 
 	     ??? This is what old code did.  Is the testing for pointer types
@@ -8818,44 +8824,79 @@ fold_builtin_memory_op (tree dest, tree 
 	  || !TYPE_SIZE_UNIT (srctype)
 	  || !TYPE_SIZE_UNIT (desttype)
 	  || TREE_CODE (TYPE_SIZE_UNIT (srctype)) != INTEGER_CST
-	  || TREE_CODE (TYPE_SIZE_UNIT (desttype)) != INTEGER_CST
-	  || !tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len)
-	  || !tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
+	  || TREE_CODE (TYPE_SIZE_UNIT (desttype)) != INTEGER_CST)
 	return NULL_TREE;
 
-      if (get_pointer_alignment (dest, BIGGEST_ALIGNMENT) 
-	  < (int) TYPE_ALIGN (desttype)
-	  || (get_pointer_alignment (src, BIGGEST_ALIGNMENT) 
-	      < (int) TYPE_ALIGN (srctype)))
+      src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
+      dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
+      if (dest_align < (int) TYPE_ALIGN (desttype)
+	  || src_align < (int) TYPE_ALIGN (srctype))
 	return NULL_TREE;
 
       if (!ignore)
         dest = builtin_save_expr (dest);
 
-      srcvar = build_fold_indirect_ref (src);
-      if (TREE_THIS_VOLATILE (srcvar))
-	return NULL_TREE;
-      if (!tree_int_cst_equal (lang_hooks.expr_size (srcvar), len))
-	return NULL_TREE;
-      /* With memcpy, it is possible to bypass aliasing rules, so without
-         this check i.e. execute/20060930-2.c would be misoptimized, because
-	 it use conflicting alias set to hold argument for the memcpy call.
-	 This check is probably unnecessary with -fno-strict-aliasing.
-	 Similarly for destvar.  See also PR29286.  */
-      if (!var_decl_component_p (srcvar)
-	  /* Accept: memcpy (*char_var, "test", 1); that simplify
-	     to char_var='t';  */
-	  || is_gimple_min_invariant (srcvar)
-	  || readonly_data_expr (src))
-	return NULL_TREE;
+      srcvar = NULL_TREE;
+      if (tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len))
+	{
+	  srcvar = build_fold_indirect_ref (src);
+	  if (TREE_THIS_VOLATILE (srcvar))
+	    srcvar = NULL_TREE;
+	  else if (!tree_int_cst_equal (lang_hooks.expr_size (srcvar), len))
+	    srcvar = NULL_TREE;
+	  /* With memcpy, it is possible to bypass aliasing rules, so without
+	     this check i.e. execute/20060930-2.c would be misoptimized,
+	     because it use conflicting alias set to hold argument for the
+	     memcpy call.  This check is probably unnecessary with
+	     -fno-strict-aliasing.  Similarly for destvar.  See also
+	     PR29286.  */
+	  else if (!var_decl_component_p (srcvar)
+		   /* Accept: memcpy (*char_var, "test", 1); that simplify
+		      to char_var='t';  */
+		   || is_gimple_min_invariant (srcvar)
+		   || readonly_data_expr (src))
+	    srcvar = NULL_TREE;
+	}
+
+      destvar = NULL_TREE;
+      if (tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
+	{
+	  destvar = build_fold_indirect_ref (dest);
+	  if (TREE_THIS_VOLATILE (destvar))
+	    destvar = NULL_TREE;
+	  else if (!tree_int_cst_equal (lang_hooks.expr_size (destvar), len))
+	    destvar = NULL_TREE;
+	  else if (!var_decl_component_p (destvar))
+	    destvar = NULL_TREE;
+	}
+
+      if (srcvar == NULL_TREE && destvar == NULL_TREE)
+	return NULL_TREE;
+
+      if (srcvar == NULL_TREE)
+	{
+	  tree srcptype;
+	  if (TREE_ADDRESSABLE (TREE_TYPE (destvar))
+	      || SLOW_UNALIGNED_ACCESS (TYPE_MODE (destvar), src_align))
+	    return NULL_TREE;
 
-      destvar = build_fold_indirect_ref (dest);
-      if (TREE_THIS_VOLATILE (destvar))
-	return NULL_TREE;
-      if (!tree_int_cst_equal (lang_hooks.expr_size (destvar), len))
-	return NULL_TREE;
-      if (!var_decl_component_p (destvar))
-	return NULL_TREE;
+	  srctype = desttype;
+	  srcptype = build_pointer_type_for_mode (srctype, ptr_mode, true);
+	  src = fold_convert (srcptype, src);
+	  srcvar = build_fold_indirect_ref (src);
+	}
+      else if (destvar == NULL_TREE)
+	{
+	  tree destptype;
+	  if (TREE_ADDRESSABLE (TREE_TYPE (srcvar))
+	      || SLOW_UNALIGNED_ACCESS (TYPE_MODE (srcvar), dest_align))
+	    return NULL_TREE;
+
+	  desttype = srctype;
+	  destptype = build_pointer_type_for_mode (desttype, ptr_mode, true);
+	  dest = fold_convert (destptype, dest);
+	  destvar = build_fold_indirect_ref (dest);
+	}
 
       if (srctype == desttype
 	  || (gimple_in_ssa_p (cfun)
--- gcc/testsuite/gcc.dg/pr29215.c.jj	2008-11-14 11:32:15.000000000 +0100
+++ gcc/testsuite/gcc.dg/pr29215.c	2008-11-14 11:32:15.000000000 +0100
@@ -0,0 +1,38 @@
+/* PR middle-end/29215 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-gimple" } */
+
+char buf[5 * sizeof (int) + 1];
+
+static void
+foo (char *buffer, int arg1, int arg2, int arg3, int arg4, int arg5)
+{
+  __builtin_memcpy (buffer, &arg1, sizeof (int));
+  buffer += sizeof (int);
+  __builtin_memcpy (buffer, &arg2, sizeof (int));
+  buffer += sizeof (int);
+  __builtin_memcpy (buffer, &arg3, sizeof (int));
+  buffer += sizeof (int);
+  __builtin_memcpy (buffer, &arg4, sizeof (int));
+  buffer += sizeof (int);
+  __builtin_memcpy (buffer, &arg5, sizeof (int));
+  buffer += sizeof (int);
+}
+
+int
+main (void)
+{
+  union { char buf[4]; int i; } u;
+  u.i = 0;
+  u.buf[0] = 'a';
+  u.buf[1] = 'b';
+  u.buf[2] = 'c';
+  u.buf[3] = 'd';
+  foo (buf, u.i, u.i, u.i, u.i, u.i);
+  buf[5 * sizeof (int)] = '\0';
+  __builtin_puts (buf);
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "memcpy" "gimple" } } */
+/* { dg-final { cleanup-tree-dump "gimple" } } */

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]