[PATCH] More aggressive __builtin_memcpy optimizations (PR middle-end/29215)

Jakub Jelinek jakub@redhat.com
Tue Nov 18 15:22:00 GMT 2008


On Fri, Nov 14, 2008 at 08:49:03PM -0600, Richard Guenther wrote:
> On Fri, Nov 14, 2008 at 6:42 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> > @@ -8818,44 +8824,79 @@ fold_builtin_memory_op (tree dest, tree
> >          || !TYPE_SIZE_UNIT (srctype)
> >          || !TYPE_SIZE_UNIT (desttype)
> >          || TREE_CODE (TYPE_SIZE_UNIT (srctype)) != INTEGER_CST
> > -         || TREE_CODE (TYPE_SIZE_UNIT (desttype)) != INTEGER_CST
> > -         || !tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len)
> > -         || !tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
> 
> You should re-add these checks to the case we create a VIEW_CONVERT_EXPR
> (the integral/pointer type case looks wrong btw, just unconditionally creating
> the V_C_E and relying on its folding would be better).  V_C_Es should have
> matched sizes of the source and target types.

Those checks weren't removed, only moved later.  Instead of failing right
away, the code now does:
	srcvar = NULL_TREE;
	if (tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len))
	  {
	    srcvar = build_fold_indirect_ref (src);
	    if (not_appropriate_srcvar)
	      srcvar = NULL_TREE;
	  }
and similarly for destvar.  If both srcvar and destvar are NULL, it
won't optimize anything, if both are non-NULL, then there is no change
from what current SVN already handles.  The new stuff is only to handle
the case where srcvar is NULL and destvar non-NULL (optimized as
a load through TYPE_REF_CAN_ALIAS_ALL pointer) and srcvar non-NULL and destvar
NULL (store through TYPE_REF_CAN_ALIAS_ALL pointer).

Whether VCE creation is bypassed in fold_builtin_memory_op or whether
we always fold VCE (I bet for non-equal, but still
useless_type_conversion_p, conversions it would make a difference, otherwise
not) is unrelated to this patch; can be changed separately if you want.

> > +         || TREE_CODE (TYPE_SIZE_UNIT (desttype)) != INTEGER_CST)
> >        return NULL_TREE;
> >
> > -      if (get_pointer_alignment (dest, BIGGEST_ALIGNMENT)
> > -         < (int) TYPE_ALIGN (desttype)
> > -         || (get_pointer_alignment (src, BIGGEST_ALIGNMENT)
> > -             < (int) TYPE_ALIGN (srctype)))
> > +      src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
> > +      dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
> > +      if (dest_align < (int) TYPE_ALIGN (desttype)
> > +         || src_align < (int) TYPE_ALIGN (srctype))
> >        return NULL_TREE;
> 
> If either dest or src are not decls the dest or srctypes as available from
> above do not have any meaning.  So if you later do a store via *(T *)p
> it should check the dest_align against TYPE_ALIGN of srctype or mark
> the pointed-to type packed.  You probably can create a testcase for
> strict-alignment targets that would break.

See the following updated patch (untested yet), which checks the alignment first
and if not sufficiently aligned, for aggregates bails out, or for
SLOW_UNALIGNED_ACCESS, otherwise uses packed type.

> Btw, this is a quite tricky area and I am somewhat concerned about
> putting this into 4.4 - just to make you think a second time here ;)

I think it is not that much tricky, it is an important regression
and IMHO we still have time to get it sufficiently tested.  If it causes
regressions we can back it out.

If the patch is acked, I'd bootstrap/regtest it also on powerpc*-linux and
ia64-linux to get wider testing.

To answer Laurent's question, I've regtested the earlier version
of the patch on x86_64-linux with all languages, including ada and objc++.

2008-11-18  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/29215
	* builtins.c (SLOW_UNALIGNED_ACCESS): Define if not defined.
	(fold_builtin_memory_op): Handle even the case where just one
	of src and dest is an address of a var decl component, using
	TYPE_REF_CAN_ALIAS_ALL pointers.
	* tree-ssa-sccvn.c (DFS): Use clear_and_done_ssa_iter to shut
	up warnings.

	* gcc.dg/pr29215.c: New test.

--- gcc/builtins.c.jj	2008-11-17 20:25:12.000000000 +0100
+++ gcc/builtins.c	2008-11-18 14:30:59.000000000 +0100
@@ -51,6 +51,10 @@ along with GCC; see the file COPYING3.  
 #include "value-prof.h"
 #include "diagnostic.h"
 
+#ifndef SLOW_UNALIGNED_ACCESS
+#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
+#endif
+
 #ifndef PAD_VARARGS_DOWN
 #define PAD_VARARGS_DOWN BYTES_BIG_ENDIAN
 #endif
@@ -8824,10 +8828,12 @@ fold_builtin_memory_op (tree dest, tree 
   else
     {
       tree srctype, desttype;
+      int src_align, dest_align;
+
       if (endp == 3)
 	{
-          int src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
-          int dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
+	  src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
+	  dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
 
 	  /* Both DEST and SRC must be pointer types. 
 	     ??? This is what old code did.  Is the testing for pointer types
@@ -8862,45 +8868,100 @@ fold_builtin_memory_op (tree dest, tree 
 	  || !TYPE_SIZE_UNIT (srctype)
 	  || !TYPE_SIZE_UNIT (desttype)
 	  || TREE_CODE (TYPE_SIZE_UNIT (srctype)) != INTEGER_CST
-	  || TREE_CODE (TYPE_SIZE_UNIT (desttype)) != INTEGER_CST
-	  || !tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len)
-	  || !tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
+	  || TREE_CODE (TYPE_SIZE_UNIT (desttype)) != INTEGER_CST)
 	return NULL_TREE;
 
-      if (get_pointer_alignment (dest, BIGGEST_ALIGNMENT) 
-	  < (int) TYPE_ALIGN (desttype)
-	  || (get_pointer_alignment (src, BIGGEST_ALIGNMENT) 
-	      < (int) TYPE_ALIGN (srctype)))
+      src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
+      dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
+      if (dest_align < (int) TYPE_ALIGN (desttype)
+	  || src_align < (int) TYPE_ALIGN (srctype))
 	return NULL_TREE;
 
       if (!ignore)
         dest = builtin_save_expr (dest);
 
-      srcvar = build_fold_indirect_ref (src);
-      if (TREE_THIS_VOLATILE (srcvar))
-	return NULL_TREE;
-      if (!tree_int_cst_equal (lang_hooks.expr_size (srcvar), len))
-	return NULL_TREE;
-      /* With memcpy, it is possible to bypass aliasing rules, so without
-         this check i.e. execute/20060930-2.c would be misoptimized, because
-	 it use conflicting alias set to hold argument for the memcpy call.
-	 This check is probably unnecessary with -fno-strict-aliasing.
-	 Similarly for destvar.  See also PR29286.  */
-      if (!var_decl_component_p (srcvar)
-	  /* Accept: memcpy (*char_var, "test", 1); that simplify
-	     to char_var='t';  */
-	  || is_gimple_min_invariant (srcvar)
-	  || readonly_data_expr (src))
-	return NULL_TREE;
+      srcvar = NULL_TREE;
+      if (tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len))
+	{
+	  srcvar = build_fold_indirect_ref (src);
+	  if (TREE_THIS_VOLATILE (srcvar))
+	    srcvar = NULL_TREE;
+	  else if (!tree_int_cst_equal (lang_hooks.expr_size (srcvar), len))
+	    srcvar = NULL_TREE;
+	  /* With memcpy, it is possible to bypass aliasing rules, so without
+	     this check i.e. execute/20060930-2.c would be misoptimized,
+	     because it use conflicting alias set to hold argument for the
+	     memcpy call.  This check is probably unnecessary with
+	     -fno-strict-aliasing.  Similarly for destvar.  See also
+	     PR29286.  */
+	  else if (!var_decl_component_p (srcvar)
+		   /* Accept: memcpy (*char_var, "test", 1); that simplify
+		      to char_var='t';  */
+		   || is_gimple_min_invariant (srcvar)
+		   || readonly_data_expr (src))
+	    srcvar = NULL_TREE;
+	}
+
+      destvar = NULL_TREE;
+      if (tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
+	{
+	  destvar = build_fold_indirect_ref (dest);
+	  if (TREE_THIS_VOLATILE (destvar))
+	    destvar = NULL_TREE;
+	  else if (!tree_int_cst_equal (lang_hooks.expr_size (destvar), len))
+	    destvar = NULL_TREE;
+	  else if (!var_decl_component_p (destvar))
+	    destvar = NULL_TREE;
+	}
 
-      destvar = build_fold_indirect_ref (dest);
-      if (TREE_THIS_VOLATILE (destvar))
-	return NULL_TREE;
-      if (!tree_int_cst_equal (lang_hooks.expr_size (destvar), len))
-	return NULL_TREE;
-      if (!var_decl_component_p (destvar))
+      if (srcvar == NULL_TREE && destvar == NULL_TREE)
 	return NULL_TREE;
 
+      if (srcvar == NULL_TREE)
+	{
+	  tree srcptype;
+	  if (TREE_ADDRESSABLE (TREE_TYPE (destvar)))
+	    return NULL_TREE;
+
+	  srctype = desttype;
+	  if (src_align < (int) TYPE_ALIGN (srctype))
+	    {
+	      if (AGGREGATE_TYPE_P (srctype)
+		  || SLOW_UNALIGNED_ACCESS (TYPE_MODE (srctype), src_align))
+		return NULL_TREE;
+
+	      srctype = build_variant_type_copy (srctype);
+	      TYPE_ALIGN (srctype) = src_align;
+	      TYPE_USER_ALIGN (srctype) = 1;
+	      TYPE_PACKED (srctype) = 1;
+	    }
+	  srcptype = build_pointer_type_for_mode (srctype, ptr_mode, true);
+	  src = fold_convert (srcptype, src);
+	  srcvar = build_fold_indirect_ref (src);
+	}
+      else if (destvar == NULL_TREE)
+	{
+	  tree destptype;
+	  if (TREE_ADDRESSABLE (TREE_TYPE (srcvar)))
+	    return NULL_TREE;
+
+	  desttype = srctype;
+	  if (dest_align < (int) TYPE_ALIGN (desttype))
+	    {
+	      if (AGGREGATE_TYPE_P (desttype)
+		  || SLOW_UNALIGNED_ACCESS (TYPE_MODE (desttype), dest_align))
+		return NULL_TREE;
+
+	      desttype = build_variant_type_copy (desttype);
+	      TYPE_ALIGN (desttype) = dest_align;
+	      TYPE_USER_ALIGN (desttype) = 1;
+	      TYPE_PACKED (desttype) = 1;
+	    }
+	  destptype = build_pointer_type_for_mode (desttype, ptr_mode, true);
+	  dest = fold_convert (destptype, dest);
+	  destvar = build_fold_indirect_ref (dest);
+	}
+
       if (srctype == desttype
 	  || (gimple_in_ssa_p (cfun)
 	      && useless_type_conversion_p (desttype, srctype)))
--- gcc/tree-ssa-sccvn.c.jj	2008-11-14 16:14:44.000000000 +0100
+++ gcc/tree-ssa-sccvn.c	2008-11-18 14:14:37.000000000 +0100
@@ -2654,7 +2654,7 @@ start_over:
 	usep = op_iter_init_use (&iter, defstmt, SSA_OP_ALL_USES);
     }
   else
-    iter.done = true;
+    clear_and_done_ssa_iter (&iter);
 
   while (1)
     {
--- gcc/testsuite/gcc.dg/pr29215.c.jj	2008-11-18 14:14:37.000000000 +0100
+++ gcc/testsuite/gcc.dg/pr29215.c	2008-11-18 14:14:37.000000000 +0100
@@ -0,0 +1,38 @@
+/* PR middle-end/29215 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-gimple" } */
+
+char buf[5 * sizeof (int) + 1];
+
+static void
+foo (char *buffer, int arg1, int arg2, int arg3, int arg4, int arg5)
+{
+  __builtin_memcpy (buffer, &arg1, sizeof (int));
+  buffer += sizeof (int);
+  __builtin_memcpy (buffer, &arg2, sizeof (int));
+  buffer += sizeof (int);
+  __builtin_memcpy (buffer, &arg3, sizeof (int));
+  buffer += sizeof (int);
+  __builtin_memcpy (buffer, &arg4, sizeof (int));
+  buffer += sizeof (int);
+  __builtin_memcpy (buffer, &arg5, sizeof (int));
+  buffer += sizeof (int);
+}
+
+int
+main (void)
+{
+  union { char buf[4]; int i; } u;
+  u.i = 0;
+  u.buf[0] = 'a';
+  u.buf[1] = 'b';
+  u.buf[2] = 'c';
+  u.buf[3] = 'd';
+  foo (buf, u.i, u.i, u.i, u.i, u.i);
+  buf[5 * sizeof (int)] = '\0';
+  __builtin_puts (buf);
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "memcpy" "gimple" } } */
+/* { dg-final { cleanup-tree-dump "gimple" } } */


	Jakub



More information about the Gcc-patches mailing list