This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] More aggressive __builtin_memcpy optimizations (PR middle-end/29215)


On Wed, Nov 19, 2008 at 06:23:28PM +0100, Richard Guenther wrote:
> The patch is ok.

Before checking in, I've bootstrapped/regtested it on ia64-linux
in addition to the usual x86_64-linux bootstrap/regtest.

Unfortunately this revealed a few issues:

1) on ia64 allocatable_function_3.f90 ICEd at -O1 and higher, because
   memcpy from a TREE_STATIC readonly VAR_DECL with a CONSTRUCTOR
   initializer was optimized into a store, but TREE_PURPOSE in the
   ctor elements weren't filled up, which upsets e.g.
   gimplify_init_ctor_eval or categorize_ctor_elements_1.
   When this wasn't optimized, Fortran FE managed to survive,
   because none of these routines were called.  Either we could teach
   these what to do if purpose isn't present, or, as done in the patch
   below, fill in TREE_PURPOSE like the C/C++ FEs do.
2) when looking why that test didn't fail on x86_64-linux as well,
   I found out the
      if (!var_decl_component_p (srcvar)
          /* Accept: memcpy (*char_var, "test", 1); that simplify
             to char_var='t';  */
          || is_gimple_min_invariant (srcvar)
          || readonly_data_expr (src))
        return NULL_TREE;
   lines (in current SVN, my previous patch just moved these around).
   On ia64 src was actually emitted into .sdata section, even when
   read-only and thus memcpy was optimized, on x86_64 where src
   lived in .rodata it wasn't.  This logic seems to be very much
   broken, I mean if something is var_decl_component_p, then it
   won't be gimple invariant, and whether it is in readonly data
   doesn't matter too.  return NULL_TREE here means "don't optimize".
   I've tried changing this into !var_decl_component_p () &&
   !is_gimple_min_invariant () && !readonly_data_expr (),
   but that created invalid GIMPLE, assigning STRING_CST into something.
   So the patch below just removes those, optimization of the
   memcpy (*char_var, "test", 1) can be looked at later, but
   would certainly need extra code.
3) the new testcase failed on ia64, because memcpy wasn't optimized there.
   memcpy is optimized only if the other pointer is known to be sufficiently
   aligned (in the testcase it wasn't) or if unaligned access is allowed
   and not slow (true on x86_64, false on ia64).  I've adjusted
   the testcase so that gcc knows the buffer is aligned.
4) array_memcpy_3.f90 test failed to match on x86_64-linux after the
   2) changes, memcpy is now optimized into a store, the pattern has
   been adjusted not to care if memcpy has been optimized or not.

Ok for trunk (together with the earlier patch)?

2008-11-20  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/29215
	* builtins.c (fold_builtin_memory_op): Remove
	is_gimple_min_invariant and readonly_data_expr src check.

	* trans-array.c (trans_array_constructor_value,
	gfc_build_constant_array_constructor): Fill in TREE_PURPOSE.

	* gfortran.dg/array_memcpy_3.f90: Adjust pattern to match even
	memcpy optimized into ref-all store.
	* gcc.dg/pr29215.c: Adjust so that all stores are known to be
	aligned.

--- gcc/builtins.c.jj	2008-11-20 14:16:50.000000000 +0100
+++ gcc/builtins.c	2008-11-20 14:25:38.000000000 +0100
@@ -8894,11 +8894,7 @@ fold_builtin_memory_op (tree dest, tree 
 	     memcpy call.  This check is probably unnecessary with
 	     -fno-strict-aliasing.  Similarly for destvar.  See also
 	     PR29286.  */
-	  else if (!var_decl_component_p (srcvar)
-		   /* Accept: memcpy (*char_var, "test", 1); that simplify
-		      to char_var='t';  */
-		   || is_gimple_min_invariant (srcvar)
-		   || readonly_data_expr (src))
+	  else if (!var_decl_component_p (srcvar))
 	    srcvar = NULL_TREE;
 	}
 
--- gcc/fortran/trans-array.c.jj	2008-11-17 08:37:27.000000000 +0100
+++ gcc/fortran/trans-array.c	2008-11-20 14:16:58.000000000 +0100
@@ -1235,6 +1235,7 @@ gfc_trans_array_constructor_value (stmtb
 	      tree init;
 	      tree bound;
 	      tree tmptype;
+	      HOST_WIDE_INT idx = 0;
 
 	      p = c;
 	      list = NULL_TREE;
@@ -1253,7 +1254,8 @@ gfc_trans_array_constructor_value (stmtb
 				(gfc_get_pchar_type (p->expr->ts.kind),
 				 se.expr);
 
-		  list = tree_cons (NULL_TREE, se.expr, list);
+		  list = tree_cons (build_int_cst (gfc_array_index_type,
+						   idx++), se.expr, list);
 		  c = p;
 		  p = p->next;
 		}
@@ -1619,7 +1621,8 @@ gfc_build_constant_array_constructor (gf
       if (c->expr->ts.type == BT_CHARACTER && POINTER_TYPE_P (type))
 	se.expr = gfc_build_addr_expr (gfc_get_pchar_type (c->expr->ts.kind),
 				       se.expr);
-      list = tree_cons (NULL_TREE, se.expr, list);
+      list = tree_cons (build_int_cst (gfc_array_index_type, nelem),
+			se.expr, list);
       c = c->next;
       nelem++;
     }
--- gcc/testsuite/gfortran.dg/array_memcpy_3.f90.jj	2008-09-30 16:56:06.000000000 +0200
+++ gcc/testsuite/gfortran.dg/array_memcpy_3.f90	2008-11-20 16:32:56.000000000 +0100
@@ -11,5 +11,5 @@ subroutine bar(x)
   x = (/ 3, 1, 4, 1 /)
 end subroutine
 
-! { dg-final { scan-tree-dump-times "memcpy" 2 "original" } }
+! { dg-final { scan-tree-dump-times "memcpy|ref-all" 2 "original" } }
 ! { dg-final { cleanup-tree-dump "original" } }
--- gcc/testsuite/gcc.dg/pr29215.c.jj	2008-11-20 14:16:50.000000000 +0100
+++ gcc/testsuite/gcc.dg/pr29215.c	2008-11-20 14:16:58.000000000 +0100
@@ -2,21 +2,16 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fdump-tree-gimple" } */
 
-char buf[5 * sizeof (int) + 1];
+char buf[5 * sizeof (int) + 1] __attribute__((aligned (__alignof__ (int))));
 
 static void
-foo (char *buffer, int arg1, int arg2, int arg3, int arg4, int arg5)
+foo (int arg1, int arg2, int arg3, int arg4, int arg5)
 {
-  __builtin_memcpy (buffer, &arg1, sizeof (int));
-  buffer += sizeof (int);
-  __builtin_memcpy (buffer, &arg2, sizeof (int));
-  buffer += sizeof (int);
-  __builtin_memcpy (buffer, &arg3, sizeof (int));
-  buffer += sizeof (int);
-  __builtin_memcpy (buffer, &arg4, sizeof (int));
-  buffer += sizeof (int);
-  __builtin_memcpy (buffer, &arg5, sizeof (int));
-  buffer += sizeof (int);
+  __builtin_memcpy (buf, &arg1, sizeof (int));
+  __builtin_memcpy (buf + sizeof (int), &arg2, sizeof (int));
+  __builtin_memcpy (buf + 2 * sizeof (int), &arg3, sizeof (int));
+  __builtin_memcpy (buf + 3 * sizeof (int), &arg4, sizeof (int));
+  __builtin_memcpy (buf + 4 * sizeof (int), &arg5, sizeof (int));
 }
 
 int
@@ -28,7 +23,7 @@ main (void)
   u.buf[1] = 'b';
   u.buf[2] = 'c';
   u.buf[3] = 'd';
-  foo (buf, u.i, u.i, u.i, u.i, u.i);
+  foo (u.i, u.i, u.i, u.i, u.i);
   buf[5 * sizeof (int)] = '\0';
   __builtin_puts (buf);
   return 0;

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]