This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] More aggressive __builtin_memcpy optimizations (PR middle-end/29215)


On Thu, Nov 20, 2008 at 4:54 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Wed, Nov 19, 2008 at 06:23:28PM +0100, Richard Guenther wrote:
>> The patch is ok.
>
> Before checking in, I've bootstrapped/regtested it on ia64-linux
> in addition to the usual x86_64-linux bootstrap/regtest.
>
> Unfortunately this revealed a few issues:
>
> 1) on ia64 allocatable_function_3.f90 ICEd at -O1 and higher, because
>   memcpy from a TREE_STATIC readonly VAR_DECL with a CONSTRUCTOR
>   initializer was optimized into a store, but TREE_PURPOSE in the
>   ctor elements weren't filled up, which upsets e.g.
>   gimplify_init_ctor_eval or categorize_ctor_elements_1.
>   When this wasn't optimized, Fortran FE managed to survive,
>   because none of these routines were called.  Either we could teach
>   these what to do if purpose isn't present, or, as done in the patch
>   below, fill in TREE_PURPOSE like the C/C++ FEs do.
> 2) when looking why that test didn't fail on x86_64-linux as well,
>   I found out the
>      if (!var_decl_component_p (srcvar)
>          /* Accept: memcpy (*char_var, "test", 1); that simplify
>             to char_var='t';  */
>          || is_gimple_min_invariant (srcvar)
>          || readonly_data_expr (src))
>        return NULL_TREE;
>   lines (in current SVN, my previous patch just moved these around).
>   On ia64 src was actually emitted into .sdata section, even when
>   read-only and thus memcpy was optimized, on x86_64 where src
>   lived in .rodata it wasn't.  This logic seems to be very much
>   broken, I mean if something is var_decl_component_p, then it
>   won't be gimple invariant, and whether it is in readonly data
>   doesn't matter too.  return NULL_TREE here means "don't optimize".
>   I've tried changing this into !var_decl_component_p () &&
>   !is_gimple_min_invariant () && !readonly_data_expr (),
>   but that created invalid GIMPLE, assigning STRING_CST into something.
>   So the patch below just removes those, optimization of the
>   memcpy (*char_var, "test", 1) can be looked at later, but
>   would certainly need extra code.
> 3) the new testcase failed on ia64, because memcpy wasn't optimized there.
>   memcpy is optimized only if the other pointer is known to be sufficiently
>   aligned (in the testcase it wasn't) or if unaligned access is allowed
>   and not slow (true on x86_64, false on ia64).  I've adjusted
>   the testcase so that gcc knows the buffer is aligned.
> 4) array_memcpy_3.f90 test failed to match on x86_64-linux after the
>   2) changes, memcpy is now optimized into a store, the pattern has
>   been adjusted not to care if memcpy has been optimized or not.
>
> Ok for trunk (together with the earlier patch)?

Ok for the middle-end parts.

Richard.

> 2008-11-20  Jakub Jelinek  <jakub@redhat.com>
>
>        PR middle-end/29215
>        * builtins.c (fold_builtin_memory_op): Remove
>        is_gimple_min_invariant and readonly_data_expr src check.
>
>        * trans-array.c (trans_array_constructor_value,
>        gfc_build_constant_array_constructor): Fill in TREE_PURPOSE.
>
>        * gfortran.dg/array_memcpy_3.f90: Adjust pattern to match even
>        memcpy optimized into ref-all store.
>        * gcc.dg/pr29215.c: Adjust so that all stores are known to be
>        aligned.
>
> --- gcc/builtins.c.jj   2008-11-20 14:16:50.000000000 +0100
> +++ gcc/builtins.c      2008-11-20 14:25:38.000000000 +0100
> @@ -8894,11 +8894,7 @@ fold_builtin_memory_op (tree dest, tree
>             memcpy call.  This check is probably unnecessary with
>             -fno-strict-aliasing.  Similarly for destvar.  See also
>             PR29286.  */
> -         else if (!var_decl_component_p (srcvar)
> -                  /* Accept: memcpy (*char_var, "test", 1); that simplify
> -                     to char_var='t';  */
> -                  || is_gimple_min_invariant (srcvar)
> -                  || readonly_data_expr (src))
> +         else if (!var_decl_component_p (srcvar))
>            srcvar = NULL_TREE;
>        }
>
> --- gcc/fortran/trans-array.c.jj        2008-11-17 08:37:27.000000000 +0100
> +++ gcc/fortran/trans-array.c   2008-11-20 14:16:58.000000000 +0100
> @@ -1235,6 +1235,7 @@ gfc_trans_array_constructor_value (stmtb
>              tree init;
>              tree bound;
>              tree tmptype;
> +             HOST_WIDE_INT idx = 0;
>
>              p = c;
>              list = NULL_TREE;
> @@ -1253,7 +1254,8 @@ gfc_trans_array_constructor_value (stmtb
>                                (gfc_get_pchar_type (p->expr->ts.kind),
>                                 se.expr);
>
> -                 list = tree_cons (NULL_TREE, se.expr, list);
> +                 list = tree_cons (build_int_cst (gfc_array_index_type,
> +                                                  idx++), se.expr, list);
>                  c = p;
>                  p = p->next;
>                }
> @@ -1619,7 +1621,8 @@ gfc_build_constant_array_constructor (gf
>       if (c->expr->ts.type == BT_CHARACTER && POINTER_TYPE_P (type))
>        se.expr = gfc_build_addr_expr (gfc_get_pchar_type (c->expr->ts.kind),
>                                       se.expr);
> -      list = tree_cons (NULL_TREE, se.expr, list);
> +      list = tree_cons (build_int_cst (gfc_array_index_type, nelem),
> +                       se.expr, list);
>       c = c->next;
>       nelem++;
>     }
> --- gcc/testsuite/gfortran.dg/array_memcpy_3.f90.jj     2008-09-30 16:56:06.000000000 +0200
> +++ gcc/testsuite/gfortran.dg/array_memcpy_3.f90        2008-11-20 16:32:56.000000000 +0100
> @@ -11,5 +11,5 @@ subroutine bar(x)
>   x = (/ 3, 1, 4, 1 /)
>  end subroutine
>
> -! { dg-final { scan-tree-dump-times "memcpy" 2 "original" } }
> +! { dg-final { scan-tree-dump-times "memcpy|ref-all" 2 "original" } }
>  ! { dg-final { cleanup-tree-dump "original" } }
> --- gcc/testsuite/gcc.dg/pr29215.c.jj   2008-11-20 14:16:50.000000000 +0100
> +++ gcc/testsuite/gcc.dg/pr29215.c      2008-11-20 14:16:58.000000000 +0100
> @@ -2,21 +2,16 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -fdump-tree-gimple" } */
>
> -char buf[5 * sizeof (int) + 1];
> +char buf[5 * sizeof (int) + 1] __attribute__((aligned (__alignof__ (int))));
>
>  static void
> -foo (char *buffer, int arg1, int arg2, int arg3, int arg4, int arg5)
> +foo (int arg1, int arg2, int arg3, int arg4, int arg5)
>  {
> -  __builtin_memcpy (buffer, &arg1, sizeof (int));
> -  buffer += sizeof (int);
> -  __builtin_memcpy (buffer, &arg2, sizeof (int));
> -  buffer += sizeof (int);
> -  __builtin_memcpy (buffer, &arg3, sizeof (int));
> -  buffer += sizeof (int);
> -  __builtin_memcpy (buffer, &arg4, sizeof (int));
> -  buffer += sizeof (int);
> -  __builtin_memcpy (buffer, &arg5, sizeof (int));
> -  buffer += sizeof (int);
> +  __builtin_memcpy (buf, &arg1, sizeof (int));
> +  __builtin_memcpy (buf + sizeof (int), &arg2, sizeof (int));
> +  __builtin_memcpy (buf + 2 * sizeof (int), &arg3, sizeof (int));
> +  __builtin_memcpy (buf + 3 * sizeof (int), &arg4, sizeof (int));
> +  __builtin_memcpy (buf + 4 * sizeof (int), &arg5, sizeof (int));
>  }
>
>  int
> @@ -28,7 +23,7 @@ main (void)
>   u.buf[1] = 'b';
>   u.buf[2] = 'c';
>   u.buf[3] = 'd';
> -  foo (buf, u.i, u.i, u.i, u.i, u.i);
> +  foo (u.i, u.i, u.i, u.i, u.i);
>   buf[5 * sizeof (int)] = '\0';
>   __builtin_puts (buf);
>   return 0;
>
>        Jakub
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]