This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] More aggressive __builtin_memcpy optimizations (PR middle-end/29215)


On Fri, Nov 14, 2008 at 6:42 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> Until now memcpy folding folded just the case of both src and dst
> being pointers to VAR_DECLs or their components with both the same size
> as length and a bunch of other restrictions.
>
> The following patch will optimize even when only one of src and dst
> have such a property, the store is then done using a TYPE_REF_CAN_ALIAS_REF
> pointer.  When this fold_builtin_memory_op optimization has been written,
> TYPE_REF_CAN_ALIAS_REF could be lost in many places during optimizations,
> but things have improved a lot since then.
>
> Alternative to this (or maybe desirable anyway) is readdition of ADDRESSOF
> or something similar to it to get rid of unneeded stores to stack variables,
> many builtins convert something that forces TREE_ADDRESSABLE to something
> that doesn't need to necessarily live in memory.  DSE ATM isn't able to
> eliminate the dead stores, and even if it would, we'd allocate space in the
> stack frame unnecessarily.
>
> Bootstrapped/regtested on x86_64-linux, ok for trunk?

This looks ok, but I have some questions regarding the alignment checks
and the size-checks:

> 2008-11-14  Jakub Jelinek  <jakub@redhat.com>
>
>        PR middle-end/29215
>        * builtins.c (SLOW_UNALIGNED_ACCESS): Define if not defined.
>        (fold_builtin_memory_op): Handle even the case where just one
>        of src and dest is an address of a var decl component, using
>        TYPE_REF_CAN_ALIAS_REF pointers.
>
>        * gcc.dg/pr29215.c: New test.
>
> --- gcc/builtins.c.jj   2008-11-10 09:59:14.000000000 +0100
> +++ gcc/builtins.c      2008-11-14 11:46:15.000000000 +0100
> @@ -51,6 +51,10 @@ along with GCC; see the file COPYING3.
>  #include "value-prof.h"
>  #include "diagnostic.h"
>
> +#ifndef SLOW_UNALIGNED_ACCESS
> +#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
> +#endif
> +
>  #ifndef PAD_VARARGS_DOWN
>  #define PAD_VARARGS_DOWN BYTES_BIG_ENDIAN
>  #endif
> @@ -8780,10 +8784,12 @@ fold_builtin_memory_op (tree dest, tree
>   else
>     {
>       tree srctype, desttype;
> +      int src_align, dest_align;
> +
>       if (endp == 3)
>        {
> -          int src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
> -          int dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
> +         src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
> +         dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
>
>          /* Both DEST and SRC must be pointer types.
>             ??? This is what old code did.  Is the testing for pointer types
> @@ -8818,44 +8824,79 @@ fold_builtin_memory_op (tree dest, tree
>          || !TYPE_SIZE_UNIT (srctype)
>          || !TYPE_SIZE_UNIT (desttype)
>          || TREE_CODE (TYPE_SIZE_UNIT (srctype)) != INTEGER_CST
> -         || TREE_CODE (TYPE_SIZE_UNIT (desttype)) != INTEGER_CST
> -         || !tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len)
> -         || !tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))

You should re-add these checks to the case we create a VIEW_CONVERT_EXPR
(the integral/pointer type case looks wrong btw, just unconditionally creating
the V_C_E and relying on its folding would be better).  V_C_Es should have
matched sizes of the source and target types.

> +         || TREE_CODE (TYPE_SIZE_UNIT (desttype)) != INTEGER_CST)
>        return NULL_TREE;
>
> -      if (get_pointer_alignment (dest, BIGGEST_ALIGNMENT)
> -         < (int) TYPE_ALIGN (desttype)
> -         || (get_pointer_alignment (src, BIGGEST_ALIGNMENT)
> -             < (int) TYPE_ALIGN (srctype)))
> +      src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
> +      dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
> +      if (dest_align < (int) TYPE_ALIGN (desttype)
> +         || src_align < (int) TYPE_ALIGN (srctype))
>        return NULL_TREE;

If either dest or src are not decls the dest or srctypes as available from
above do not have any meaning.  So if you later do a store via *(T *)p
it should check the dest_align against TYPE_ALIGN of srctype or mark
the pointed-to type packed.  You probably can create a testcase for
strict-alignment targets that would break.

Btw, this is a quite tricky area and I am somewhat concerned about
putting this into 4.4 - just to make you think a second time here ;)

Thanks,
Richard.

>       if (!ignore)
>         dest = builtin_save_expr (dest);
>
> -      srcvar = build_fold_indirect_ref (src);
> -      if (TREE_THIS_VOLATILE (srcvar))
> -       return NULL_TREE;
> -      if (!tree_int_cst_equal (lang_hooks.expr_size (srcvar), len))
> -       return NULL_TREE;
> -      /* With memcpy, it is possible to bypass aliasing rules, so without
> -         this check i.e. execute/20060930-2.c would be misoptimized, because
> -        it use conflicting alias set to hold argument for the memcpy call.
> -        This check is probably unnecessary with -fno-strict-aliasing.
> -        Similarly for destvar.  See also PR29286.  */
> -      if (!var_decl_component_p (srcvar)
> -         /* Accept: memcpy (*char_var, "test", 1); that simplify
> -            to char_var='t';  */
> -         || is_gimple_min_invariant (srcvar)
> -         || readonly_data_expr (src))
> -       return NULL_TREE;
> +      srcvar = NULL_TREE;
> +      if (tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len))
> +       {
> +         srcvar = build_fold_indirect_ref (src);
> +         if (TREE_THIS_VOLATILE (srcvar))
> +           srcvar = NULL_TREE;
> +         else if (!tree_int_cst_equal (lang_hooks.expr_size (srcvar), len))
> +           srcvar = NULL_TREE;
> +         /* With memcpy, it is possible to bypass aliasing rules, so without
> +            this check i.e. execute/20060930-2.c would be misoptimized,
> +            because it use conflicting alias set to hold argument for the
> +            memcpy call.  This check is probably unnecessary with
> +            -fno-strict-aliasing.  Similarly for destvar.  See also
> +            PR29286.  */
> +         else if (!var_decl_component_p (srcvar)
> +                  /* Accept: memcpy (*char_var, "test", 1); that simplify
> +                     to char_var='t';  */
> +                  || is_gimple_min_invariant (srcvar)
> +                  || readonly_data_expr (src))
> +           srcvar = NULL_TREE;
> +       }
> +
> +      destvar = NULL_TREE;
> +      if (tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
> +       {
> +         destvar = build_fold_indirect_ref (dest);
> +         if (TREE_THIS_VOLATILE (destvar))
> +           destvar = NULL_TREE;
> +         else if (!tree_int_cst_equal (lang_hooks.expr_size (destvar), len))
> +           destvar = NULL_TREE;
> +         else if (!var_decl_component_p (destvar))
> +           destvar = NULL_TREE;
> +       }
> +
> +      if (srcvar == NULL_TREE && destvar == NULL_TREE)
> +       return NULL_TREE;
> +
> +      if (srcvar == NULL_TREE)
> +       {
> +         tree srcptype;
> +         if (TREE_ADDRESSABLE (TREE_TYPE (destvar))
> +             || SLOW_UNALIGNED_ACCESS (TYPE_MODE (destvar), src_align))
> +           return NULL_TREE;
>
> -      destvar = build_fold_indirect_ref (dest);
> -      if (TREE_THIS_VOLATILE (destvar))
> -       return NULL_TREE;
> -      if (!tree_int_cst_equal (lang_hooks.expr_size (destvar), len))
> -       return NULL_TREE;
> -      if (!var_decl_component_p (destvar))
> -       return NULL_TREE;
> +         srctype = desttype;
> +         srcptype = build_pointer_type_for_mode (srctype, ptr_mode, true);
> +         src = fold_convert (srcptype, src);
> +         srcvar = build_fold_indirect_ref (src);
> +       }
> +      else if (destvar == NULL_TREE)
> +       {
> +         tree destptype;
> +         if (TREE_ADDRESSABLE (TREE_TYPE (srcvar))
> +             || SLOW_UNALIGNED_ACCESS (TYPE_MODE (srcvar), dest_align))
> +           return NULL_TREE;
> +
> +         desttype = srctype;
> +         destptype = build_pointer_type_for_mode (desttype, ptr_mode, true);
> +         dest = fold_convert (destptype, dest);
> +         destvar = build_fold_indirect_ref (dest);
> +       }
>
>       if (srctype == desttype
>          || (gimple_in_ssa_p (cfun)
> --- gcc/testsuite/gcc.dg/pr29215.c.jj   2008-11-14 11:32:15.000000000 +0100
> +++ gcc/testsuite/gcc.dg/pr29215.c      2008-11-14 11:32:15.000000000 +0100
> @@ -0,0 +1,38 @@
> +/* PR middle-end/29215 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-gimple" } */
> +
> +char buf[5 * sizeof (int) + 1];
> +
> +static void
> +foo (char *buffer, int arg1, int arg2, int arg3, int arg4, int arg5)
> +{
> +  __builtin_memcpy (buffer, &arg1, sizeof (int));
> +  buffer += sizeof (int);
> +  __builtin_memcpy (buffer, &arg2, sizeof (int));
> +  buffer += sizeof (int);
> +  __builtin_memcpy (buffer, &arg3, sizeof (int));
> +  buffer += sizeof (int);
> +  __builtin_memcpy (buffer, &arg4, sizeof (int));
> +  buffer += sizeof (int);
> +  __builtin_memcpy (buffer, &arg5, sizeof (int));
> +  buffer += sizeof (int);
> +}
> +
> +int
> +main (void)
> +{
> +  union { char buf[4]; int i; } u;
> +  u.i = 0;
> +  u.buf[0] = 'a';
> +  u.buf[1] = 'b';
> +  u.buf[2] = 'c';
> +  u.buf[3] = 'd';
> +  foo (buf, u.i, u.i, u.i, u.i, u.i);
> +  buf[5 * sizeof (int)] = '\0';
> +  __builtin_puts (buf);
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "memcpy" "gimple" } } */
> +/* { dg-final { cleanup-tree-dump "gimple" } } */
>
>        Jakub
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]