This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] More aggressive __builtin_memcpy optimizations (PR middle-end/29215)
- From: "Richard Guenther" <richard dot guenther at gmail dot com>
- To: "Jakub Jelinek" <jakub at redhat dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Fri, 14 Nov 2008 20:49:03 -0600
- Subject: Re: [PATCH] More aggressive __builtin_memcpy optimizations (PR middle-end/29215)
- References: <20081114124259.GD17496@tyan-ft48-01.lab.bos.redhat.com>
On Fri, Nov 14, 2008 at 6:42 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> Until now memcpy folding folded just the case of both src and dst
> being pointers to VAR_DECLs or their components with both the same size
> as length and a bunch of other restrictions.
>
> The following patch will optimize even when only one of src and dst
> have such a property, the store is then done using a TYPE_REF_CAN_ALIAS_REF
> pointer. When this fold_builtin_memory_op optimization has been written,
> TYPE_REF_CAN_ALIAS_REF could be lost in many places during optimizations,
> but things have improved a lot since then.
>
> Alternative to this (or maybe desirable anyway) is readdition of ADDRESSOF
> or something similar to it to get rid of unneeded stores to stack variables,
> many builtins convert something that forces TREE_ADDRESSABLE to something
> that doesn't need to necessarily live in memory. DSE ATM isn't able to
> eliminate the dead stores, and even if it would, we'd allocate space in the
> stack frame unnecessarily.
>
> Bootstrapped/regtested on x86_64-linux, ok for trunk?
This looks ok, but I have some questions regarding the alignment checks
and the size-checks:
> 2008-11-14 Jakub Jelinek <jakub@redhat.com>
>
> PR middle-end/29215
> * builtins.c (SLOW_UNALIGNED_ACCESS): Define if not defined.
> (fold_builtin_memory_op): Handle even the case where just one
> of src and dest is an address of a var decl component, using
> TYPE_REF_CAN_ALIAS_REF pointers.
>
> * gcc.dg/pr29215.c: New test.
>
> --- gcc/builtins.c.jj 2008-11-10 09:59:14.000000000 +0100
> +++ gcc/builtins.c 2008-11-14 11:46:15.000000000 +0100
> @@ -51,6 +51,10 @@ along with GCC; see the file COPYING3.
> #include "value-prof.h"
> #include "diagnostic.h"
>
> +#ifndef SLOW_UNALIGNED_ACCESS
> +#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
> +#endif
> +
> #ifndef PAD_VARARGS_DOWN
> #define PAD_VARARGS_DOWN BYTES_BIG_ENDIAN
> #endif
> @@ -8780,10 +8784,12 @@ fold_builtin_memory_op (tree dest, tree
> else
> {
> tree srctype, desttype;
> + int src_align, dest_align;
> +
> if (endp == 3)
> {
> - int src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
> - int dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
> + src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
> + dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
>
> /* Both DEST and SRC must be pointer types.
> ??? This is what old code did. Is the testing for pointer types
> @@ -8818,44 +8824,79 @@ fold_builtin_memory_op (tree dest, tree
> || !TYPE_SIZE_UNIT (srctype)
> || !TYPE_SIZE_UNIT (desttype)
> || TREE_CODE (TYPE_SIZE_UNIT (srctype)) != INTEGER_CST
> - || TREE_CODE (TYPE_SIZE_UNIT (desttype)) != INTEGER_CST
> - || !tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len)
> - || !tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
You should re-add these checks to the case we create a VIEW_CONVERT_EXPR
(the integral/pointer type case looks wrong btw, just unconditionally creating
the V_C_E and relying on its folding would be better). V_C_Es should have
matched sizes of the source and target types.
> + || TREE_CODE (TYPE_SIZE_UNIT (desttype)) != INTEGER_CST)
> return NULL_TREE;
>
> - if (get_pointer_alignment (dest, BIGGEST_ALIGNMENT)
> - < (int) TYPE_ALIGN (desttype)
> - || (get_pointer_alignment (src, BIGGEST_ALIGNMENT)
> - < (int) TYPE_ALIGN (srctype)))
> + src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
> + dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
> + if (dest_align < (int) TYPE_ALIGN (desttype)
> + || src_align < (int) TYPE_ALIGN (srctype))
> return NULL_TREE;
If either dest or src are not decls the dest or srctypes as available from
above do not have any meaning. So if you later do a store via *(T *)p
it should check the dest_align against TYPE_ALIGN of srctype or mark
the pointed-to type packed. You probably can create a testcase for
strict-alignment targets that would break.
Btw, this is a quite tricky area and I am somewhat concerned about
putting this into 4.4 - just to make you think a second time here ;)
Thanks,
Richard.
> if (!ignore)
> dest = builtin_save_expr (dest);
>
> - srcvar = build_fold_indirect_ref (src);
> - if (TREE_THIS_VOLATILE (srcvar))
> - return NULL_TREE;
> - if (!tree_int_cst_equal (lang_hooks.expr_size (srcvar), len))
> - return NULL_TREE;
> - /* With memcpy, it is possible to bypass aliasing rules, so without
> - this check i.e. execute/20060930-2.c would be misoptimized, because
> - it use conflicting alias set to hold argument for the memcpy call.
> - This check is probably unnecessary with -fno-strict-aliasing.
> - Similarly for destvar. See also PR29286. */
> - if (!var_decl_component_p (srcvar)
> - /* Accept: memcpy (*char_var, "test", 1); that simplify
> - to char_var='t'; */
> - || is_gimple_min_invariant (srcvar)
> - || readonly_data_expr (src))
> - return NULL_TREE;
> + srcvar = NULL_TREE;
> + if (tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len))
> + {
> + srcvar = build_fold_indirect_ref (src);
> + if (TREE_THIS_VOLATILE (srcvar))
> + srcvar = NULL_TREE;
> + else if (!tree_int_cst_equal (lang_hooks.expr_size (srcvar), len))
> + srcvar = NULL_TREE;
> + /* With memcpy, it is possible to bypass aliasing rules, so without
> + this check i.e. execute/20060930-2.c would be misoptimized,
> + because it use conflicting alias set to hold argument for the
> + memcpy call. This check is probably unnecessary with
> + -fno-strict-aliasing. Similarly for destvar. See also
> + PR29286. */
> + else if (!var_decl_component_p (srcvar)
> + /* Accept: memcpy (*char_var, "test", 1); that simplify
> + to char_var='t'; */
> + || is_gimple_min_invariant (srcvar)
> + || readonly_data_expr (src))
> + srcvar = NULL_TREE;
> + }
> +
> + destvar = NULL_TREE;
> + if (tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
> + {
> + destvar = build_fold_indirect_ref (dest);
> + if (TREE_THIS_VOLATILE (destvar))
> + destvar = NULL_TREE;
> + else if (!tree_int_cst_equal (lang_hooks.expr_size (destvar), len))
> + destvar = NULL_TREE;
> + else if (!var_decl_component_p (destvar))
> + destvar = NULL_TREE;
> + }
> +
> + if (srcvar == NULL_TREE && destvar == NULL_TREE)
> + return NULL_TREE;
> +
> + if (srcvar == NULL_TREE)
> + {
> + tree srcptype;
> + if (TREE_ADDRESSABLE (TREE_TYPE (destvar))
> + || SLOW_UNALIGNED_ACCESS (TYPE_MODE (destvar), src_align))
> + return NULL_TREE;
>
> - destvar = build_fold_indirect_ref (dest);
> - if (TREE_THIS_VOLATILE (destvar))
> - return NULL_TREE;
> - if (!tree_int_cst_equal (lang_hooks.expr_size (destvar), len))
> - return NULL_TREE;
> - if (!var_decl_component_p (destvar))
> - return NULL_TREE;
> + srctype = desttype;
> + srcptype = build_pointer_type_for_mode (srctype, ptr_mode, true);
> + src = fold_convert (srcptype, src);
> + srcvar = build_fold_indirect_ref (src);
> + }
> + else if (destvar == NULL_TREE)
> + {
> + tree destptype;
> + if (TREE_ADDRESSABLE (TREE_TYPE (srcvar))
> + || SLOW_UNALIGNED_ACCESS (TYPE_MODE (srcvar), dest_align))
> + return NULL_TREE;
> +
> + desttype = srctype;
> + destptype = build_pointer_type_for_mode (desttype, ptr_mode, true);
> + dest = fold_convert (destptype, dest);
> + destvar = build_fold_indirect_ref (dest);
> + }
>
> if (srctype == desttype
> || (gimple_in_ssa_p (cfun)
> --- gcc/testsuite/gcc.dg/pr29215.c.jj 2008-11-14 11:32:15.000000000 +0100
> +++ gcc/testsuite/gcc.dg/pr29215.c 2008-11-14 11:32:15.000000000 +0100
> @@ -0,0 +1,38 @@
> +/* PR middle-end/29215 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-gimple" } */
> +
> +char buf[5 * sizeof (int) + 1];
> +
> +static void
> +foo (char *buffer, int arg1, int arg2, int arg3, int arg4, int arg5)
> +{
> + __builtin_memcpy (buffer, &arg1, sizeof (int));
> + buffer += sizeof (int);
> + __builtin_memcpy (buffer, &arg2, sizeof (int));
> + buffer += sizeof (int);
> + __builtin_memcpy (buffer, &arg3, sizeof (int));
> + buffer += sizeof (int);
> + __builtin_memcpy (buffer, &arg4, sizeof (int));
> + buffer += sizeof (int);
> + __builtin_memcpy (buffer, &arg5, sizeof (int));
> + buffer += sizeof (int);
> +}
> +
> +int
> +main (void)
> +{
> + union { char buf[4]; int i; } u;
> + u.i = 0;
> + u.buf[0] = 'a';
> + u.buf[1] = 'b';
> + u.buf[2] = 'c';
> + u.buf[3] = 'd';
> + foo (buf, u.i, u.i, u.i, u.i, u.i);
> + buf[5 * sizeof (int)] = '\0';
> + __builtin_puts (buf);
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "memcpy" "gimple" } } */
> +/* { dg-final { cleanup-tree-dump "gimple" } } */
>
> Jakub
>