This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Re: [PATCH] Fix dr_explicit_realign vectorization (PR tree-optimization/65369)
- From: Richard Biener <rguenther at suse dot de>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Sat, 14 Mar 2015 10:49:49 +0100
- Subject: Re: [PATCH] Fix dr_explicit_realign vectorization (PR tree-optimization/65369)
- Authentication-results: sourceware.org; auth=none
- References: <20150314090453 dot GJ1746 at tucnak dot redhat dot com>
On March 14, 2015 10:04:53 AM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>This issue is practically the same as PR63341, except that in this case
>it is for dr_explicit_realign rather than dr_explicit_realign_optimized,
>and the bump isn't passed through multiple functions and thus is easier
>to fix.
>
>Without the patch we use (dataptr & -16) for the first load and
>((dataptr + 12) & -16) for the second load, which works just fine if
>the elements are properly aligned (to at least 4 bytes), but in this
>case we have underaligned accesses (coming from folding of memcpy in
>this testcase, and from 4 byte loads combined together, recognized by
>the bswap pass, in the original source), and so we really want to use
>((dataptr + 15) & -16); otherwise, if we are unlucky, we might read the
>same memory twice even when dataptr is not 16-byte aligned.
>
>Bootstrapped/regtested on
>{x86_64,i686,aarch64,powerpc64{,le},s390{,x}}-linux, ok for trunk?
OK.
Thanks,
Richard.
>2015-03-14 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/65369
> * tree-vect-stmts.c (vectorizable_load) <case dr_explicit_realign>:
> Set bump to vs * TYPE_SIZE_UNIT (elem_type) - 1 instead of
> (vs - 1) * TYPE_SIZE_UNIT (elem_type).
>
> * gcc.c-torture/execute/pr65369.c: New test.
>
>--- gcc/tree-vect-stmts.c.jj 2015-03-09 08:05:13.000000000 +0100
>+++ gcc/tree-vect-stmts.c 2015-03-13 17:27:30.613529768 +0100
>@@ -6468,9 +6468,8 @@ vectorizable_load (gimple stmt, gimple_s
> case dr_explicit_realign:
> {
> tree ptr, bump;
>- tree vs_minus_1;
>
>- vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
>+ tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
>
> if (compute_in_loop)
> msq = vect_setup_realignment (first_stmt, gsi,
>@@ -6499,8 +6498,9 @@ vectorizable_load (gimple stmt, gimple_s
> vect_finish_stmt_generation (stmt, new_stmt, gsi);
> msq = new_temp;
>
>- bump = size_binop (MULT_EXPR, vs_minus_1,
>+ bump = size_binop (MULT_EXPR, vs,
> TYPE_SIZE_UNIT (elem_type));
>+ bump = size_binop (MINUS_EXPR, bump, size_one_node);
> ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
> new_stmt = gimple_build_assign
> (NULL_TREE, BIT_AND_EXPR, ptr,
>--- gcc/testsuite/gcc.c-torture/execute/pr65369.c.jj 2015-03-13 17:37:10.926175685 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65369.c 2015-03-13 17:35:40.000000000 +0100
>@@ -0,0 +1,45 @@
>+/* PR tree-optimization/65369 */
>+
>+static const char data[] =
>+ "12345678901234567890123456789012345678901234567890"
>+ "123456789012345678901234567890";
>+
>+__attribute__ ((noinline))
>+static void foo (const unsigned int *buf)
>+{
>+ if (__builtin_memcmp (buf, data, 64))
>+ __builtin_abort ();
>+}
>+
>+__attribute__ ((noinline))
>+static void bar (const unsigned char *block)
>+{
>+ unsigned int buf[16];
>+ __builtin_memcpy (buf + 0, block + 0, 4);
>+ __builtin_memcpy (buf + 1, block + 4, 4);
>+ __builtin_memcpy (buf + 2, block + 8, 4);
>+ __builtin_memcpy (buf + 3, block + 12, 4);
>+ __builtin_memcpy (buf + 4, block + 16, 4);
>+ __builtin_memcpy (buf + 5, block + 20, 4);
>+ __builtin_memcpy (buf + 6, block + 24, 4);
>+ __builtin_memcpy (buf + 7, block + 28, 4);
>+ __builtin_memcpy (buf + 8, block + 32, 4);
>+ __builtin_memcpy (buf + 9, block + 36, 4);
>+ __builtin_memcpy (buf + 10, block + 40, 4);
>+ __builtin_memcpy (buf + 11, block + 44, 4);
>+ __builtin_memcpy (buf + 12, block + 48, 4);
>+ __builtin_memcpy (buf + 13, block + 52, 4);
>+ __builtin_memcpy (buf + 14, block + 56, 4);
>+ __builtin_memcpy (buf + 15, block + 60, 4);
>+ foo (buf);
>+}
>+
>+int
>+main ()
>+{
>+ unsigned char input[sizeof data + 16] __attribute__((aligned (16)));
>+ __builtin_memset (input, 0, sizeof input);
>+ __builtin_memcpy (input + 1, data, sizeof data);
>+ bar (input + 1);
>+ return 0;
>+}
>
> Jakub