This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Fix dr_explicit_realign vectorization (PR tree-optimization/65369)


On March 14, 2015 10:04:53 AM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>This issue is practically the same as PR63341, except in this case it
>is for
>dr_explicit_realign rather than dr_explicit_realign_optimized, and the
>bump
>isn't passed through multiple functions and thus is easier to fix.
>
>Without the patch we use (dataptr & -16) for the first load and
>((dataptr + 12) & -16) for the second load, which works just fine if
>the
>elements are properly aligned (4 byte at least), but in this case we
>have
>underaligned accesses (coming from folding of memcpy in this testcase,
>and
>from 4 byte loads combined together recognized by bswap pass in the
>original
>source), and so we really want to use ((dataptr + 15) & -16), otherwise
>if we are unlucky we might read the same memory twice even when dataptr
>is not 16 byte aligned.
>
>Bootstrapped/regtested on
>{x86_64,i686,aarch64,powerpc64{,le},s390{,x}}-linux, ok for trunk?

OK.

Thanks,
Richard.

>2015-03-14  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/65369
>	* tree-vect-stmts.c (vectorizable_load) <case dr_explicit_realign>:
>	Set bump to vs * TYPE_SIZE_UNIT (elem_type) - 1 instead of
>	(vs - 1) * TYPE_SIZE_UNIT (elem_type).
>
>	* gcc.c-torture/execute/pr65369.c: New test.
>
>--- gcc/tree-vect-stmts.c.jj	2015-03-09 08:05:13.000000000 +0100
>+++ gcc/tree-vect-stmts.c	2015-03-13 17:27:30.613529768 +0100
>@@ -6468,9 +6468,8 @@ vectorizable_load (gimple stmt, gimple_s
> 		case dr_explicit_realign:
> 		  {
> 		    tree ptr, bump;
>-		    tree vs_minus_1;
> 
>-		    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
>+		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> 
> 		    if (compute_in_loop)
> 		      msq = vect_setup_realignment (first_stmt, gsi,
>@@ -6499,8 +6498,9 @@ vectorizable_load (gimple stmt, gimple_s
> 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
> 		    msq = new_temp;
> 
>-		    bump = size_binop (MULT_EXPR, vs_minus_1,
>+		    bump = size_binop (MULT_EXPR, vs,
> 				       TYPE_SIZE_UNIT (elem_type));
>+		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
> 		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
> 		    new_stmt = gimple_build_assign
> 				 (NULL_TREE, BIT_AND_EXPR, ptr,
>--- gcc/testsuite/gcc.c-torture/execute/pr65369.c.jj	2015-03-13
>17:37:10.926175685 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr65369.c	2015-03-13
>17:35:40.000000000 +0100
>@@ -0,0 +1,45 @@
>+/* PR tree-optimization/65369 */
>+
>+static const char data[] =
>+  "12345678901234567890123456789012345678901234567890"
>+  "123456789012345678901234567890";
>+
>+__attribute__ ((noinline))
>+static void foo (const unsigned int *buf)
>+{
>+  if (__builtin_memcmp (buf, data, 64))
>+    __builtin_abort ();
>+}
>+
>+__attribute__ ((noinline))
>+static void bar (const unsigned char *block)
>+{
>+  unsigned int buf[16];
>+  __builtin_memcpy (buf +  0, block +  0, 4);
>+  __builtin_memcpy (buf +  1, block +  4, 4);
>+  __builtin_memcpy (buf +  2, block +  8, 4);
>+  __builtin_memcpy (buf +  3, block + 12, 4);
>+  __builtin_memcpy (buf +  4, block + 16, 4);
>+  __builtin_memcpy (buf +  5, block + 20, 4);
>+  __builtin_memcpy (buf +  6, block + 24, 4);
>+  __builtin_memcpy (buf +  7, block + 28, 4);
>+  __builtin_memcpy (buf +  8, block + 32, 4);
>+  __builtin_memcpy (buf +  9, block + 36, 4);
>+  __builtin_memcpy (buf + 10, block + 40, 4);
>+  __builtin_memcpy (buf + 11, block + 44, 4);
>+  __builtin_memcpy (buf + 12, block + 48, 4);
>+  __builtin_memcpy (buf + 13, block + 52, 4);
>+  __builtin_memcpy (buf + 14, block + 56, 4);
>+  __builtin_memcpy (buf + 15, block + 60, 4);
>+  foo (buf);
>+}
>+
>+int
>+main ()
>+{
>+  unsigned char input[sizeof data + 16] __attribute__((aligned (16)));
>+  __builtin_memset (input, 0, sizeof input);
>+  __builtin_memcpy (input + 1, data, sizeof data);
>+  bar (input + 1);
>+  return 0;
>+}
>
>	Jakub



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]