This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Fix PR70130
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 14 Apr 2016 13:22:19 +0200 (CEST)
- Subject: [PATCH] Fix PR70130
- Authentication-results: sourceware.org; auth=none
The following fixes PR70130 - improved SLP capabilities now run into
the realignment code on ppc, which doesn't properly verify that all
vector loads emitted by vectorizable_load share the same alignment.
Bootstrap / regtest pending on x86_64-unknown-linux-gnu.
Bootstrapped / tested on ppc64le by Alan.
Richard.
2016-04-14 Richard Biener <rguenther@suse.de>
Alan Modra <amodra@gmail.com>
PR tree-optimization/70130
* tree-vect-data-refs.c (vect_supportable_dr_alignment): Detect
when the alignment does not stay the same and do not use the realign
scheme then.
* gcc.dg/vect/O3-pr70130.c: New testcase.
Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c (revision 234970)
--- gcc/tree-vect-data-refs.c (working copy)
*************** vect_supportable_dr_alignment (struct da
*** 5983,5992 ****
|| targetm.vectorize.builtin_mask_for_load ()))
{
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
! if ((nested_in_vect_loop
! && (TREE_INT_CST_LOW (DR_STEP (dr))
! != GET_MODE_SIZE (TYPE_MODE (vectype))))
! || !loop_vinfo)
return dr_explicit_realign;
else
return dr_explicit_realign_optimized;
--- 5983,6001 ----
|| targetm.vectorize.builtin_mask_for_load ()))
{
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
!
! /* If we are doing SLP then the accesses need not have the
! same alignment, instead it depends on the SLP group size. */
! if (loop_vinfo
! && STMT_SLP_TYPE (stmt_info)
! && (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
! * GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info))))
! % TYPE_VECTOR_SUBPARTS (vectype) != 0)
! ;
! else if (!loop_vinfo
! || (nested_in_vect_loop
! && (TREE_INT_CST_LOW (DR_STEP (dr))
! != GET_MODE_SIZE (TYPE_MODE (vectype)))))
return dr_explicit_realign;
else
return dr_explicit_realign_optimized;
Index: gcc/testsuite/gcc.dg/vect/O3-pr70130.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/O3-pr70130.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/O3-pr70130.c (working copy)
***************
*** 0 ****
--- 1,94 ----
+ /* { dg-do run } */
+ /* { dg-require-effective-target vsx_hw { target powerpc*-*-* } } */
+ /* { dg-additional-options "-mcpu=power7" { target powerpc*-*-* } } */
+
+ struct foo
+ {
+ short a[3][16][16];
+ short pad;
+ } images[8];
+
+ void __attribute__ ((noinline, noclone))
+ Loop_err (struct foo *img, const int s[16][2], int s0)
+ {
+ int i, j;
+
+ for (j = 0; j < 16; j++)
+ {
+ for (i=0; i < 16; i++)
+ {
+ img->a[0][j][i] = s[i][0];
+ img->a[1][j][i] = s[j][1];
+ img->a[2][j][i] = s0;
+ }
+ }
+ }
+
+ const int s[16][2] = { { 1, 16 }, { 2, 15 }, { 3, 14 }, { 4, 13 },
+ { 5, 12 }, { 6, 11 }, { 7, 10 }, { 8, 9 },
+ { 9, 8 }, { 10, 7 }, { 11, 6 }, { 12, 5 },
+ { 13, 4 }, { 14, 3 }, { 15, 2 }, { 16, 1 } };
+ const struct foo expected
+ = { { { { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 } },
+ { { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 },
+ { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 },
+ { 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+ { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 },
+ { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
+ { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
+ { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } },
+ { { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } } },
+ 0 };
+
+ int
+ main (void)
+ {
+ int i;
+
+ for (i = 0; i < 8; i++)
+ Loop_err (images + i, s, -1);
+
+ for (i = 0; i < 8; i++)
+ if (__builtin_memcmp (&expected, images + i, sizeof (expected)))
+ __builtin_abort ();
+ return 0;
+ }