gcc.gnu.org Git - gcc.git/commitdiff
tree-optimization/107451 - SLP load vectorization issue
author Richard Biener <rguenther@suse.de>
Thu, 22 Dec 2022 08:36:17 +0000 (09:36 +0100)
committer Richard Biener <rguenther@suse.de>
Thu, 22 Dec 2022 11:21:06 +0000 (12:21 +0100)
When vectorizing SLP loads with permutations we can access excess
elements when the load vector type is bigger than the group size
and the vectorization factor covers fewer groups than necessary
to fill it.  Since we know the code will only access up to
group_size * VF elements in the unpermuted vector, we can simply
fill the rest of the vector with whatever we want.  For simplicity
this patch chooses to repeat the last group.

PR tree-optimization/107451
* tree-vect-stmts.cc (vectorizable_load): Avoid loading
SLP group members from group numbers in excess of the
vectorization factor.

* gcc.dg/torture/pr107451.c: New testcase.

gcc/testsuite/gcc.dg/torture/pr107451.c [new file with mode: 0644]
gcc/tree-vect-stmts.cc

diff --git a/gcc/testsuite/gcc.dg/torture/pr107451.c b/gcc/testsuite/gcc.dg/torture/pr107451.c
new file mode 100644 (file)
index 0000000..a17574c
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-vectorize -fno-vect-cost-model" } */
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
+
+double getdot(int n, const double *x, int inc_x, const double *y)
+{
+  int i, ix = 0;
+  double dot[4] = { 0.0, 0.0, 0.0, 0.0 } ;
+
+  for(i = 0; i < n; i++) {
+      dot[0] += x[ix]   * y[ix]   ;
+      dot[1] += x[ix+1] * y[ix+1] ;
+      dot[2] += x[ix]   * y[ix+1] ;
+      dot[3] += x[ix+1] * y[ix]   ;
+      ix += inc_x ;
+  }
+
+  return dot[0] + dot[1] + dot[2] + dot[3];
+}
+
+int main()
+{
+  double x[2] = {0, 0}, y[2] = {0, 0};
+  if (getdot(1, x, 4096*4096, y) != 0.)
+    __builtin_abort ();
+  return 0;
+}
index 5485da58b38a0db2ea1a357ee8647ae47b563a8f..8f8deaf82bc86d3e0288019abd653d57ac3be347 100644 (file)
@@ -9235,6 +9235,7 @@ vectorizable_load (vec_info *vinfo,
       unsigned int group_el = 0;
       unsigned HOST_WIDE_INT
        elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
+      unsigned int n_groups = 0;
       for (j = 0; j < ncopies; j++)
        {
          if (nloads > 1)
@@ -9256,12 +9257,19 @@ vectorizable_load (vec_info *vinfo,
              if (! slp
                  || group_el == group_size)
                {
-                 tree newoff = copy_ssa_name (running_off);
-                 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
-                                                     running_off, stride_step);
-                 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
-
-                 running_off = newoff;
+                 n_groups++;
+                 /* When doing SLP make sure to not load elements from
+                    the next vector iteration, those will not be accessed
+                    so just use the last element again.  See PR107451.  */
+                 if (!slp || known_lt (n_groups, vf))
+                   {
+                     tree newoff = copy_ssa_name (running_off);
+                     gimple *incr
+                       = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+                                              running_off, stride_step);
+                     vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
+                     running_off = newoff;
+                   }
                  group_el = 0;
                }
            }
This page took 0.102863 seconds and 5 git commands to generate.