This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Make the vectoriser drop to strided accesses for stores with gaps


We could vectorise:

     for (...)
       {
         a[0] = ...;
         a[1] = ...;
         a[2] = ...;
         a[3] = ...;
         a += stride;
       }

(including the case when stride == 8) but not:

     for (...)
       {
         a[0] = ...;
         a[1] = ...;
         a[2] = ...;
         a[3] = ...;
         a += 8;
       }

(where the stride is always 8).  The former was treated as a "grouped
and strided" store, while the latter was treated as grouped store with
gaps, which we don't support.

This patch makes us treat groups of stores with gaps at the end as
strided groups too.  I tried to go through all uses of STMT_VINFO_STRIDED_P
and all vector uses of DR_STEP to see whether there were any hard-baked
assumptions, but couldn't see any.  I wondered whether we should relax:

  /* We do not have to consider dependences between accesses that belong
     to the same group, unless the stride could be smaller than the
     group size.  */
  if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
      && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
          == DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
      && !STMT_VINFO_STRIDED_P (stmtinfo_a))
    return false;

for cases in which the step is constant and the absolute step is known
to be greater than the group size, but data dependence analysis should
already return chrec_known for those cases.

The new test is a version of vect-avg-15.c with the variable step
replaced by a constant one.

A natural follow-on would be to do the same for groups with gaps in
the middle:

          /* Check that the distance between two accesses is equal to the type
             size. Otherwise, we have gaps.  */
          diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
                  - TREE_INT_CST_LOW (prev_init)) / type_size;
          if (diff != 1)
            {
              [...]
              if (DR_IS_WRITE (data_ref))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "interleaved store with gaps\n");
                  return false;
                }

But I think we should do that separately and see what the fallout
from this change is first.

Tested on aarch64-linux-gnu (with and without SVE), aarch64_be-elf
and x86_64-linux-gnu.  OK to install?

Richard


2018-07-20  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vect-data-refs.c (vect_analyze_group_access_1): Convert
	grouped stores with gaps to a strided group.

gcc/testsuite/
	* gcc.dg/vect/vect-avg-16.c: New test.
	* gcc.dg/vect/slp-37.c: Expect the loop to be vectorized.
	* gcc.dg/vect/vect-strided-u8-i8-gap4.c,
	* gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c: Likewise for
	the second loop in main1.

Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c	2018-06-30 13:44:38.567611988 +0100
+++ gcc/tree-vect-data-refs.c	2018-07-20 11:55:22.570911497 +0100
@@ -2632,10 +2632,14 @@ vect_analyze_group_access_1 (struct data
       if (groupsize != count
 	  && !DR_IS_READ (dr))
         {
-	  if (dump_enabled_p ())
-	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "interleaved store with gaps\n");
-	  return false;
+	  groupsize = count;
+	  STMT_VINFO_STRIDED_P (stmt_info) = true;
+	  stmt_vec_info next_info = stmt_info;
+	  while (DR_GROUP_NEXT_ELEMENT (next_info))
+	    {
+	      next_info = vinfo_for_stmt (DR_GROUP_NEXT_ELEMENT (next_info));
+	      STMT_VINFO_STRIDED_P (next_info) = 1;
+	    }
 	}
 
       /* If there is a gap after the last load in the group it is the
@@ -2651,6 +2655,8 @@ vect_analyze_group_access_1 (struct data
 			   "Detected interleaving ");
 	  if (DR_IS_READ (dr))
 	    dump_printf (MSG_NOTE, "load ");
+	  else if (STMT_VINFO_STRIDED_P (stmt_info))
+	    dump_printf (MSG_NOTE, "strided store ");
 	  else
 	    dump_printf (MSG_NOTE, "store ");
 	  dump_printf (MSG_NOTE, "of size %u starting with ",
Index: gcc/testsuite/gcc.dg/vect/vect-avg-16.c
===================================================================
--- /dev/null	2018-07-09 14:52:09.234750850 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-avg-16.c	2018-07-20 11:55:22.570911497 +0100
@@ -0,0 +1,52 @@
+/* { dg-additional-options "-O3" } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 80
+
+void __attribute__ ((noipa))
+f (signed char *restrict a, signed char *restrict b,
+   signed char *restrict c, int n)
+{
+  for (int j = 0; j < n; ++j)
+    {
+      for (int i = 0; i < 16; ++i)
+	a[i] = (b[i] + c[i]) >> 1;
+      a += 20;
+      b += 20;
+      c += 20;
+    }
+}
+
+#define BASE1 -126
+#define BASE2 -42
+
+signed char a[N], b[N], c[N];
+
+int
+main (void)
+{
+  check_vect ();
+
+  for (int i = 0; i < N; ++i)
+    {
+      a[i] = i;
+      b[i] = BASE1 + i * 3;
+      c[i] = BASE2 + i * 2;
+      asm volatile ("" ::: "memory");
+    }
+  f (a, b, c, N / 20);
+  for (int i = 0; i < N; ++i)
+    {
+      int d = (BASE1 + BASE2 + i * 5) >> 1;
+      if (a[i] != (i % 20 < 16 ? d : i))
+	__builtin_abort ();
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
Index: gcc/testsuite/gcc.dg/vect/slp-37.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-37.c	2018-05-02 08:37:49.033604262 +0100
+++ gcc/testsuite/gcc.dg/vect/slp-37.c	2018-07-20 11:55:22.570911497 +0100
@@ -17,8 +17,8 @@ foo1 (s1 *arr)
   int i;
   s1 *ptr = arr;
 
-  /* Different constant types - not SLPable.  The group size is not power of 2,
-     interleaving is not supported either.  */
+  /* Vectorized as a strided SLP pair of accesses to <a, b> and a single
+     strided access to c.  */
   for (i = 0; i < N; i++)
     {
       ptr->a = 6;
@@ -58,6 +58,5 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect"  } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect"  } } */
-  
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  } } */
Index: gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c	2018-05-02 08:37:49.021604375 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c	2018-07-20 11:55:22.570911497 +0100
@@ -53,7 +53,7 @@ main1 (s *arr)
    }
 
   ptr = arr;
-  /* Not vectorizable: gap in store. */
+  /* Vectorized as a strided SLP pair.  */
   for (i = 0; i < N; i++)
     { 
       res[i].a = ptr->b;
@@ -97,5 +97,4 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
-  
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target vect_strided8 } } } */
Index: gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c	2018-05-02 08:37:49.013604450 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c	2018-07-20 11:55:22.570911497 +0100
@@ -55,7 +55,7 @@ main1 (s *arr)
    }
 
   ptr = arr;
-  /* Not vectorizable: gap in store.  */
+  /* Vectorized as a strided SLP pair.  */
   for (i = 0; i < N; i++)
     {
       res[i].a = ptr->b;
@@ -110,5 +110,4 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
-
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target vect_strided8 } } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]