[gcc(refs/users/aoliva/heads/testbase)] tree-optimization/96783 - fix vectorization of negative step SLP

Alexandre Oliva aoliva@gcc.gnu.org
Thu Sep 3 15:49:40 GMT 2020


https://gcc.gnu.org/g:71b6257e3a90995e1c1d3d2716a0eec5eef243db

commit 71b6257e3a90995e1c1d3d2716a0eec5eef243db
Author: Richard Biener <rguenther@suse.de>
Date:   Wed Aug 26 14:24:01 2020 +0200

    tree-optimization/96783 - fix vectorization of negative step SLP
    
    Until we get more explicit support for negative stride SLP, this
    uses VMAT_ELEMENTWISE when the vectors cannot be filled from a
    single SLP group.
    
    2020-08-26  Richard Biener  <rguenther@suse.de>
    
            PR tree-optimization/96783
            * tree-vect-stmts.c (get_group_load_store_type): Use
            VMAT_ELEMENTWISE for negative strides when we cannot
            use VMAT_STRIDED_SLP.
    
            * gcc.dg/vect/pr96783-1.c: New testcase.
            * gcc.dg/vect/pr96783-2.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr96783-1.c | 38 +++++++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/vect/pr96783-2.c | 29 ++++++++++++++++++++++++++
 gcc/tree-vect-stmts.c                 | 10 ++++++++-
 3 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr96783-1.c b/gcc/testsuite/gcc.dg/vect/pr96783-1.c
new file mode 100644
index 00000000000..55d1364f056
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr96783-1.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+foo (long *a, int off, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      long tem1 = a[0];
+      long tem2 = a[1];
+      long tem3 = a[2];
+      long tem4 = a[off + 1];
+      a[0] = tem4;
+      long tem5 = a[off + 2];
+      a[1] = tem5;
+      long tem6 = a[off + 3];
+      a[2] = tem6;
+      a[off + 1] = tem1;
+      a[off + 2] = tem2;
+      a[off + 3] = tem3;
+      a -= 3;
+    }
+}
+
+int main ()
+{
+  long a[3 * 9];
+  check_vect ();
+  for (int i = 0; i < 3 * 9; ++i)
+    a[i] = i;
+  foo (a + 3 * 5, 6-1, 5);
+  const long b[3 * 8] = { 0, 1, 2, 21, 22, 23, 18, 19, 20, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 };
+  for (int i = 0; i < 3 * 8; ++i)
+    if (a[i] != b[i])
+      __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr96783-2.c b/gcc/testsuite/gcc.dg/vect/pr96783-2.c
new file mode 100644
index 00000000000..33c37109e3a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr96783-2.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include "tree-vect.h"
+
+long a[1024];
+long b[1024];
+
+void __attribute__((noipa)) foo ()
+{
+  for (int i = 0; i < 256; ++i)
+    {
+      a[3*i] = b[1023 - 3*i - 2];
+      a[3*i + 1] = b[1023 - 3*i - 1];
+      a[3*i + 2] = b[1023 - 3*i];
+    }
+}
+
+int main()
+{
+  for (int i = 0; i < 1024; ++i)
+    b[i] = i;
+  foo ();
+  for (int i = 0; i < 256; ++i)
+    if (a[3*i] != 1023 - 3*i - 2
+	|| a[3*i+1] != 1023 - 3*i - 1
+	|| a[3*i+2] != 1023 - 3*i)
+      __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 65e30bac424..224be018af9 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2192,7 +2192,15 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
 		*memory_access_type = get_negative_load_store_type
 				       (vinfo, stmt_info, vectype, vls_type, 1);
 	      else
-		*memory_access_type = VMAT_STRIDED_SLP;
+		{
+		  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
+		     separated by the stride, until we have a complete vector.
+		     Fall back to scalar accesses if that isn't possible.  */
+		  if (multiple_p (nunits, group_size))
+		    *memory_access_type = VMAT_STRIDED_SLP;
+		  else
+		    *memory_access_type = VMAT_ELEMENTWISE;
+		}
 	    }
 	  else
 	    {


More information about the Gcc-cvs mailing list