This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] PR/67682, break SLP groups up if only some elements match


vect_analyze_slp_instance currently only creates an slp_instance if _all_ stores
in a group fitted the same pattern. This patch splits non-matching groups up
on vector boundaries, allowing only part of the group to be SLP'd, or multiple
subgroups to be SLP'd differently.

The algorithm could be made more efficient: we have more info available in
the matches vector, and a single match in a vector full of non-matches, means
we will be unable to SLP). But the double-recursion case has at most log(N)
depth and the single-recursion case is at worst N^2 in *the group width*, which
is generally small.

This could possibly be extended to hybrid SLP, but I believe that would also
require splitting the load groups, as at present removing the bb_vinfo check
ends up with data being stored to the wrong locations e.g. in slp-11a.c. Hence,
leaving that extension to a future patch.

Bootstrapped + check-{gcc,g++,fortran} on aarch64, x86_64 and arm (v7-a neon).

Thanks, Alan

gcc/ChangeLog:

	* tree-vect-slp.c (vect_split_slp_group): New.
	(vect_analyze_slp_instance): Recurse on subgroup(s) if
	vect_build_slp_tree fails during basic block SLP.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/bb-slp-7.c: Make that SLP group even more non-isomorphic.
	* gcc.dg/vect/bb-slp-subgroups-1.c: New.
	* gcc.dg/vect/bb-slp-subgroups-2.c: New.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-7.c           |  8 ++--
 gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-1.c | 44 ++++++++++++++++++
 gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-2.c | 42 +++++++++++++++++
 gcc/tree-vect-slp.c                            | 63 +++++++++++++++++++++++++-
 4 files changed, 152 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-2.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-7.c b/gcc/testsuite/gcc.dg/vect/bb-slp-7.c
index ab54a48..b012d78 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-7.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-7.c
@@ -16,12 +16,12 @@ main1 (unsigned int x, unsigned int y)
   unsigned int *pout = &out[0];
   unsigned int a0, a1, a2, a3;
 
-  /* Non isomorphic.  */
+  /* Non isomorphic, even 64-bit subgroups.  */
   a0 = *pin++ + 23;
-  a1 = *pin++ + 142;
+  a1 = *pin++ * 142;
   a2 = *pin++ + 2;
   a3 = *pin++ * 31;
-  
+
   *pout++ = a0 * x;
   *pout++ = a1 * y;
   *pout++ = a2 * x;
@@ -47,4 +47,4 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2" } } */
-  
+
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-1.c
new file mode 100644
index 0000000..39c23c3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-1.c
@@ -0,0 +1,44 @@
+/* { dg-require-effective-target vect_int } */
+/* PR tree-optimization/67682.  */
+
+#include "tree-vect.h"
+
+int __attribute__((__aligned__(8))) a[8];
+int __attribute__((__aligned__(8))) b[4];
+
+__attribute__ ((noinline)) void
+test ()
+{
+    a[0] = b[0];
+    a[1] = b[1];
+    a[2] = b[2];
+    a[3] = b[3];
+    a[4] = 0;
+    a[5] = 0;
+    a[6] = 0;
+    a[7] = 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  check_vect ();
+
+  for (int i = 0; i < 8; i++)
+    a[i] = 1;
+  for (int i = 0; i < 4; i++)
+    b[i] = i + 4;
+  __asm__ volatile ("" : : : "memory");
+  test (a, b);
+  __asm__ volatile ("" : : : "memory");
+  for (int i = 0; i < 4; i++)
+    if (a[i] != i+4)
+      abort ();
+  for (int i = 4; i < 8; i++)
+    if (a[i] != 0)
+      abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 1 "slp2" } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-2.c
new file mode 100644
index 0000000..06099fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-2.c
@@ -0,0 +1,42 @@
+/* { dg-require-effective-target vect_int } */
+/* PR tree-optimization/67682.  */
+
+#include "tree-vect.h"
+
+int __attribute__((__aligned__(8))) a[8];
+int __attribute__((__aligned__(8))) b[8];
+
+__attribute__ ((noinline)) void
+test ()
+{
+    a[0] = b[0];
+    a[1] = b[1] + 1;
+    a[2] = b[2] * 2;
+    a[3] = b[3] + 3;
+
+    a[4] = b[4] + 4;
+    a[5] = b[5] + 4;
+    a[6] = b[6] + 4;
+    a[7] = b[7] + 4;
+}
+
+int
+main (int argc, char **argv)
+{
+  check_vect ();
+
+  for (int i = 0; i < 8; i++)
+    b[i] = i + 1;
+  __asm__ volatile ("" : : : "memory");
+  test (a, b);
+  __asm__ volatile ("" : : : "memory");
+  if ((a[0] != 1) || (a[1] != 3) || (a[2] != 6) || (a[3] != 7))
+    abort ();
+  for (int i = 4; i < 8; i++)
+    if (a[i] != i + 5)
+      abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 1 "slp2" } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 7a2d623..fb8d11e 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1627,6 +1627,33 @@ vect_analyze_slp_cost (slp_instance instance, void *data)
   body_cost_vec.release ();
 }
 
+/* Splits a group of statements currently beginning at FIRST_STMT into two
+   groups, one (still beginning at FIRST_STMT) containing the first I stmts,
+   the second containing the remainder.  Return the first stmt in the second
+   group.  */
+
+static gimple *
+vect_split_slp_group (gimple *first_stmt, unsigned int i)
+{
+  gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
+  gcc_assert (i > 0);
+  int group2_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)) - i;
+  gcc_assert (group2_size > 0);
+  GROUP_SIZE (vinfo_for_stmt (first_stmt)) = i;
+
+  gimple *stmt = first_stmt;
+  while (i-- > 1)
+    stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
+  /* STMT is now the last element of the first group.  */
+  gimple *first = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
+  GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
+
+  GROUP_SIZE (vinfo_for_stmt (first)) = group2_size;
+  for (stmt = first; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
+    GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = first;
+  return first;
+}
+
 /* Analyze an SLP instance starting from a group of grouped stores.  Call
    vect_build_slp_tree to build a tree of packed stmts if possible.
    Return FALSE if it's impossible to SLP any stmt in the loop.  */
@@ -1642,7 +1669,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
   tree vectype, scalar_type = NULL_TREE;
   gimple *next;
   unsigned int vectorization_factor = 0;
-  int i;
+  unsigned int i;
   unsigned int max_nunits = 0;
   vec<slp_tree> loads;
   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
@@ -1836,6 +1863,40 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
   vect_free_slp_tree (node);
   loads.release ();
 
+  /* For basic block vectorization, try to break the group up into multiples
+     of the vectorization factor.  */
+  if (bb_vinfo && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
+    {
+      /* We consider breaking the group only on VF boundaries from the existing
+	 start.  */
+      for (i = 0; i < group_size; i++)
+	if (!matches[i]) break;
+
+      if (i < vectorization_factor && vectorization_factor < group_size)
+	{
+	  /* First vector is a mix of (non-/)matches, or first element was
+	     impossible for another reason.  Retry without the first vector.  */
+	  stmt = vect_split_slp_group (stmt, vectorization_factor);
+	  return vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
+					    stmt, max_tree_size);
+	}
+      else if (i >= vectorization_factor && i < group_size)
+	{
+	  /* Split into two groups at the first vector boundary before i.  */
+	  gcc_assert ((vectorization_factor & (vectorization_factor - 1)) == 0);
+	  i &= ~(vectorization_factor - 1);
+	  gimple *group2start = vect_split_slp_group (stmt, i);
+	  return vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
+					    stmt, max_tree_size)
+		 | vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
+					      group2start, max_tree_size);
+	 }
+    }
+  else
+    {
+      /* TODO: handle reduction case?  */
+    }
+
   return false;
 }
 
-- 
1.9.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]