[PATCH] Lift some BB vectorization restrictions

Richard Biener rguenther@suse.de
Wed Jun 3 11:56:00 GMT 2015


The following fixes GROUP_GAP computation if the gaps are only within
the group but not at the boundaries.  It also removes a restriction
in SLP detection that ultimately is a restriction on supported
load permutations.

This allows us to basic-block vectorize the new testcase.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2015-06-03  Richard Biener  <rguenther@suse.de>

	* tree-vect-data-refs.c (vect_analyze_group_access): Properly
	compute GROUP_GAP for the first element.
	* tree-vect-slp.c (vect_build_slp_tree_1): Remove restriction
	on in-group gaps.

	* gcc.dg/vect/bb-slp-36.c: New testcase.

Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c	(revision 224059)
+++ gcc/tree-vect-data-refs.c	(working copy)
@@ -2205,29 +2205,33 @@ vect_analyze_group_access (struct data_r
 
       /* Check that the size of the interleaving is equal to count for stores,
          i.e., that there are no gaps.  */
-      if (groupsize != count)
+      if (groupsize != count
+	  && !DR_IS_READ (dr))
         {
-          if (DR_IS_READ (dr))
-            {
-              slp_impossible = true;
-              /* There is a gap after the last load in the group. This gap is a
-                 difference between the groupsize and the number of elements.
-		 When there is no gap, this difference should be 0.  */
-              GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
-            }
-          else
-            {
-              if (dump_enabled_p ())
-                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "interleaved store with gaps\n");
-              return false;
-            }
-        }
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			     "interleaved store with gaps\n");
+	  return false;
+	}
+
+      /* If there is a gap after the last load in the group it is the
+	 difference between the groupsize and the last accessed
+	 element.
+	 When there is no gap, this difference should be 0.  */
+      GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - last_accessed_element;
 
       GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
       if (dump_enabled_p ())
-        dump_printf_loc (MSG_NOTE, vect_location,
-                         "Detected interleaving of size %d\n", (int)groupsize);
+	{
+	  dump_printf_loc (MSG_NOTE, vect_location,
+			   "Detected interleaving of size %d starting with ",
+			   (int)groupsize);
+	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+	  if (GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "There is a gap of %d elements after the group\n",
+			     (int)GROUP_GAP (vinfo_for_stmt (stmt)));
+	}
 
       /* SLP: create an SLP data structure for every interleaving group of
 	 stores for further analysis in vect_analyse_slp.  */
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 224059)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -777,17 +777,13 @@ vect_build_slp_tree_1 (loop_vec_info loo
 		    (*max_nunits, group_size) / group_size;
               /* FORNOW: Check that there is no gap between the loads
 		 and no gap between the groups when we need to load
-		 multiple groups at once.
-		 ???  We should enhance this to only disallow gaps
-		 inside vectors.  */
-              if ((unrolling_factor > 1
-		   && ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
-			&& GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
-		       /* If the group is split up then GROUP_GAP
-			  isn't correct here, nor is GROUP_FIRST_ELEMENT.  */
-		       || GROUP_SIZE (vinfo_for_stmt (stmt)) > group_size))
-		  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
-		      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
+		 multiple groups at once.  */
+              if (unrolling_factor > 1
+		  && ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
+		       && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
+		      /* If the group is split up then GROUP_GAP
+			 isn't correct here, nor is GROUP_FIRST_ELEMENT.  */
+		      || GROUP_SIZE (vinfo_for_stmt (stmt)) > group_size))
                 {
                   if (dump_enabled_p ())
                     {
Index: gcc/testsuite/gcc.dg/vect/bb-slp-36.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/bb-slp-36.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vect/bb-slp-36.c	(revision 0)
@@ -0,0 +1,35 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+int a[8], b[8];
+
+void __attribute__((noinline,noclone))
+foo(void)
+{
+  a[0] = b[0];
+  a[1] = b[0];
+  a[2] = b[3];
+  a[3] = b[3];
+  a[4] = b[4];
+  a[5] = b[7];
+  a[6] = b[4];
+  a[7] = b[7];
+}
+
+int main()
+{
+  int i;
+  check_vect ();
+  for (i = 0; i < 8; ++i)
+    b[i] = i;
+  foo ();
+  if (a[0] != 0 || a[1] != 0 || a[2] != 3 || a[3] != 3
+      || a[4] != 4 || a[5] != 7 || a[6] != 4 || a[7] != 7)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_perm } } } */



More information about the Gcc-patches mailing list