[PATCH] tree-optimization/95866 - avoid vectorizing uniform SLP subgraphs

Richard Biener rguenther@suse.de
Wed Jun 24 13:56:24 GMT 2020


This avoids vectorizing SLP subgraphs that just compute uniform
operations on all-same operands.  That fixes the less interesting
(but most embarrassing) part of the testcase in the PR.  On the
way it also fixes a missing matches[0] reset in the last
refactoring touching that place.

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

2020-06-24  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/95866
	* tree-vect-slp.c (vect_slp_tree_uniform_p): New.
	(vect_build_slp_tree_2): Properly reset matches[0],
	ignore uniform constants.

	* gcc.target/i386/pr95866-1.c: New testcase.
---
 gcc/testsuite/gcc.target/i386/pr95866-1.c | 18 ++++++++++++++++
 gcc/tree-vect-slp.c                       | 26 +++++++++++++++++++++--
 2 files changed, 42 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95866-1.c

diff --git a/gcc/testsuite/gcc.target/i386/pr95866-1.c b/gcc/testsuite/gcc.target/i386/pr95866-1.c
new file mode 100644
index 00000000000..991370cf669
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95866-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-slp2-details -msse2" } */
+
+int x[4];
+void foo(int i)
+{
+  int j = (i+1) & 31;
+  x[0] = (x[0] << j) + j;
+  x[1] = (x[1] << j) + j;
+  x[2] = (x[2] << j) + j;
+  x[3] = (x[3] << j) + j;
+}
+
+/* We should not use vector operations for i + 1 and (i + 1) & 31 but
+   instead use { j, j, j, j }.  */ 
+/* { dg-final { scan-tree-dump-times "Building parent vector operands from scalars" 2 "slp2" } } */
+/* { dg-final { scan-tree-dump-not " = \{i_" "slp2" } } */
+/* { dg-final { scan-tree-dump-times " = \{j_" 1 "slp2" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index e7a260877a9..b2792c76ad2 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -238,6 +238,26 @@ vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
   return false;
 }
 
+/* Return true when all lanes in the external or constant NODE have
+   the same value.  */
+
+static bool
+vect_slp_tree_uniform_p (slp_tree node)
+{
+  gcc_assert (SLP_TREE_DEF_TYPE (node) == vect_constant_def
+	      || SLP_TREE_DEF_TYPE (node) == vect_external_def);
+
+  unsigned i;
+  tree op, first = NULL_TREE;
+  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
+    if (!first)
+      first = op;
+    else if (!operand_equal_p (first, op, 0))
+      return false;
+
+  return true;
+}
+
 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
    that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
    of the chain.  */
@@ -1439,7 +1459,7 @@ fail:
   vect_free_oprnd_info (oprnds_info);
 
   /* If we have all children of a non-unary child built up from
-     scalars then just throw that away, causing it built up
+     uniform scalars then just throw that away, causing it built up
      from scalars.  */
   if (nops > 1
       && is_a <bb_vec_info> (vinfo)
@@ -1451,11 +1471,13 @@ fail:
       slp_tree child;
       unsigned j;
       FOR_EACH_VEC_ELT (children, j, child)
-	if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
+	if (SLP_TREE_DEF_TYPE (child) == vect_internal_def
+	    || !vect_slp_tree_uniform_p (child))
 	  break;
       if (!child)
 	{
 	  /* Roll back.  */
+	  matches[0] = false;
 	  FOR_EACH_VEC_ELT (children, j, child)
 	    vect_free_slp_tree (child, false);
 
-- 
2.26.2


More information about the Gcc-patches mailing list