[PATCH] Fix PR81633

Richard Biener rguenther@suse.de
Tue Aug 1 11:32:00 GMT 2017


I am testing the following pair of patches (first for trunk, 2nd for GCC 7 
branch) to fix PR81633.  On trunk recent refactoring made the PR71752
change obsolete, on the branch the patch installs the simpler originally
suggested patch which works within the constraints vect_get_slp_defs
is used on the branch.

Bootstrapped and tested on x86_64-unknown-linux-gnu (trunk version still
in testing).

Richard.

2017-08-01  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/81633
	Revert
	2015-08-17  Alan Hayward  <alan.hayward@arm.com>

	PR tree-optimization/71752
	* tree-vect-slp.c (vect_get_slp_defs): Handle null operands.

	* gcc.dg/vect/pr81633.c: New testcase.

Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 250725)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -3325,32 +3325,24 @@ vect_get_slp_defs (vec<tree> ops, slp_tr
 {
   gimple *first_stmt;
   int number_of_vects = 0, i;
+  unsigned int child_index = 0;
   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
   slp_tree child = NULL;
   vec<tree> vec_defs;
   tree oprnd;
-  bool first_iteration = true;
+  bool vectorized_defs;
 
   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
   FOR_EACH_VEC_ELT (ops, i, oprnd)
     {
-      bool vectorized_defs = false;
-
-      if (oprnd == NULL)
-	{
-	  vec_defs = vNULL;
-	  vec_defs.create (0);
-	  vec_oprnds->quick_push (vec_defs);
-	  continue;
-	}
-
       /* For each operand we check if it has vectorized definitions in a child
 	 node or we need to create them (for invariants and constants).  We
 	 check if the LHS of the first stmt of the next child matches OPRND.
 	 If it does, we found the correct child.  Otherwise, we call
-	 vect_get_constant_vectors ().  */
-      for (unsigned int child_index = 0;
-	   child_index < SLP_TREE_CHILDREN (slp_node).length (); child_index++)
+	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
+	 to check this child node for the next operand.  */
+      vectorized_defs = false;
+      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
         {
           child = SLP_TREE_CHILDREN (slp_node)[child_index];
 
@@ -3375,25 +3367,30 @@ vect_get_slp_defs (vec<tree> ops, slp_tr
 		     statements.  */
 		  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
 		  vectorized_defs = true;
-		  break;
+		  child_index++;
 		}
 	    }
+	  else
+	    child_index++;
         }
 
-      if (!vectorized_defs && first_iteration)
-	{
-	  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-	  /* Number of vector stmts was calculated according to LHS in
-	     vect_schedule_slp_instance (), fix it by replacing LHS with
-	     RHS, if necessary.  See vect_get_smallest_scalar_type () for
-	     details.  */
-	  vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
-					 &rhs_size_unit);
-	  if (rhs_size_unit != lhs_size_unit)
-	    {
-	      number_of_vects *= rhs_size_unit;
-	      number_of_vects /= lhs_size_unit;
-	    }
+      if (!vectorized_defs)
+        {
+          if (i == 0)
+            {
+              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+              /* Number of vector stmts was calculated according to LHS in
+                 vect_schedule_slp_instance (), fix it by replacing LHS with
+                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
+                 details.  */
+              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
+                                             &rhs_size_unit);
+              if (rhs_size_unit != lhs_size_unit)
+                {
+                  number_of_vects *= rhs_size_unit;
+                  number_of_vects /= lhs_size_unit;
+                }
+            }
         }
 
       /* Allocate memory for vectorized defs.  */
@@ -3411,8 +3408,6 @@ vect_get_slp_defs (vec<tree> ops, slp_tr
 				   number_of_vects);
 
       vec_oprnds->quick_push (vec_defs);
-
-      first_iteration = false;
     }
 }
 
Index: gcc/testsuite/gcc.dg/vect/pr81633.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr81633.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr81633.c	(working copy)
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+
+static double identity[4][4] = {{1, 0, 0, 0},
+                                {0, 1, 0, 0},
+                                {0, 0, 1, 0},
+                                {0, 0, 0, 1}};
+static double expected[4][4] = {{1, 0, 0, 0},
+                                {0, 0, 0, 0},
+                                {0, 0, 0, 0},
+                                {0, 0, 0, 0}};
+
+static void __attribute__((noinline,noclone))
+kernel(double A[4][4])
+{
+  double tmp[4][4];
+  for (int j = 0; j < 4; j++)
+    for (int k = 0; k < 4; k++)
+      tmp[j][k] = identity[j][0] * identity[j][k];
+  for (int j = 0; j < 4; j++ )
+    for (int k = 0; k < 4; k++)
+      A[j][k] = tmp[j][k];
+}
+
+int main(void)
+{
+  double A[4][4] = {{0.0}};
+  kernel(A);
+  for ( int i = 0; i < 4; i++ )
+    for ( int j = 0; j < 4; j++ )
+      if (A[i][j] != expected[i][j])
+	__builtin_abort ();
+  return 0;
+}


2017-08-01  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/71752
	PR tree-optimization/81633
	* tree-vect-slp.c (vect_get_slp_defs): Handle null operands
	in the original suggested way.

	* gcc.dg/vect/pr81633.c: New testcase.

Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 250732)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -3289,19 +3289,22 @@ vect_get_slp_defs (vec<tree> ops, slp_tr
 {
   gimple *first_stmt;
   int number_of_vects = 0, i;
+  unsigned int child_index = 0;
   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
   slp_tree child = NULL;
   vec<tree> vec_defs;
   tree oprnd;
+  bool vectorized_defs;
   bool first_iteration = true;
 
   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
   FOR_EACH_VEC_ELT (ops, i, oprnd)
     {
-      bool vectorized_defs = false;
-
       if (oprnd == NULL)
 	{
+	  /* Only vectorizable_reduction will call us with a NULL op which
+	     will always match the reduction operand for which we have no
+	     SLP child.  */
 	  vec_defs = vNULL;
 	  vec_defs.create (0);
 	  vec_oprnds->quick_push (vec_defs);
@@ -3312,9 +3315,10 @@ vect_get_slp_defs (vec<tree> ops, slp_tr
 	 node or we need to create them (for invariants and constants).  We
 	 check if the LHS of the first stmt of the next child matches OPRND.
 	 If it does, we found the correct child.  Otherwise, we call
-	 vect_get_constant_vectors ().  */
-      for (unsigned int child_index = 0;
-	   child_index < SLP_TREE_CHILDREN (slp_node).length (); child_index++)
+	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
+	 to check this child node for the next operand.  */
+      vectorized_defs = false;
+      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
         {
           child = SLP_TREE_CHILDREN (slp_node)[child_index];
 
@@ -3334,25 +3338,30 @@ vect_get_slp_defs (vec<tree> ops, slp_tr
 		     statements.  */
 		  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
 		  vectorized_defs = true;
-		  break;
+		  child_index++;
 		}
 	    }
+	  else
+	    child_index++;
         }
 
-      if (!vectorized_defs && first_iteration)
-	{
-	  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-	  /* Number of vector stmts was calculated according to LHS in
-	     vect_schedule_slp_instance (), fix it by replacing LHS with
-	     RHS, if necessary.  See vect_get_smallest_scalar_type () for
-	     details.  */
-	  vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
-					 &rhs_size_unit);
-	  if (rhs_size_unit != lhs_size_unit)
-	    {
-	      number_of_vects *= rhs_size_unit;
-	      number_of_vects /= lhs_size_unit;
-	    }
+      if (!vectorized_defs)
+        {
+          if (first_iteration)
+            {
+              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+              /* Number of vector stmts was calculated according to LHS in
+                 vect_schedule_slp_instance (), fix it by replacing LHS with
+                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
+                 details.  */
+              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
+                                             &rhs_size_unit);
+              if (rhs_size_unit != lhs_size_unit)
+                {
+                  number_of_vects *= rhs_size_unit;
+                  number_of_vects /= lhs_size_unit;
+                }
+            }
         }
 
       /* Allocate memory for vectorized defs.  */
Index: gcc/testsuite/gcc.dg/vect/pr81633.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr81633.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr81633.c	(working copy)
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+
+static double identity[4][4] = {{1, 0, 0, 0},
+                                {0, 1, 0, 0},
+                                {0, 0, 1, 0},
+                                {0, 0, 0, 1}};
+static double expected[4][4] = {{1, 0, 0, 0},
+                                {0, 0, 0, 0},
+                                {0, 0, 0, 0},
+                                {0, 0, 0, 0}};
+
+static void __attribute__((noinline,noclone))
+kernel(double A[4][4])
+{
+  double tmp[4][4];
+  for (int j = 0; j < 4; j++)
+    for (int k = 0; k < 4; k++)
+      tmp[j][k] = identity[j][0] * identity[j][k];
+  for (int j = 0; j < 4; j++ )
+    for (int k = 0; k < 4; k++)
+      A[j][k] = tmp[j][k];
+}
+
+int main(void)
+{
+  double A[4][4] = {{0.0}};
+  kernel(A);
+  for ( int i = 0; i < 4; i++ )
+    for ( int j = 0; j < 4; j++ )
+      if (A[i][j] != expected[i][j])
+	__builtin_abort ();
+  return 0;
+}



More information about the Gcc-patches mailing list