This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix PR92094


The following fixes vectorization of nested cycles when the nested
cycle only consists of a PHI node.  As in the previous fix, a
nested cycle may consist only of the PHI; it doesn't necessarily have
another stmt participating only in that cycle (in this case it
participates in another nested cycle).

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2019-10-15  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/92094
	* tree-vect-loop.c (vectorizable_reduction): For nested cycles
	do not adjust the reduction definition def type.
	* tree-vect-stmts.c (vect_transform_stmt): Verify the scalar stmt
	defines the latch argument of the PHI.

	* gfortran.dg/pr92094.f90: New testcase.

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 276983)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -5742,20 +5751,9 @@ vectorizable_reduction (stmt_vec_info stmt_info, s
   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
     {
       if (is_a <gphi *> (stmt_info->stmt))
-	{
-	  /* Analysis for double-reduction is done on the outer
-	     loop PHI, nested cycles have no further restrictions.  */
-	  STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type;
-	  /* For nested cycles we want to let regular vectorizable_*
-	     routines handle code-generation.  */
-	  if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_double_reduction_def)
-	    {
-	      stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
-	      STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def;
-	      STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (stmt_info))
-		= vect_internal_def;
-	    }
-	}
+	/* Analysis for double-reduction is done on the outer
+	   loop PHI, nested cycles have no further restrictions.  */
+	STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type;
       else
 	STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
       return true;
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 276983)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -10906,13 +10906,16 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimp
       && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
     {
       gphi *phi;
+      edge e;
       if (!slp_node
 	  && (phi = dyn_cast <gphi *>
 		      (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
 	  && dominated_by_p (CDI_DOMINATORS,
-			     gimple_bb (orig_stmt_info->stmt), gimple_bb (phi)))
+			     gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
+	  && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
+	  && (PHI_ARG_DEF_FROM_EDGE (phi, e)
+	      == gimple_get_lhs (orig_stmt_info->stmt)))
 	{
-	  edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
 	  stmt_vec_info phi_info
 	    = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
 	  stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
@@ -10932,7 +10935,7 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimp
 	{
 	  slp_tree phi_node = slp_node_instance->reduc_phis;
 	  gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
-	  edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
+	  e = loop_latch_edge (gimple_bb (phi)->loop_father);
 	  gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
 		      == SLP_TREE_VEC_STMTS (slp_node).length ());
 	  for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
Index: gcc/testsuite/gfortran.dg/pr92094.f90
===================================================================
--- gcc/testsuite/gfortran.dg/pr92094.f90	(nonexistent)
+++ gcc/testsuite/gfortran.dg/pr92094.f90	(working copy)
@@ -0,0 +1,28 @@
+! { dg-do compile }
+! { dg-options "-O3" }
+      subroutine hesfcn(n, x, h, ldh)
+      integer n,ldh
+      double precision x(n), h(ldh)
+
+      integer i,j,k,kj
+      double precision th,u1,u2,v2
+ 
+      kj = 0
+      do 770 j = 1, n
+         kj = kj - j
+         do 760 k = 1, j
+            kj = kj + 1
+            v2 = 2 * x(k) - 1
+            u1 = 0
+            u2 = 2
+            do 750 i = 1, n
+               h(kj) = h(kj) + u2
+               th = 4 * v2 + u2 - u1
+               u1 = u2
+               u2 = th
+               th = v2 - 1
+  750       continue
+  760    continue
+  770 continue
+
+      end


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]