[PATCH] Dissect vect_create_epilog_for_reduction

Richard Biener rguenther@suse.de
Mon Sep 30 10:45:00 GMT 2019


This completes the first refactoring by decoupling epilogue creation
(vect_create_epilog_for_reduction) from vectorizing of the "reduction 
stmt" (vectorizable_reduction).

The epilogue is now created when vectorizing the live result of the
reduction stmt, thus through vectorizable_live_operation.

Backedge values of vectorized PHIs are filled when we vectorized
their defs.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

This was the first step to make the "reduction stmt" not vectorized
from vectorizable_reduction and thus generalizing "reduction stmt".

The next step is probably to split out nested cycle handling though
(first simplify code before turning it upside-down...).

Richard.

2019-09-30  Richard Biener  <rguenther@suse.de>

	* gimple.c (gimple_get_lhs): For PHIs return the result.
	* tree-vectorizer.h (vectorizable_live_operation): Also get the
	SLP instance as argument.
	* tree-vect-loop.c (vect_analyze_loop_operations): Also handle
	double-reduction PHIs with vectorizable_lc_phi.
	(vect_analyze_loop_operations): Adjust.
	(vect_create_epilog_for_reduction): Remove all code not dealing
	with reduction LC PHI or epilogue generation.
	(vectorizable_live_operation): Call vect_create_epilog_for_reduction
	for live stmts of reductions.
	* tree-vect-stmts.c (vectorizable_condition): When !for_reduction
	do not handle defs that are not vect_internal_def.
	(can_vectorize_live_stmts): Adjust.
	(vect_analyze_stmt): When the vectorized stmt defined a value
	used on backedges adjust the backedge uses of vectorized PHIs.

Index: gcc/gimple.c
===================================================================
--- gcc/gimple.c	(revision 276292)
+++ gcc/gimple.c	(working copy)
@@ -1797,6 +1797,8 @@ gimple_get_lhs (const gimple *stmt)
     return gimple_assign_lhs (stmt);
   else if (code == GIMPLE_CALL)
     return gimple_call_lhs (stmt);
+  else if (code == GIMPLE_PHI)
+    return gimple_phi_result (stmt);
   else
     return NULL_TREE;
 }
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 276292)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -1526,7 +1526,9 @@ vect_analyze_loop_operations (loop_vec_i
 			  != vect_used_in_outer_by_reduction))
 		    return opt_result::failure_at (phi, "unsupported phi\n");
 
-		  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
+		  if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
+		       || (STMT_VINFO_DEF_TYPE (stmt_info)
+			   == vect_double_reduction_def))
 		      && !vectorizable_lc_phi (stmt_info, NULL, NULL))
 		    return opt_result::failure_at (phi, "unsupported phi\n");
                 }
@@ -1564,8 +1566,8 @@ vect_analyze_loop_operations (loop_vec_i
 	  if (ok
 	      && STMT_VINFO_LIVE_P (stmt_info)
 	      && !PURE_SLP_STMT (stmt_info))
-	    ok = vectorizable_live_operation (stmt_info, NULL, NULL, -1, NULL,
-					      &cost_vec);
+	    ok = vectorizable_live_operation (stmt_info, NULL, NULL, NULL,
+					      -1, NULL, &cost_vec);
 
           if (!ok)
 	    return opt_result::failure_at (phi,
@@ -4260,6 +4275,20 @@ vect_create_epilog_for_reduction (stmt_v
 				  slp_instance slp_node_instance)
 {
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  /* For double reductions we need to get at the inner loop reduction
+     stmt which has the meta info attached.  Our stmt_info is that of the
+     loop-closed PHI of the inner loop which we remember as
+     def for the reduction PHI generation.  */
+  bool double_reduc = false;
+  stmt_vec_info rdef_info = stmt_info;
+  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
+    {
+      gcc_assert (!slp_node);
+      double_reduc = true;
+      stmt_info = loop_vinfo->lookup_def (gimple_phi_arg_def
+					    (stmt_info->stmt, 0));
+      stmt_info = vect_stmt_to_vectorize (stmt_info);
+    }
   gphi *reduc_def_stmt
     = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt);
   enum tree_code code = STMT_VINFO_REDUC_CODE (stmt_info);
@@ -4269,20 +4298,6 @@ vect_create_epilog_for_reduction (stmt_v
     neutral_op
       = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, code,
 				      REDUC_GROUP_FIRST_ELEMENT (stmt_info));
-  bool double_reduc = false;
-  if (nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info))
-    {
-      tree def_arg
-	= PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
-				 loop_preheader_edge 
-				   (LOOP_VINFO_LOOP (loop_vinfo)->inner));
-      stmt_vec_info def_arg_stmt_info = loop_vinfo->lookup_def (def_arg);
-      if (def_arg_stmt_info
-	  && (STMT_VINFO_DEF_TYPE (def_arg_stmt_info)
-	      == vect_double_reduction_def))
-	double_reduc = true;
-    }
-  tree induc_val = NULL_TREE;
   stmt_vec_info prev_phi_info;
   tree vectype;
   machine_mode mode;
@@ -4298,23 +4313,20 @@ vect_create_epilog_for_reduction (stmt_v
   gimple *epilog_stmt = NULL;
   gimple *exit_phi;
   tree bitsize;
-  tree adjustment_def = NULL;
-  tree expr, def, initial_def = NULL;
+  tree expr, def;
   tree orig_name, scalar_result;
   imm_use_iterator imm_iter, phi_imm_iter;
   use_operand_p use_p, phi_use_p;
   gimple *use_stmt;
   bool nested_in_vect_loop = false;
   auto_vec<gimple *> new_phis;
-  auto_vec<stmt_vec_info> inner_phis;
   int j, i;
   auto_vec<tree> scalar_results;
-  unsigned int group_size = 1, k, ratio;
+  unsigned int group_size = 1, k;
   auto_vec<gimple *> phis;
   bool slp_reduc = false;
   bool direct_slp_reduc;
   tree new_phi_result;
-  stmt_vec_info inner_phi = NULL;
   tree induction_index = NULL_TREE;
 
   if (slp_node)
@@ -4327,28 +4339,15 @@ vect_create_epilog_for_reduction (stmt_v
       nested_in_vect_loop = true;
       gcc_assert (!slp_node);
     }
+  gcc_assert (!nested_in_vect_loop || double_reduc);
 
   vectype = STMT_VINFO_VECTYPE (stmt_info);
   gcc_assert (vectype);
   mode = TYPE_MODE (vectype);
 
-  /* 1. Create the reduction def-use cycle:
-     Set the backedge argument of REDUCTION_PHIS, i.e., transform
-
-        loop:
-          vec_def = phi <vec_init, null>        # REDUCTION_PHI
-          VECT_DEF = vector_stmt                # vectorized form of STMT
-          ...
-
-     into:
-
-        loop:
-          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
-          VECT_DEF = vector_stmt                # vectorized form of STMT
-          ...
-
-     (in case of SLP, do it for all the phis).  */
-
+  tree initial_def = NULL;
+  tree induc_val = NULL_TREE;
+  tree adjustment_def = NULL;
   if (slp_node)
     ;
   else
@@ -4357,9 +4356,8 @@ vect_create_epilog_for_reduction (stmt_v
 	 of the reduction variable.  */
       initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
 					   loop_preheader_edge (loop));
-      /* Optimize: if initial_def is for REDUC_MAX smaller than the base
-	 and we can't use zero for induc_val, use initial_def.  Similarly
-	 for REDUC_MIN and initial_def larger than the base.  */
+      /* Optimize: for induction condition reduction, if we can't use zero
+         for induc_val, use initial_def.  */
       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
 	  == INTEGER_INDUC_COND_REDUCTION)
 	induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (stmt_info);
@@ -4371,50 +4369,26 @@ vect_create_epilog_for_reduction (stmt_v
 	adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (stmt_info);
     }
 
-  /* Set phi nodes latch arguments.  */
-  unsigned vec_num = 1;
-  int ncopies = 0;
+  unsigned vec_num;
+  int ncopies;
   if (slp_node)
-    vec_num = SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis).length ();
-  for (unsigned i = 0; i < vec_num; i++)
     {
-      if (slp_node)
-	{
-	  def = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt);
-	  phi_info = SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis)[i];
-	}
-      else
+      vec_num = SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis).length ();
+      ncopies = 1;
+    }
+  else
+    {
+      vec_num = 1;
+      ncopies = 0;
+      phi_info = STMT_VINFO_VEC_STMT (loop_vinfo->lookup_stmt (reduc_def_stmt));
+      do
 	{
-	  def = gimple_get_lhs (STMT_VINFO_VEC_STMT (stmt_info)->stmt);
-	  phi_info = STMT_VINFO_VEC_STMT (loop_vinfo->lookup_stmt (reduc_def_stmt));
+	  ncopies++;
+	  phi_info = STMT_VINFO_RELATED_STMT (phi_info);
 	}
-      for (ncopies = 0;; ncopies++)
-        {
-          /* Set the loop-latch arg for the reduction-phi.  */
-	  if (ncopies != 0)
-	    {
-	      if (slp_node)
-		break;
-	      phi_info = STMT_VINFO_RELATED_STMT (phi_info);
-	      if (!phi_info)
-		break;
-	      def = vect_get_vec_def_for_stmt_copy (loop_vinfo, def);
-	    }
-
-	  gphi *phi = as_a <gphi *> (phi_info->stmt);
-	  add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
-
-          if (dump_enabled_p ())
-	    dump_printf_loc (MSG_NOTE, vect_location,
-			     "transform reduction: created def-use cycle: %G%G",
-			     phi, SSA_NAME_DEF_STMT (def));
-        }
+      while (phi_info);
     }
 
-  /* For vectorizing nested cycles the above is all we need to do.  */
-  if (nested_in_vect_loop && !double_reduc)
-    return;
-
   /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
      which is updated with the current index of the loop for every match of
      the original loop's cond_expr (VEC_STMT).  This results in a vector
@@ -4533,7 +4507,8 @@ vect_create_epilog_for_reduction (stmt_v
   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
          v_out1 = phi <VECT_DEF> 
          Store them in NEW_PHIS.  */
-
+  if (double_reduc)
+    loop = outer_loop;
   exit_bb = single_exit (loop)->dest;
   prev_phi_info = NULL;
   new_phis.create (slp_node ? vec_num : ncopies);
@@ -4542,7 +4517,7 @@ vect_create_epilog_for_reduction (stmt_v
       if (slp_node)
 	def = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt);
       else
-	def = gimple_get_lhs (STMT_VINFO_VEC_STMT (stmt_info)->stmt);
+	def = gimple_get_lhs (STMT_VINFO_VEC_STMT (rdef_info)->stmt);
       for (j = 0; j < ncopies; j++)
         {
 	  tree new_def = copy_ssa_name (def);
@@ -4561,37 +4536,6 @@ vect_create_epilog_for_reduction (stmt_v
         }
     }
 
-  /* The epilogue is created for the outer-loop, i.e., for the loop being
-     vectorized.  Create exit phis for the outer loop.  */
-  if (double_reduc)
-    {
-      loop = outer_loop;
-      exit_bb = single_exit (loop)->dest;
-      inner_phis.create (slp_node ? vec_num : ncopies);
-      FOR_EACH_VEC_ELT (new_phis, i, phi)
-	{
-	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
-	  tree new_result = copy_ssa_name (PHI_RESULT (phi));
-	  gphi *outer_phi = create_phi_node (new_result, exit_bb);
-	  SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
-			   PHI_RESULT (phi));
-	  prev_phi_info = loop_vinfo->add_stmt (outer_phi);
-	  inner_phis.quick_push (phi_info);
-	  new_phis[i] = outer_phi;
-	  while (STMT_VINFO_RELATED_STMT (phi_info))
-            {
-	      phi_info = STMT_VINFO_RELATED_STMT (phi_info);
-	      new_result = copy_ssa_name (PHI_RESULT (phi_info->stmt));
-	      outer_phi = create_phi_node (new_result, exit_bb);
-	      SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
-			       PHI_RESULT (phi_info->stmt));
-	      stmt_vec_info outer_phi_info = loop_vinfo->add_stmt (outer_phi);
-	      STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi_info;
-	      prev_phi_info = outer_phi_info;
-	    }
-	}
-    }
-
   exit_gsi = gsi_after_labels (exit_bb);
 
   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
@@ -5381,23 +5325,10 @@ vect_create_epilog_for_reduction (stmt_v
      correspond to the first vector stmt, etc.
      (RATIO is equal to (REDUC_GROUP_SIZE / number of new vector stmts)).  */
   if (group_size > new_phis.length ())
-    {
-      ratio = group_size / new_phis.length ();
-      gcc_assert (!(group_size % new_phis.length ()));
-    }
-  else
-    ratio = 1;
+    gcc_assert (!(group_size % new_phis.length ()));
 
-  stmt_vec_info epilog_stmt_info = NULL;
   for (k = 0; k < group_size; k++)
     {
-      if (k % ratio == 0)
-        {
-	  epilog_stmt_info = loop_vinfo->lookup_stmt (new_phis[k / ratio]);
-	  if (double_reduc)
-	    inner_phi = inner_phis[k / ratio];
-        }
-
       if (slp_reduc)
         {
 	  stmt_vec_info scalar_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[k];
@@ -5408,84 +5339,6 @@ vect_create_epilog_for_reduction (stmt_v
 	  scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt);
         }
 
-      if (outer_loop)
-	{
-      phis.create (3);
-      /* Find the loop-closed-use at the loop exit of the original scalar
-         result.  (The reduction result is expected to have two immediate uses -
-         one at the latch block, and one at the loop exit).  */
-      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
-        if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
-	    && !is_gimple_debug (USE_STMT (use_p)))
-          phis.safe_push (USE_STMT (use_p));
-
-      /* While we expect to have found an exit_phi because of loop-closed-ssa
-         form we can end up without one if the scalar cycle is dead.  */
-
-      FOR_EACH_VEC_ELT (phis, i, exit_phi)
-        {
-	      stmt_vec_info exit_phi_vinfo
-		= loop_vinfo->lookup_stmt (exit_phi);
-              gphi *vect_phi;
-
-	      if (double_reduc)
-		STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
-	      else
-		STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt_info;
-              if (!double_reduc
-                  || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
-                      != vect_double_reduction_def)
-                continue;
-
-              /* Handle double reduction:
-
-                 stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
-                 stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
-                 stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
-                 stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
-
-                 At that point the regular reduction (stmt2 and stmt3) is
-                 already vectorized, as well as the exit phi node, stmt4.
-                 Here we vectorize the phi node of double reduction, stmt1, and
-                 update all relevant statements.  */
-
-              /* Go through all the uses of s2 to find double reduction phi
-                 node, i.e., stmt1 above.  */
-              orig_name = PHI_RESULT (exit_phi);
-              FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
-                {
-                  stmt_vec_info use_stmt_vinfo;
-                  basic_block bb = gimple_bb (use_stmt);
-
-                  /* Check that USE_STMT is really double reduction phi
-                     node.  */
-                  if (gimple_code (use_stmt) != GIMPLE_PHI
-                      || gimple_phi_num_args (use_stmt) != 2
-                      || bb->loop_father != outer_loop)
-                    continue;
-		  use_stmt_vinfo = loop_vinfo->lookup_stmt (use_stmt);
-                  if (!use_stmt_vinfo
-                      || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
-                          != vect_double_reduction_def)
-		    continue;
-
-                  /* Set the outer loop vector phi args.  The PHI node
-		     itself was created in vectorizable_reduction.  */
-		  vect_phi = as_a <gphi *>
-				(STMT_VINFO_VEC_STMT (use_stmt_vinfo)->stmt);
-
-                  /* Update phi node argument with vs2.  */
-		  add_phi_arg (vect_phi, PHI_RESULT (inner_phi->stmt),
-			       loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
-                  if (dump_enabled_p ())
-		    dump_printf_loc (MSG_NOTE, vect_location,
-				     "created double reduction phi node: %G",
-				     vect_phi);
-                }
-        }
-      phis.release ();
-	}
-
       if (nested_in_vect_loop)
         {
           if (double_reduc)
@@ -7153,10 +7006,6 @@ vectorizable_reduction (stmt_vec_info st
   if (single_defuse_cycle && !slp_node)
     STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
 
-  /* Finalize the reduction-phi (set its arguments) and create the
-     epilog reduction code.  */
-  vect_create_epilog_for_reduction (stmt_info, slp_node, slp_node_instance);
-
   return true;
 }
 
@@ -7172,10 +7021,8 @@ vectorizable_lc_phi (stmt_vec_info stmt_
       || gimple_phi_num_args (stmt_info->stmt) != 1)
     return false;
 
-  /* To handle the nested_cycle_def for double-reductions we have to
-     refactor epilogue generation more.  */
   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
-      /* && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def */)
+      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
     return false;
 
   if (!vec_stmt) /* transformation not required.  */
@@ -7833,8 +7680,8 @@ vectorizable_induction (stmt_vec_info st
 bool
 vectorizable_live_operation (stmt_vec_info stmt_info,
 			     gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
-			     slp_tree slp_node, int slp_index,
-			     stmt_vec_info *vec_stmt,
+			     slp_tree slp_node, slp_instance slp_node_instance,
+			     int slp_index, stmt_vec_info *vec_stmt,
 			     stmt_vector_for_cost *)
 {
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
@@ -7851,8 +7698,34 @@ vectorizable_live_operation (stmt_vec_in
 
   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
 
-  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
-    return false;
+  /* The last stmt of a reduction is live and vectorized via
+     vect_create_epilog_for_reduction.  vectorizable_reduction assessed
+     validity so just trigger the transform here.  */
+  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
+      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
+    {
+      if (!vec_stmt)
+	return true;
+      if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
+	{
+	  if (STMT_VINFO_REDUC_TYPE (stmt_info) == FOLD_LEFT_REDUCTION)
+	    return true;
+	  if (slp_node)
+	    {
+	      /* For reduction chains the meta-info is attached to
+	         the group leader.  */
+	      if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
+		stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
+	      /* For SLP reductions we vectorize the epilogue for
+	         all involved stmts together.  */
+	      else if (slp_index != 0)
+		return true;
+	    }
+	}
+      vect_create_epilog_for_reduction (stmt_info, slp_node,
+					slp_node_instance);
+      return true;
+    }
 
   /* FORNOW.  CHECKME.  */
   if (nested_in_vect_loop_p (loop, stmt_info))
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 276292)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -9809,19 +9879,17 @@ vectorizable_condition (stmt_vec_info st
   tree vec_cmp_type;
   bool masked = false;
 
+  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
+    return false;
+
   if (for_reduction && STMT_SLP_TYPE (stmt_info))
     return false;
 
   vect_reduction_type reduction_type
     = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
-  if (reduction_type == TREE_CODE_REDUCTION)
+  if (!for_reduction)
     {
-      if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
-	return false;
-
-      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
-	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
-	       && for_reduction))
+      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
 	return false;
 
       /* FORNOW: not yet supported.  */
@@ -10472,7 +10540,8 @@ vectorizable_comparison (stmt_vec_info s
 
 static bool
 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-			  slp_tree slp_node, stmt_vec_info *vec_stmt,
+			  slp_tree slp_node, slp_instance slp_node_instance,
+			  stmt_vec_info *vec_stmt,
 			  stmt_vector_for_cost *cost_vec)
 {
   if (slp_node)
@@ -10482,13 +10551,15 @@ can_vectorize_live_stmts (stmt_vec_info
       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
 	{
 	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
-	      && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
+	      && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node,
+					       slp_node_instance, i,
 					       vec_stmt, cost_vec))
 	    return false;
 	}
     }
   else if (STMT_VINFO_LIVE_P (stmt_info)
-	   && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
+	   && !vectorizable_live_operation (stmt_info, gsi, slp_node,
+					    slp_node_instance, -1,
 					    vec_stmt, cost_vec))
     return false;
 
@@ -10704,7 +10775,9 @@ vect_analyze_stmt (stmt_vec_info stmt_in
       need extra handling, except for vectorizable reductions.  */
   if (!bb_vinfo
       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
-      && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
+      && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
+      && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance,
+				    NULL, cost_vec))
     return opt_result::failure_at (stmt_info->stmt,
 				   "not vectorized:"
 				   " live stmt not supported: %G",
@@ -10878,19 +10951,65 @@ vect_transform_stmt (stmt_vec_info stmt_
 	  }
     }
 
-  /* Handle stmts whose DEF is used outside the loop-nest that is
-     being vectorized.  */
-  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
-    {
-      done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
-				       NULL);
-      gcc_assert (done);
-    }
-
   if (vec_stmt)
     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
 
-  return is_store;
+  if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
+    return is_store;
+
+  /* If this stmt defines a value used on a backedge, update the
+     vectorized PHIs.  */
+  stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
+  if (!slp_node && STMT_VINFO_REDUC_DEF (orig_stmt_info)
+      && STMT_VINFO_REDUC_TYPE (orig_stmt_info) != FOLD_LEFT_REDUCTION
+      && is_a <gphi *> (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
+    {
+      gphi *phi = as_a <gphi *> (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt);
+      if (dominated_by_p (CDI_DOMINATORS,
+			  gimple_bb (orig_stmt_info->stmt), gimple_bb (phi)))
+	{
+	  edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
+	  stmt_vec_info phi_info
+	      = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
+	  stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+	  do
+	    {
+	      add_phi_arg (as_a <gphi *> (phi_info->stmt),
+			   gimple_get_lhs (vec_stmt->stmt), e,
+			   gimple_phi_arg_location (phi, e->dest_idx));
+	      phi_info = STMT_VINFO_RELATED_STMT (phi_info);
+	      vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
+	    }
+	  while (phi_info);
+	  gcc_assert (!vec_stmt);
+	}
+    }
+  else if (slp_node && STMT_VINFO_REDUC_DEF (orig_stmt_info)
+	   /* Going back and forth via STMT_VINFO_REDUC_DEF gets us to the
+	      stmt with the reduction meta in case of reduction groups.  */
+	   && (STMT_VINFO_REDUC_TYPE
+	         (STMT_VINFO_REDUC_DEF (STMT_VINFO_REDUC_DEF (orig_stmt_info)))
+	       != FOLD_LEFT_REDUCTION)
+	   && slp_node != slp_node_instance->reduc_phis)
+    {
+      slp_tree phi_node = slp_node_instance->reduc_phis;
+      gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
+      edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
+      gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
+		  == SLP_TREE_VEC_STMTS (slp_node).length ());
+      for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
+	add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
+		     gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
+		     e, gimple_phi_arg_location (phi, e->dest_idx));
+    }
+
+  /* Handle stmts whose DEF is used outside the loop-nest that is
+     being vectorized.  */
+  done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
+				   slp_node_instance, &vec_stmt, NULL);
+  gcc_assert (done);
+
+  return false;
 }
 
 
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	(revision 276292)
+++ gcc/tree-vectorizer.h	(working copy)
@@ -1649,7 +1652,8 @@ extern class loop *vect_transform_loop (
 extern opt_loop_vec_info vect_analyze_loop_form (class loop *,
 						 vec_info_shared *);
 extern bool vectorizable_live_operation (stmt_vec_info, gimple_stmt_iterator *,
-					 slp_tree, int, stmt_vec_info *,
+					 slp_tree, slp_instance, int,
+					 stmt_vec_info *,
 					 stmt_vector_for_cost *);
 extern bool vectorizable_reduction (stmt_vec_info, gimple_stmt_iterator *,
 				    stmt_vec_info *, slp_tree, slp_instance,



More information about the Gcc-patches mailing list