This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Refactor vectorizer cost model


This refactors the vectorizer cost model so that it is called when
everything is ready, avoiding some fixups.  It also fixes the cost
computation for SLP reductions.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2015-05-28  Richard Biener  <rguenther@suse.de>

	* tree-vectorizer.h (struct _slp_instance): Remove body_cost_vec
	member.
	(SLP_INSTANCE_BODY_COST_VEC): Remove.
	(vect_update_slp_costs_according_to_vf): Likewise.
	(vect_slp_analyze_operations): Update prototype.
	* tree-vect-loop.c (vect_analyze_loop_2): Remove call to
	vect_update_slp_costs_according_to_vf, adjust.
	* tree-vect-slp.c (vect_free_slp_instance): Adjust.
	(vect_analyze_slp_cost_1): Likewise.
	(vect_analyze_slp_cost): Likewise.  Properly deal with
	widening reduction ops.  Commit body costs.
	(vect_analyze_slp_instance): Adjust.  Do not analyze SLP
	cost for loops from here.
	(vect_slp_analyze_operations): But do it from here when
	the vectorization factor is known and stmts are analyzed.
	(vect_bb_vectorization_profitable_p): Simplify.
	(vect_slp_analyze_bb_1): Do not compute SLP cost here.
	(vect_update_slp_costs_according_to_vf): Remove.


Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 223743)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -1814,15 +1855,12 @@ vect_analyze_loop_2 (loop_vec_info loop_
 	  /* Update the vectorization factor based on the SLP decision.  */
 	  vect_update_vf_for_slp (loop_vinfo);
 
-	  /* Once VF is set, SLP costs should be updated since the number of
-	     created vector stmts depends on VF.  */
-	  vect_update_slp_costs_according_to_vf (loop_vinfo);
-
 	  /* Analyze operations in the SLP instances.  Note this may
 	     remove unsupported SLP instances which makes the above
 	     SLP kind detection invalid.  */
 	  unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
-	  vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
+	  vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
+				       LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
 	  if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
 	    return false;
 	}
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 223743)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -130,7 +130,6 @@ vect_free_slp_instance (slp_instance ins
 {
   vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
   SLP_INSTANCE_LOADS (instance).release ();
-  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
   free (instance);
 }
 
@@ -1546,13 +1545,11 @@ vect_find_last_scalar_stmt_in_slp (slp_t
 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
 
 static void
-vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
-			 slp_instance instance, slp_tree node,
+vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
 			 stmt_vector_for_cost *prologue_cost_vec,
+			 stmt_vector_for_cost *body_cost_vec,
 			 unsigned ncopies_for_cost)
 {
-  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
-
   unsigned i;
   slp_tree child;
   gimple stmt, s;
@@ -1563,9 +1560,8 @@ vect_analyze_slp_cost_1 (loop_vec_info l
   /* Recurse down the SLP tree.  */
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
     if (child)
-      vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
-			       instance, child, prologue_cost_vec,
-			       ncopies_for_cost);
+      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
+			       body_cost_vec, ncopies_for_cost);
 
   /* Look at the first scalar stmt to determine the cost.  */
   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
@@ -1622,7 +1618,8 @@ vect_analyze_slp_cost_1 (loop_vec_info l
       enum vect_def_type dt;
       if (!op || op == lhs)
 	continue;
-      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
+      if (vect_is_simple_use (op, NULL, STMT_VINFO_LOOP_VINFO (stmt_info),
+			      STMT_VINFO_BB_VINFO (stmt_info),
 			      &def_stmt, &def, &dt))
 	{
 	  /* Without looking at the actual initializer a vector of
@@ -1642,8 +1639,7 @@ vect_analyze_slp_cost_1 (loop_vec_info l
 /* Compute the cost for the SLP instance INSTANCE.  */
 
 static void
-vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
-		       slp_instance instance, unsigned nunits)
+vect_analyze_slp_cost (slp_instance instance, void *data)
 {
   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
   unsigned ncopies_for_cost;
@@ -1654,20 +1650,38 @@ vect_analyze_slp_cost (loop_vec_info loo
      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
      GROUP_SIZE / NUNITS otherwise.  */
   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
+  slp_tree node = SLP_INSTANCE_TREE (instance);
+  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
+  /* Adjust the group_size by the vectorization factor which is always one
+     for basic-block vectorization.  */
+  if (STMT_VINFO_LOOP_VINFO (stmt_info))
+    group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
+  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+  /* For reductions look at a reduction operand in case the reduction
+     operation is widening like DOT_PROD or SAD.  */
+  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
+    {
+      gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
+      switch (gimple_assign_rhs_code (stmt))
+	{
+	case DOT_PROD_EXPR:
+	case SAD_EXPR:
+	  nunits = TYPE_VECTOR_SUBPARTS
+	      (get_vectype_for_scalar_type (gimple_assign_rhs1 (stmt)));
+	  break;
+	default:;
+	}
+    }
   ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
 
   prologue_cost_vec.create (10);
   body_cost_vec.create (10);
-  SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
-  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
-			   instance, SLP_INSTANCE_TREE (instance),
-			   &prologue_cost_vec, ncopies_for_cost);
+  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
+			   &prologue_cost_vec, &body_cost_vec,
+			   ncopies_for_cost);
 
   /* Record the prologue costs, which were delayed until we were
-     sure that SLP was successful.  Unlike the body costs, we know
-     the final values now regardless of the loop vectorization factor.  */
-  void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
-		: BB_VINFO_TARGET_COST_DATA (bb_vinfo));
+     sure that SLP was successful.  */
   FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
     {
       struct _stmt_vec_info *stmt_info
@@ -1676,7 +1690,17 @@ vect_analyze_slp_cost (loop_vec_info loo
 			    si->misalign, vect_prologue);
     }
 
+  /* Record the instance's instructions in the target cost model.  */
+  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
+    {
+      struct _stmt_vec_info *stmt_info
+	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
+      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
+			    si->misalign, vect_body);
+    }
+
   prologue_cost_vec.release ();
+  body_cost_vec.release ();
 }
 
 /* Analyze an SLP instance starting from a group of grouped stores.  Call
@@ -1811,7 +1840,6 @@ vect_analyze_slp_instance (loop_vec_info
       SLP_INSTANCE_TREE (new_instance) = node;
       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
-      SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
       SLP_INSTANCE_LOADS (new_instance) = loads;
 
       /* Compute the load permutation.  */
@@ -1863,13 +1891,7 @@ vect_analyze_slp_instance (loop_vec_info
 
 
       if (loop_vinfo)
-	{
-	  /* Compute the costs of this SLP instance.  Delay this for BB
-	     vectorization as we don't have vector types computed yet.  */
-	  vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
-				 new_instance, TYPE_VECTOR_SUBPARTS (vectype));
-	  LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
-	}
+	LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
       else
         BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
 
@@ -2237,7 +2259,7 @@ vect_slp_analyze_node_operations (slp_tr
    operations are supported. */
 
 bool
-vect_slp_analyze_operations (vec<slp_instance> slp_instances)
+vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
 {
   slp_instance instance;
   int i;
@@ -2259,7 +2281,11 @@ vect_slp_analyze_operations (vec<slp_ins
           slp_instances.ordered_remove (i);
 	}
       else
-        i++;
+	{
+	  /* Compute the costs of the SLP instance.  */
+	  vect_analyze_slp_cost (instance, data);
+	  i++;
+	}
     }
 
   if (!slp_instances.length ())
@@ -2342,26 +2368,9 @@ vect_bb_vectorization_profitable_p (bb_v
 {
   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
   slp_instance instance;
-  int i, j;
+  int i;
   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
-  void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
-  stmt_vec_info stmt_info = NULL;
-  stmt_vector_for_cost body_cost_vec;
-  stmt_info_for_cost *ci;
-
-  /* Calculate vector costs.  */
-  FOR_EACH_VEC_ELT (slp_instances, i, instance)
-    {
-      body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
-
-      FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
-	{
-	  stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
-	  (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
-				stmt_info, ci->misalign, vect_body);
-	}
-    }
 
   /* Calculate scalar cost.  */
   FOR_EACH_VEC_ELT (slp_instances, i, instance)
@@ -2519,7 +2528,8 @@ vect_slp_analyze_bb_1 (basic_block bb)
       return NULL;
     }
 
-  if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo)))
+  if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
+				    BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -2529,15 +2539,6 @@ vect_slp_analyze_bb_1 (basic_block bb)
       return NULL;
     }
 
-  /* Compute the costs of the SLP instances.  */
-  FOR_EACH_VEC_ELT (slp_instances, i, instance)
-    {
-      gimple stmt = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0];
-      tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
-      vect_analyze_slp_cost (NULL, bb_vinfo,
-			     instance, TYPE_VECTOR_SUBPARTS (vectype));
-    }
-
   /* Cost model: check if the vectorization is worthwhile.  */
   if (!unlimited_cost_model (NULL)
       && !vect_bb_vectorization_profitable_p (bb_vinfo))
@@ -2616,45 +2617,6 @@ vect_slp_analyze_bb (basic_block bb)
 }
 
 
-/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
-   the number of created vector stmts depends on the unrolling factor).
-   However, the actual number of vector stmts for every SLP node depends on
-   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
-   should be updated.  In this function we assume that the inside costs
-   calculated in vect_model_xxx_cost are linear in ncopies.  */
-
-void
-vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
-{
-  unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
-  slp_instance instance;
-  stmt_vector_for_cost body_cost_vec;
-  stmt_info_for_cost *si;
-  void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
-
-  if (dump_enabled_p ())
-    dump_printf_loc (MSG_NOTE, vect_location,
-		     "=== vect_update_slp_costs_according_to_vf ===\n");
-
-  FOR_EACH_VEC_ELT (slp_instances, i, instance)
-    {
-      /* We assume that costs are linear in ncopies.  */
-      int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
-
-      /* Record the instance's instructions in the target cost model.
-	 This was delayed until here because the count of instructions
-	 isn't known beforehand.  */
-      body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
-
-      FOR_EACH_VEC_ELT (body_cost_vec, j, si)
-	(void) add_stmt_cost (data, si->count * ncopies, si->kind,
-			      vinfo_for_stmt (si->stmt), si->misalign,
-			      vect_body);
-    }
-}
-
-
 /* For constant and loop invariant defs of SLP_NODE this function returns
    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	(revision 223743)
+++ gcc/tree-vectorizer.h	(working copy)
@@ -128,9 +128,6 @@ typedef struct _slp_instance {
   /* The unrolling factor required to vectorized this SLP instance.  */
   unsigned int unrolling_factor;
 
-  /* Vectorization costs associated with SLP instance.  */
-  stmt_vector_for_cost body_cost_vec;
-
   /* The group of nodes that contain loads of this SLP instance.  */
   vec<slp_tree> loads;
 } *slp_instance;
@@ -140,7 +137,6 @@ typedef struct _slp_instance {
 #define SLP_INSTANCE_TREE(S)                     (S)->root
 #define SLP_INSTANCE_GROUP_SIZE(S)               (S)->group_size
 #define SLP_INSTANCE_UNROLLING_FACTOR(S)         (S)->unrolling_factor
-#define SLP_INSTANCE_BODY_COST_VEC(S)            (S)->body_cost_vec
 #define SLP_INSTANCE_LOADS(S)                    (S)->loads
 
 #define SLP_TREE_CHILDREN(S)                     (S)->children
@@ -1114,9 +1110,9 @@ extern void vect_free_slp_instance (slp_
 extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> ,
                                           gimple_stmt_iterator *, int,
                                           slp_instance, bool);
-extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances);
+extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances,
+					 void *);
 extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);
-extern void vect_update_slp_costs_according_to_vf (loop_vec_info);
 extern bool vect_analyze_slp (loop_vec_info, bb_vec_info, unsigned);
 extern bool vect_make_slp_decision (loop_vec_info);
 extern void vect_detect_hybrid_slp (loop_vec_info);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]