This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][3/3] Consider multiple vector sizes for vectorization based on cost


The following makes the vectorizer consider all vector sizes as advertised
by targetm.vectorize.autovectorize_vector_sizes and decide on which
vector size to use based on costs.

Given comparing costs is difficult if you do not know the number of
scalar iterations the patch simply uses the cost of a single vector
iteration (weighted by vectorization factor) to decide which variant
is better.  For this we compute this cost also for -fno-vect-cost-model
(so even with that you'll get the vectorizer choose between sizes)
and store it in a new LOOP_VINFO_SINGLE_VECTOR_ITERATION_COST.

Otherwise this is straight-forward and doesn't really depend on
dataref/ddr analysis sharing (but that makes it less costly).

Bootstrap / regtest pending on x86_64-unknown-linux-gnu.

Richard.

>From 4234213fcd5e6fe844bc1171938e12ab189ef290 Mon Sep 17 00:00:00 2001
From: Richard Guenther <rguenther@suse.de>
Date: Thu, 21 Jun 2018 13:40:14 +0200
Subject: [PATCH] try-multiple-vector-sizes-and-compare-costs

2018-06-22  Richard Biener  <rguenther@suse.de>

	* tree-vectorizer.h (struct _loop_vec_info): Add
	single_vector_iteration_cost member.
	(LOOP_VINFO_SINGLE_VECTOR_ITERATION_COST): New.
	* tree-vect-loop.c (_loop_vec_info::~_loop_vec_info): Do not
	reset loop->aux here.
	(vect_analyze_loop_form): Do not assert or set loop->aux here.
	(vect_analyze_loop): Iterate over all vector sizes and decide
	based on the vector iteration cost which one to use.
	(vect_estimate_min_profitable_iters): Move check for
	unlimited cost model later to not skip cost computation or
	dumping.  Set LOOP_VINFO_SINGLE_VECTOR_ITERATION_COST.
	(vect_create_epilog_for_reduction): Use loop_vinfo rather
	than loop_vec_info_for_loop.

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index dacc8811636..0c2ab57bb20 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -947,8 +947,6 @@ _loop_vec_info::~_loop_vec_info ()
 
   release_vec_loop_masks (&masks);
   delete ivexpr_map;
-
-  loop->aux = NULL;
 }
 
 /* Return an invariant or register for EXPR and emit necessary
@@ -1395,8 +1393,6 @@ vect_analyze_loop_form (struct loop *loop, vec_info_shared *shared)
     STMT_VINFO_TYPE (vinfo_for_stmt (inner_loop_cond))
       = loop_exit_ctrl_vec_info_type;
 
-  gcc_assert (!loop->aux);
-  loop->aux = loop_vinfo;
   return loop_vinfo;
 }
 
@@ -2285,7 +2281,6 @@ loop_vec_info
 vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
 		   vec_info_shared *shared)
 {
-  loop_vec_info loop_vinfo;
   auto_vector_sizes vector_sizes;
 
   /* Autodetect first vector size we try.  */
@@ -2316,11 +2311,12 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
     }
 
   unsigned n_stmts;
+  auto_vec<std::pair<loop_vec_info, poly_uint64> > loop_vinfos;
   poly_uint64 autodetected_vector_size = 0;
   while (1)
     {
       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
-      loop_vinfo = vect_analyze_loop_form (loop, shared);
+      loop_vec_info loop_vinfo = vect_analyze_loop_form (loop, shared);
       if (!loop_vinfo)
 	{
 	  if (dump_enabled_p ())
@@ -2338,10 +2334,24 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
 	{
 	  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
 
-	  return loop_vinfo;
+	  loop_vinfos.safe_push (std::make_pair (loop_vinfo,
+						 current_vector_size));
 	}
+      else
+	delete loop_vinfo;
 
-      delete loop_vinfo;
+      /* If all vector sizes are going to fail or if we didn't end up
+         with any vector fail.  */
+      if (fatal
+	  || known_eq (current_vector_size, 0U))
+	{
+	  /* With gathers/scatters we can end up claiming dataref analysis
+	     failed for one but not the other vector size so we can't
+	     really trust "fatal" here and assert loop_vinfos has length
+	     zero.  Still stop looking for other vector sizes since that
+	     is what we've done before.  */
+	  break;
+	}
 
       if (next_size == 0)
 	autodetected_vector_size = current_vector_size;
@@ -2350,10 +2360,8 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
 	  && known_eq (vector_sizes[next_size], autodetected_vector_size))
 	next_size += 1;
 
-      if (fatal
-	  || next_size == vector_sizes.length ()
-	  || known_eq (current_vector_size, 0U))
-	return NULL;
+      if (next_size == vector_sizes.length ())
+	break;
 
       /* Try the next biggest vector size.  */
       current_vector_size = vector_sizes[next_size++];
@@ -2366,6 +2374,30 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
 	  dump_printf (MSG_NOTE, "\n");
 	}
     }
+
+  if (loop_vinfos.is_empty ())
+    return NULL;
+
+  /* Go over vinfos, selecting the one with best cost.  */
+  loop_vec_info loop_vinfo = loop_vinfos[0].first;
+  current_vector_size = loop_vinfos[0].second;
+  for (unsigned i = 1; i < loop_vinfos.length (); ++i)
+    if (known_lt (LOOP_VINFO_SINGLE_VECTOR_ITERATION_COST (loop_vinfos[i].first)
+		  * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+		  LOOP_VINFO_SINGLE_VECTOR_ITERATION_COST (loop_vinfo)
+		  * LOOP_VINFO_VECT_FACTOR (loop_vinfos[i].first)))
+      {
+	/* ???  Just comparing the vector iteration cost possibly isn't
+	   the best thing to do.  */
+	delete loop_vinfo;
+	loop_vinfo = loop_vinfos[i].first;
+	current_vector_size = loop_vinfos[i].second;
+      }
+    else
+      delete loop_vinfos[i].first;
+
+  set_stmt_vec_info_vec (&loop_vinfo->stmt_vec_infos);
+  return loop_vinfo;
 }
 
 /* Return true if there is an in-order reduction function for CODE, storing
@@ -3432,15 +3464,6 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
   int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
 
-  /* Cost model disabled.  */
-  if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
-    {
-      dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
-      *ret_min_profitable_niters = 0;
-      *ret_min_profitable_estimate = 0;
-      return;
-    }
-
   /* Requires loop versioning tests to handle misalignment.  */
   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
     {
@@ -3693,6 +3716,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
 	       &vec_inside_cost, &vec_epilogue_cost);
 
+  LOOP_VINFO_SINGLE_VECTOR_ITERATION_COST (loop_vinfo) = vec_inside_cost;
   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
   
   if (dump_enabled_p ())
@@ -3716,6 +3740,17 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
                    peel_iters_epilogue);
     }
 
+  /* Cost model disabled.  We still do the above work to be able to
+     compute a vector iteration cost and provide cost analysis in the
+     dumps.  */
+  if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
+    {
+      dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
+      *ret_min_profitable_niters = 0;
+      *ret_min_profitable_estimate = 0;
+      return;
+    }
+
   /* Calculate number of iterations required to make the vector version
      profitable, relative to the loop bodies only.  The following condition
      must hold true:
@@ -5751,8 +5786,7 @@ vect_finalize_reduction:
 
                   /* Create vector phi node.  */
                   vect_phi = create_phi_node (vec_initial_def, bb);
-                  new_phi_vinfo = new_stmt_vec_info (vect_phi,
-                                    loop_vec_info_for_loop (outer_loop));
+                  new_phi_vinfo = new_stmt_vec_info (vect_phi, loop_vinfo);
                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
 
                   /* Create vs0 - initial def of the double reduction phi.  */
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 55f8e6e4407..e93f40ba73f 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -468,6 +468,9 @@ typedef struct _loop_vec_info : public vec_info {
   /* Cost of a single scalar iteration.  */
   int single_scalar_iteration_cost;
 
+  /* Cost of a single vector iteration.  */
+  int single_vector_iteration_cost;
+
   /* Is the loop vectorizable? */
   bool vectorizable;
 
@@ -571,6 +574,7 @@ typedef struct _loop_vec_info : public vec_info {
 #define LOOP_VINFO_HAS_MASK_STORE(L)       (L)->has_mask_store
 #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
 #define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
+#define LOOP_VINFO_SINGLE_VECTOR_ITERATION_COST(L) (L)->single_vector_iteration_cost
 #define LOOP_VINFO_ORIG_LOOP_INFO(L)       (L)->orig_loop_info
 
 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L)	\


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]