[committed] aarch64: Move more code into aarch64_vector_costs

Richard Sandiford richard.sandiford@arm.com
Thu Nov 4 12:33:38 GMT 2021


I've now committed the patch to rework the vector costs hooks --
thanks to Richard for the review.

This patch moves more code into aarch64_vector_costs and reuses
some of the information that is now available in the base class.

I'm planning to significantly rework this code, with more hooks
into the vectoriser, but this seemed worth doing as a first step.

Tested on aarch64-linux-gnu and applied.

Richard


gcc/
	* config/aarch64/aarch64.c (aarch64_vector_costs): Make member
	variables private and add "m_" to their names.  Remove is_loop.
	(aarch64_record_potential_advsimd_unrolling): Replace with...
	(aarch64_vector_costs::record_potential_advsimd_unrolling): ...this.
	(aarch64_analyze_loop_vinfo): Replace with...
	(aarch64_vector_costs::analyze_loop_vinfo): ...this.
	Move initialization of (m_)vec_flags to add_stmt_cost.
	(aarch64_analyze_bb_vinfo): Delete.
	(aarch64_count_ops): Replace with...
	(aarch64_vector_costs::count_ops): ...this.
	(aarch64_vector_costs::add_stmt_cost): Set m_vec_flags,
	using m_costing_for_scalar to test whether we're costing
	scalar or vector code.
	(aarch64_adjust_body_cost_sve): Replace with...
	(aarch64_vector_costs::adjust_body_cost_sve): ...this.
	(aarch64_adjust_body_cost): Replace with...
	(aarch64_vector_costs::adjust_body_cost): ...this.
	(aarch64_vector_costs::finish_cost): Use m_vinfo instead of is_loop.
---
 gcc/config/aarch64/aarch64.c | 339 ++++++++++++++++-------------------
 1 file changed, 155 insertions(+), 184 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 771517dd4c4..cc65b58a48f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14589,8 +14589,9 @@ struct aarch64_sve_op_count : aarch64_vec_op_count
 };
 
 /* Information about vector code that we're in the process of costing.  */
-struct aarch64_vector_costs : public vector_costs
+class aarch64_vector_costs : public vector_costs
 {
+public:
   using vector_costs::vector_costs;
 
   unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
@@ -14599,26 +14600,31 @@ struct aarch64_vector_costs : public vector_costs
 			      vect_cost_model_location where) override;
   void finish_cost () override;
 
-  /* True if we have performed one-time initialization based on the vec_info.
-
-     This variable exists because the vec_info is not passed to the
-     init_cost hook.  We therefore have to defer initialization based on
-     it till later.  */
-  bool analyzed_vinfo = false;
-
-  /* True if we're costing a vector loop, false if we're costing block-level
-     vectorization.  */
-  bool is_loop = false;
+private:
+  void record_potential_advsimd_unrolling (loop_vec_info);
+  void analyze_loop_vinfo (loop_vec_info);
+  void count_ops (unsigned int, vect_cost_for_stmt, stmt_vec_info, tree,
+		  unsigned int, aarch64_vec_op_count *,
+		  const aarch64_base_vec_issue_info *, unsigned int);
+  fractional_cost adjust_body_cost_sve (const aarch64_vec_issue_info *,
+					fractional_cost, fractional_cost,
+					bool, unsigned int, unsigned int *,
+					bool *);
+  unsigned int adjust_body_cost (unsigned int);
+
+  /* True if we have performed one-time initialization based on the
+     vec_info.  */
+  bool m_analyzed_vinfo = false;
 
   /* True if we've seen an SVE operation that we cannot currently vectorize
      using Advanced SIMD.  */
-  bool saw_sve_only_op = false;
+  bool m_saw_sve_only_op = false;
 
-  /* - If VEC_FLAGS is zero then we're costing the original scalar code.
-     - If VEC_FLAGS & VEC_ADVSIMD is nonzero then we're costing Advanced
+  /* - If M_VEC_FLAGS is zero then we're costing the original scalar code.
+     - If M_VEC_FLAGS & VEC_ADVSIMD is nonzero then we're costing Advanced
        SIMD code.
-     - If VEC_FLAGS & VEC_ANY_SVE is nonzero then we're costing SVE code.  */
-  unsigned int vec_flags = 0;
+     - If M_VEC_FLAGS & VEC_ANY_SVE is nonzero then we're costing SVE code.  */
+  unsigned int m_vec_flags = 0;
 
   /* On some CPUs, SVE and Advanced SIMD provide the same theoretical vector
      throughput, such as 4x128 Advanced SIMD vs. 2x256 SVE.  In those
@@ -14628,39 +14634,39 @@ struct aarch64_vector_costs : public vector_costs
      than length-agnostic SVE, since the SVE loop would execute an unknown
      number of times and so could not be completely unrolled in the same way.
 
-     If we're applying this heuristic, UNROLLED_ADVSIMD_NITERS is the
+     If we're applying this heuristic, M_UNROLLED_ADVSIMD_NITERS is the
      number of Advanced SIMD loop iterations that would be unrolled and
-     UNROLLED_ADVSIMD_STMTS estimates the total number of statements
+     M_UNROLLED_ADVSIMD_STMTS estimates the total number of statements
      in the unrolled loop.  Both values are zero if we're not applying
      the heuristic.  */
-  unsigned HOST_WIDE_INT unrolled_advsimd_niters = 0;
-  unsigned HOST_WIDE_INT unrolled_advsimd_stmts = 0;
+  unsigned HOST_WIDE_INT m_unrolled_advsimd_niters = 0;
+  unsigned HOST_WIDE_INT m_unrolled_advsimd_stmts = 0;
 
   /* If we're vectorizing a loop that executes a constant number of times,
      this variable gives the number of times that the vector loop would
      iterate, otherwise it is zero.  */
-  uint64_t num_vector_iterations = 0;
+  uint64_t m_num_vector_iterations = 0;
 
   /* Used only when vectorizing loops.  Estimates the number and kind of scalar
      operations that would be needed to perform the same work as one iteration
      of the vector loop.  */
-  aarch64_vec_op_count scalar_ops;
+  aarch64_vec_op_count m_scalar_ops;
 
-  /* Used only when vectorizing loops.  If VEC_FLAGS & VEC_ADVSIMD,
+  /* Used only when vectorizing loops.  If M_VEC_FLAGS & VEC_ADVSIMD,
      this structure estimates the number and kind of operations that the
-     vector loop would contain.  If VEC_FLAGS & VEC_SVE, the structure
+     vector loop would contain.  If M_VEC_FLAGS & VEC_SVE, the structure
      estimates what the equivalent Advanced SIMD-only code would need in
      order to perform the same work as one iteration of the SVE loop.  */
-  aarch64_vec_op_count advsimd_ops;
+  aarch64_vec_op_count m_advsimd_ops;
 
   /* Used only when vectorizing loops with SVE.  It estimates the number and
      kind of operations that the SVE loop would contain.  */
-  aarch64_sve_op_count sve_ops;
+  aarch64_sve_op_count m_sve_ops;
 
   /* Used to detect cases in which we end up costing the same load twice,
      once to account for results that are actually used and once to account
      for unused results.  */
-  hash_map<nofree_ptr_hash<_stmt_vec_info>, unsigned int> seen_loads;
+  hash_map<nofree_ptr_hash<_stmt_vec_info>, unsigned int> m_seen_loads;
 };
 
 /* Implement TARGET_VECTORIZE_CREATE_COSTS.  */
@@ -14703,12 +14709,11 @@ aarch64_simd_vec_costs_for_flags (unsigned int flags)
 }
 
 /* Decide whether to use the unrolling heuristic described above
-   aarch64_vector_costs::unrolled_advsimd_niters, updating that
-   field if so.  LOOP_VINFO describes the loop that we're vectorizing
-   and COSTS are the costs that we're calculating for it.  */
-static void
-aarch64_record_potential_advsimd_unrolling (loop_vec_info loop_vinfo,
-					    aarch64_vector_costs *costs)
+   m_unrolled_advsimd_niters, updating that field if so.  LOOP_VINFO
+   describes the loop that we're vectorizing.  */
+void
+aarch64_vector_costs::
+record_potential_advsimd_unrolling (loop_vec_info loop_vinfo)
 {
   /* The heuristic only makes sense on targets that have the same
      vector throughput for SVE and Advanced SIMD.  */
@@ -14718,7 +14723,7 @@ aarch64_record_potential_advsimd_unrolling (loop_vec_info loop_vinfo,
 
   /* We only want to apply the heuristic if LOOP_VINFO is being
      vectorized for SVE.  */
-  if (!(costs->vec_flags & VEC_ANY_SVE))
+  if (!(m_vec_flags & VEC_ANY_SVE))
     return;
 
   /* Check whether it is possible in principle to use Advanced SIMD
@@ -14751,17 +14756,14 @@ aarch64_record_potential_advsimd_unrolling (loop_vec_info loop_vinfo,
 
   /* Record that we're applying the heuristic and should try to estimate
      the number of statements in the Advanced SIMD loop.  */
-  costs->unrolled_advsimd_niters = unrolled_advsimd_niters;
+  m_unrolled_advsimd_niters = unrolled_advsimd_niters;
 }
 
-/* Do one-time initialization of COSTS given that we're costing the loop
-   vectorization described by LOOP_VINFO.  */
-static void
-aarch64_analyze_loop_vinfo (loop_vec_info loop_vinfo,
-			    aarch64_vector_costs *costs)
+/* Do one-time initialization of the aarch64_vector_costs given that we're
+   costing the loop vectorization described by LOOP_VINFO.  */
+void
+aarch64_vector_costs::analyze_loop_vinfo (loop_vec_info loop_vinfo)
 {
-  costs->is_loop = true;
-
   /* Record the number of times that the vector loop would execute,
      if known.  */
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -14770,26 +14772,14 @@ aarch64_analyze_loop_vinfo (loop_vec_info loop_vinfo,
     {
       unsigned int vf = vect_vf_for_cost (loop_vinfo);
       if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
-	costs->num_vector_iterations = scalar_niters / vf;
+	m_num_vector_iterations = scalar_niters / vf;
       else
-	costs->num_vector_iterations = CEIL (scalar_niters, vf);
+	m_num_vector_iterations = CEIL (scalar_niters, vf);
     }
 
-  /* Detect whether we're costing the scalar code or the vector code.
-     This is a bit hacky: it would be better if the vectorizer told
-     us directly.
-
-     If we're costing the vector code, record whether we're vectorizing
-     for Advanced SIMD or SVE.  */
-  if (costs == LOOP_VINFO_TARGET_COST_DATA (loop_vinfo))
-    costs->vec_flags = aarch64_classify_vector_mode (loop_vinfo->vector_mode);
-  else
-    costs->vec_flags = 0;
-
-  /* Detect whether we're vectorizing for SVE and should
-     apply the unrolling heuristic described above
-     aarch64_vector_costs::unrolled_advsimd_niters.  */
-  aarch64_record_potential_advsimd_unrolling (loop_vinfo, costs);
+  /* Detect whether we're vectorizing for SVE and should apply the unrolling
+     heuristic described above m_unrolled_advsimd_niters.  */
+  record_potential_advsimd_unrolling (loop_vinfo);
 
   /* Record the issue information for any SVE WHILE instructions that the
      loop needs.  */
@@ -14804,21 +14794,10 @@ aarch64_analyze_loop_vinfo (loop_vec_info loop_vinfo,
       FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm)
 	if (rgm->type)
 	  num_masks += num_vectors_m1 + 1;
-      costs->sve_ops.pred_ops += num_masks * issue_info->sve->while_pred_ops;
+      m_sve_ops.pred_ops += num_masks * issue_info->sve->while_pred_ops;
     }
 }
 
-/* Do one-time initialization of COSTS given that we're costing the block
-   vectorization described by BB_VINFO.  */
-static void
-aarch64_analyze_bb_vinfo (bb_vec_info bb_vinfo, aarch64_vector_costs *costs)
-{
-  /* Unfortunately, there's no easy way of telling whether we're costing
-     the vector code or the scalar code, so just assume that we're costing
-     the vector code.  */
-  costs->vec_flags = aarch64_classify_vector_mode (bb_vinfo->vector_mode);
-}
-
 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
 static int
 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
@@ -15352,30 +15331,30 @@ aarch64_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
   return stmt_cost;
 }
 
-/* VINFO, COSTS, COUNT, KIND, STMT_INFO and VECTYPE are the same as for
+/* COUNT, KIND, STMT_INFO and VECTYPE are the same as for
    vector_costs::add_stmt_cost and they describe an operation in the
    body of a vector loop.  Record issue information relating to the vector
-   operation in OPS, where OPS is one of COSTS->scalar_ops, COSTS->advsimd_ops
-   or COSTS->sve_ops; see the comments above those variables for details.
+   operation in OPS, where OPS is one of m_scalar_ops, m_advsimd_ops
+   or m_sve_ops; see the comments above those variables for details.
    In addition:
 
-   - VEC_FLAGS is zero if OPS is COSTS->scalar_ops.
+   - VEC_FLAGS is zero if OPS is m_scalar_ops.
 
-   - VEC_FLAGS & VEC_ADVSIMD is nonzero if OPS is COSTS->advsimd_ops.
+   - VEC_FLAGS & VEC_ADVSIMD is nonzero if OPS is m_advsimd_ops.
 
-   - VEC_FLAGS & VEC_ANY_SVE is nonzero if OPS is COSTS->sve_ops.
+   - VEC_FLAGS & VEC_ANY_SVE is nonzero if OPS is m_sve_ops.
 
    ISSUE_INFO provides the scalar, Advanced SIMD or SVE issue information
    associated with OPS and VEC_FLAGS.  FACTOR says how many iterations of
    the loop described by VEC_FLAGS would be needed to match one iteration
    of the vector loop in VINFO.  */
-static void
-aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
-		   unsigned int count, enum vect_cost_for_stmt kind,
-		   _stmt_vec_info *stmt_info, tree vectype,
-		   unsigned int vec_flags, aarch64_vec_op_count *ops,
-		   const aarch64_base_vec_issue_info *issue_info,
-		   unsigned int factor)
+void
+aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
+				 stmt_vec_info stmt_info, tree vectype,
+				 unsigned int vec_flags,
+				 aarch64_vec_op_count *ops,
+				 const aarch64_base_vec_issue_info *issue_info,
+				 unsigned int factor)
 {
   if (!issue_info)
     return;
@@ -15394,9 +15373,9 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
       && vect_is_reduction (stmt_info))
     {
       unsigned int base
-	= aarch64_in_loop_reduction_latency (vinfo, stmt_info, vectype,
+	= aarch64_in_loop_reduction_latency (m_vinfo, stmt_info, vectype,
 					     vec_flags);
-      if (vect_reduc_type (vinfo, stmt_info) == FOLD_LEFT_REDUCTION)
+      if (vect_reduc_type (m_vinfo, stmt_info) == FOLD_LEFT_REDUCTION)
 	{
 	  if (aarch64_sve_mode_p (TYPE_MODE (vectype)))
 	    {
@@ -15423,7 +15402,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
     }
 
   /* Assume that multiply-adds will become a single operation.  */
-  if (stmt_info && aarch64_multiply_add_p (vinfo, stmt_info, vec_flags))
+  if (stmt_info && aarch64_multiply_add_p (m_vinfo, stmt_info, vec_flags))
     return;
 
   /* When costing scalar statements in vector code, the count already
@@ -15473,7 +15452,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
 	{
 	  bool existed = false;
 	  unsigned int &prev_count
-	    = costs->seen_loads.get_or_insert (stmt_info, &existed);
+	    = m_seen_loads.get_or_insert (stmt_info, &existed);
 	  if (existed)
 	    num_copies -= prev_count;
 	  else
@@ -15504,7 +15483,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
      have only accounted for one.  */
   if (vec_flags && (kind == vector_stmt || kind == vec_to_scalar))
     {
-      int reduc_type = vect_reduc_type (vinfo, stmt_info);
+      int reduc_type = vect_reduc_type (m_vinfo, stmt_info);
       if ((reduc_type == EXTRACT_LAST_REDUCTION && (vec_flags & VEC_ADVSIMD))
 	  || reduc_type == COND_REDUCTION)
 	ops->general_ops += num_copies;
@@ -15517,7 +15496,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
 	unsigned int base = (FLOAT_TYPE_P (type)
 			     ? sve_issue->fp_cmp_pred_ops
 			     : sve_issue->int_cmp_pred_ops);
-	costs->sve_ops.pred_ops += base * num_copies;
+	m_sve_ops.pred_ops += base * num_copies;
       }
 
   /* Add any extra overhead associated with LD[234] and ST[234] operations.  */
@@ -15543,8 +15522,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
       && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
     {
       unsigned int pairs = CEIL (count, 2);
-      costs->sve_ops.pred_ops
-	+= sve_issue->gather_scatter_pair_pred_ops * pairs;
+      m_sve_ops.pred_ops += sve_issue->gather_scatter_pair_pred_ops * pairs;
       ops->general_ops += sve_issue->gather_scatter_pair_general_ops * pairs;
     }
 }
@@ -15564,14 +15542,17 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 
   /* Do one-time initialization based on the vinfo.  */
   loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
-  bb_vec_info bb_vinfo = dyn_cast<bb_vec_info> (m_vinfo);
-  if (!analyzed_vinfo && aarch64_use_new_vector_costs_p ())
+  if (!m_analyzed_vinfo && aarch64_use_new_vector_costs_p ())
     {
+      /* If we're costing the vector code, record whether we're vectorizing
+	 for Advanced SIMD or SVE.  */
+      if (!m_costing_for_scalar)
+	m_vec_flags = aarch64_classify_vector_mode (m_vinfo->vector_mode);
+
       if (loop_vinfo)
-	aarch64_analyze_loop_vinfo (loop_vinfo, this);
-      else
-	aarch64_analyze_bb_vinfo (bb_vinfo, this);
-      this->analyzed_vinfo = true;
+	analyze_loop_vinfo (loop_vinfo);
+
+      m_analyzed_vinfo = true;
     }
 
   /* Try to get a more accurate cost by looking at STMT_INFO instead
@@ -15579,7 +15560,7 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
   if (stmt_info && aarch64_use_new_vector_costs_p ())
     {
       if (vectype && aarch64_sve_only_stmt_p (stmt_info, vectype))
-	this->saw_sve_only_op = true;
+	m_saw_sve_only_op = true;
 
       /* If we scalarize a strided store, the vectorizer costs one
 	 vec_to_scalar for each element.  However, we can store the first
@@ -15587,10 +15568,10 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
       if (vect_is_store_elt_extraction (kind, stmt_info))
 	count -= 1;
 
-      stmt_cost = aarch64_detect_scalar_stmt_subtype
-	(m_vinfo, kind, stmt_info, stmt_cost);
+      stmt_cost = aarch64_detect_scalar_stmt_subtype (m_vinfo, kind,
+						      stmt_info, stmt_cost);
 
-      if (vectype && this->vec_flags)
+      if (vectype && m_vec_flags)
 	stmt_cost = aarch64_detect_vector_stmt_subtype (m_vinfo, kind,
 							stmt_info, vectype,
 							where, stmt_cost);
@@ -15614,37 +15595,33 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
       auto *issue_info = aarch64_tune_params.vec_costs->issue_info;
       if (loop_vinfo
 	  && issue_info
-	  && this->vec_flags
+	  && m_vec_flags
 	  && where == vect_body
 	  && (!LOOP_VINFO_LOOP (loop_vinfo)->inner || in_inner_loop_p)
 	  && vectype
 	  && stmt_cost != 0)
 	{
 	  /* Record estimates for the scalar code.  */
-	  aarch64_count_ops (m_vinfo, this, count, kind, stmt_info, vectype,
-			     0, &this->scalar_ops, issue_info->scalar,
-			     vect_nunits_for_cost (vectype));
+	  count_ops (count, kind, stmt_info, vectype, 0, &m_scalar_ops,
+		     issue_info->scalar, vect_nunits_for_cost (vectype));
 
 	  if (aarch64_sve_mode_p (m_vinfo->vector_mode) && issue_info->sve)
 	    {
 	      /* Record estimates for a possible Advanced SIMD version
 		 of the SVE code.  */
-	      aarch64_count_ops (m_vinfo, this, count, kind, stmt_info,
-				 vectype, VEC_ADVSIMD, &this->advsimd_ops,
-				 issue_info->advsimd,
-				 aarch64_estimated_sve_vq ());
+	      count_ops (count, kind, stmt_info, vectype, VEC_ADVSIMD,
+			 &m_advsimd_ops, issue_info->advsimd,
+			 aarch64_estimated_sve_vq ());
 
 	      /* Record estimates for the SVE code itself.  */
-	      aarch64_count_ops (m_vinfo, this, count, kind, stmt_info,
-				 vectype, VEC_ANY_SVE, &this->sve_ops,
-				 issue_info->sve, 1);
+	      count_ops (count, kind, stmt_info, vectype, VEC_ANY_SVE,
+			 &m_sve_ops, issue_info->sve, 1);
 	    }
 	  else
 	    /* Record estimates for the Advanced SIMD code.  Treat SVE like
 	       Advanced SIMD if the CPU has no specific SVE costs.  */
-	    aarch64_count_ops (m_vinfo, this, count, kind, stmt_info,
-			       vectype, VEC_ADVSIMD, &this->advsimd_ops,
-			       issue_info->advsimd, 1);
+	    count_ops (count, kind, stmt_info, vectype, VEC_ADVSIMD,
+		       &m_advsimd_ops, issue_info->advsimd, 1);
 	}
 
       /* If we're applying the SVE vs. Advanced SIMD unrolling heuristic,
@@ -15652,9 +15629,8 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 	 loop.  For simplicitly, we assume that one iteration of the
 	 Advanced SIMD loop would need the same number of statements
 	 as one iteration of the SVE loop.  */
-      if (where == vect_body && this->unrolled_advsimd_niters)
-	this->unrolled_advsimd_stmts
-	  += count * this->unrolled_advsimd_niters;
+      if (where == vect_body && m_unrolled_advsimd_niters)
+	m_unrolled_advsimd_stmts += count * m_unrolled_advsimd_niters;
     }
   return record_stmt_cost (stmt_info, where, (count * stmt_cost).ceil ());
 }
@@ -15698,32 +15674,28 @@ aarch64_estimate_min_cycles_per_iter
   return cycles;
 }
 
-/* Subroutine of aarch64_adjust_body_cost for handling SVE.
-   Use ISSUE_INFO to work out how fast the SVE code can be issued and compare
-   it to the equivalent value for scalar code (SCALAR_CYCLES_PER_ITER).
-   If COULD_USE_ADVSIMD is true, also compare it to the issue rate of
-   Advanced SIMD code (ADVSIMD_CYCLES_PER_ITER).
+/* Subroutine of adjust_body_cost for handling SVE.  Use ISSUE_INFO to work out
+   how fast the SVE code can be issued and compare it to the equivalent value
+   for scalar code (SCALAR_CYCLES_PER_ITER).  If COULD_USE_ADVSIMD is true,
+   also compare it to the issue rate of Advanced SIMD code
+   (ADVSIMD_CYCLES_PER_ITER).
 
-   COSTS is as for aarch64_adjust_body_cost.  ORIG_BODY_COST is the cost
-   originally passed to aarch64_adjust_body_cost and *BODY_COST is the current
-   value of the adjusted cost.  *SHOULD_DISPARAGE is true if we think the loop
-   body is too expensive.  */
+   ORIG_BODY_COST is the cost originally passed to adjust_body_cost and
+   *BODY_COST is the current value of the adjusted cost.  *SHOULD_DISPARAGE
+   is true if we think the loop body is too expensive.  */
 
-static fractional_cost
-aarch64_adjust_body_cost_sve (const aarch64_vector_costs *costs,
-			      const aarch64_vec_issue_info *issue_info,
-			      fractional_cost scalar_cycles_per_iter,
-			      fractional_cost advsimd_cycles_per_iter,
-			      bool could_use_advsimd,
-			      unsigned int orig_body_cost,
-			      unsigned int *body_cost,
-			      bool *should_disparage)
+fractional_cost
+aarch64_vector_costs::
+adjust_body_cost_sve (const aarch64_vec_issue_info *issue_info,
+		      fractional_cost scalar_cycles_per_iter,
+		      fractional_cost advsimd_cycles_per_iter,
+		      bool could_use_advsimd, unsigned int orig_body_cost,
+		      unsigned int *body_cost, bool *should_disparage)
 {
   /* Estimate the minimum number of cycles per iteration needed to issue
      non-predicate operations.  */
   fractional_cost sve_nonpred_issue_cycles_per_iter
-    = aarch64_estimate_min_cycles_per_iter (&costs->sve_ops,
-					    issue_info->sve);
+    = aarch64_estimate_min_cycles_per_iter (&m_sve_ops, issue_info->sve);
 
   /* Estimate the minimum number of cycles per iteration needed to rename
      SVE instructions.
@@ -15739,9 +15711,9 @@ aarch64_adjust_body_cost_sve (const aarch64_vector_costs *costs,
        ??? This value is very much on the pessimistic side, but seems to work
        pretty well in practice.  */
     sve_rename_cycles_per_iter
-      = { costs->sve_ops.general_ops
-	  + costs->sve_ops.loads
-	  + costs->sve_ops.pred_ops + 1, 5 };
+      = { m_sve_ops.general_ops
+	  + m_sve_ops.loads
+	  + m_sve_ops.pred_ops + 1, 5 };
 
   /* Combine the rename and non-predicate issue limits into a single value.  */
   fractional_cost sve_nonpred_cycles_per_iter
@@ -15750,7 +15722,7 @@ aarch64_adjust_body_cost_sve (const aarch64_vector_costs *costs,
   /* Separately estimate the minimum number of cycles per iteration needed
      to issue the predicate operations.  */
   fractional_cost sve_pred_issue_cycles_per_iter
-    = { costs->sve_ops.pred_ops, issue_info->sve->pred_ops_per_cycle };
+    = { m_sve_ops.pred_ops, issue_info->sve->pred_ops_per_cycle };
 
   /* Calculate the overall limit on the number of cycles per iteration.  */
   fractional_cost sve_cycles_per_iter
@@ -15758,15 +15730,15 @@ aarch64_adjust_body_cost_sve (const aarch64_vector_costs *costs,
 
   if (dump_enabled_p ())
     {
-      costs->sve_ops.dump ();
+      m_sve_ops.dump ();
       dump_printf_loc (MSG_NOTE, vect_location,
 		       "  estimated cycles per iteration = %f\n",
 		       sve_cycles_per_iter.as_double ());
-      if (costs->sve_ops.pred_ops)
+      if (m_sve_ops.pred_ops)
 	dump_printf_loc (MSG_NOTE, vect_location,
 			 "    predicate issue = %f\n",
 			 sve_pred_issue_cycles_per_iter.as_double ());
-      if (costs->sve_ops.pred_ops || sve_rename_cycles_per_iter)
+      if (m_sve_ops.pred_ops || sve_rename_cycles_per_iter)
 	dump_printf_loc (MSG_NOTE, vect_location,
 			 "    non-predicate issue = %f\n",
 			 sve_nonpred_issue_cycles_per_iter.as_double ());
@@ -15843,10 +15815,10 @@ aarch64_adjust_body_cost_sve (const aarch64_vector_costs *costs,
   return sve_cycles_per_iter;
 }
 
-/* BODY_COST is the cost of a vector loop body recorded in COSTS.
-   Adjust the cost as necessary and return the new cost.  */
-static unsigned int
-aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
+/* BODY_COST is the cost of a vector loop body.  Adjust the cost as necessary
+   and return the new cost.  */
+unsigned int
+aarch64_vector_costs::adjust_body_cost (unsigned int body_cost)
 {
   unsigned int orig_body_cost = body_cost;
   bool should_disparage = false;
@@ -15855,15 +15827,15 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
     dump_printf_loc (MSG_NOTE, vect_location,
 		     "Original vector body cost = %d\n", body_cost);
 
-  if (costs->unrolled_advsimd_stmts)
+  if (m_unrolled_advsimd_stmts)
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_NOTE, vect_location, "Number of insns in"
 			 " unrolled Advanced SIMD loop = %d\n",
-			 costs->unrolled_advsimd_stmts);
+			 m_unrolled_advsimd_stmts);
 
       /* Apply the Advanced SIMD vs. SVE unrolling heuristic described above
-	 aarch64_vector_costs::unrolled_advsimd_niters.
+	 m_unrolled_advsimd_niters.
 
 	 The balance here is tricky.  On the one hand, we can't be sure whether
 	 the code is vectorizable with Advanced SIMD or not.  However, even if
@@ -15871,8 +15843,8 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
 	 the scalar code could also be unrolled.  Some of the code might then
 	 benefit from SLP, or from using LDP and STP.  We therefore apply
 	 the heuristic regardless of can_use_advsimd_p.  */
-      if (costs->unrolled_advsimd_stmts
-	  && (costs->unrolled_advsimd_stmts
+      if (m_unrolled_advsimd_stmts
+	  && (m_unrolled_advsimd_stmts
 	      <= (unsigned int) param_max_completely_peeled_insns))
 	{
 	  unsigned int estimated_vq = aarch64_estimated_sve_vq ();
@@ -15894,28 +15866,28 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
     return body_cost;
 
   fractional_cost scalar_cycles_per_iter
-    = aarch64_estimate_min_cycles_per_iter (&costs->scalar_ops,
+    = aarch64_estimate_min_cycles_per_iter (&m_scalar_ops,
 					    issue_info->scalar);
 
   fractional_cost advsimd_cycles_per_iter
-    = aarch64_estimate_min_cycles_per_iter (&costs->advsimd_ops,
+    = aarch64_estimate_min_cycles_per_iter (&m_advsimd_ops,
 					    issue_info->advsimd);
 
   bool could_use_advsimd
-    = ((costs->vec_flags & VEC_ADVSIMD)
+    = ((m_vec_flags & VEC_ADVSIMD)
        || (aarch64_autovec_preference != 2
 	   && (aarch64_tune_params.extra_tuning_flags
 	       & AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT)
-	   && !costs->saw_sve_only_op));
+	   && !m_saw_sve_only_op));
 
   if (dump_enabled_p ())
     {
-      if (IN_RANGE (costs->num_vector_iterations, 0, 65536))
+      if (IN_RANGE (m_num_vector_iterations, 0, 65536))
 	dump_printf_loc (MSG_NOTE, vect_location,
 			 "Vector loop iterates at most %wd times\n",
-			 costs->num_vector_iterations);
+			 m_num_vector_iterations);
       dump_printf_loc (MSG_NOTE, vect_location, "Scalar issue estimate:\n");
-      costs->scalar_ops.dump ();
+      m_scalar_ops.dump ();
       dump_printf_loc (MSG_NOTE, vect_location,
 		       "  estimated cycles per iteration = %f\n",
 		       scalar_cycles_per_iter.as_double ());
@@ -15923,7 +15895,7 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
 	{
 	  dump_printf_loc (MSG_NOTE, vect_location,
 			   "Advanced SIMD issue estimate:\n");
-	  costs->advsimd_ops.dump ();
+	  m_advsimd_ops.dump ();
 	  dump_printf_loc (MSG_NOTE, vect_location,
 			   "  estimated cycles per iteration = %f\n",
 			   advsimd_cycles_per_iter.as_double ());
@@ -15934,19 +15906,17 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
     }
 
   fractional_cost vector_cycles_per_iter = advsimd_cycles_per_iter;
-  unsigned int vector_reduction_latency = costs->advsimd_ops.reduction_latency;
+  unsigned int vector_reduction_latency = m_advsimd_ops.reduction_latency;
 
-  if ((costs->vec_flags & VEC_ANY_SVE) && issue_info->sve)
+  if ((m_vec_flags & VEC_ANY_SVE) && issue_info->sve)
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_NOTE, vect_location, "SVE issue estimate:\n");
-      vector_reduction_latency = costs->sve_ops.reduction_latency;
+      vector_reduction_latency = m_sve_ops.reduction_latency;
       vector_cycles_per_iter
-	= aarch64_adjust_body_cost_sve (costs, issue_info,
-					scalar_cycles_per_iter,
-					advsimd_cycles_per_iter,
-					could_use_advsimd, orig_body_cost,
-					&body_cost, &should_disparage);
+	= adjust_body_cost_sve (issue_info, scalar_cycles_per_iter,
+				advsimd_cycles_per_iter, could_use_advsimd,
+				orig_body_cost, &body_cost, &should_disparage);
 
       if (aarch64_tune_params.vec_costs == &neoverse512tvb_vector_cost)
 	{
@@ -15956,22 +15926,22 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_NOTE, vect_location,
 			     "Neoverse V1 estimate:\n");
-	  aarch64_adjust_body_cost_sve (costs, &neoversev1_vec_issue_info,
-					scalar_cycles_per_iter * 2,
-					advsimd_cycles_per_iter * 2,
-					could_use_advsimd, orig_body_cost,
-					&body_cost, &should_disparage);
+	  adjust_body_cost_sve (&neoversev1_vec_issue_info,
+				scalar_cycles_per_iter * 2,
+				advsimd_cycles_per_iter * 2,
+				could_use_advsimd, orig_body_cost,
+				&body_cost, &should_disparage);
 	}
     }
 
   /* Decide whether to stick to latency-based costs or whether to try to
      take issue rates into account.  */
   unsigned int threshold = aarch64_loop_vect_issue_rate_niters;
-  if (costs->vec_flags & VEC_ANY_SVE)
+  if (m_vec_flags & VEC_ANY_SVE)
     threshold = CEIL (threshold, aarch64_estimated_sve_vq ());
 
-  if (costs->num_vector_iterations >= 1
-      && costs->num_vector_iterations < threshold)
+  if (m_num_vector_iterations >= 1
+      && m_num_vector_iterations < threshold)
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_NOTE, vect_location,
@@ -16004,8 +15974,8 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
      vector code is an improvement, even if adding the other (non-loop-carried)
      latencies tends to hide this saving.  We therefore reduce the cost of the
      vector loop body in proportion to the saving.  */
-  else if (costs->scalar_ops.reduction_latency > vector_reduction_latency
-	   && costs->scalar_ops.reduction_latency == scalar_cycles_per_iter
+  else if (m_scalar_ops.reduction_latency > vector_reduction_latency
+	   && m_scalar_ops.reduction_latency == scalar_cycles_per_iter
 	   && scalar_cycles_per_iter > vector_cycles_per_iter
 	   && !should_disparage)
     {
@@ -16023,10 +15993,11 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
 void
 aarch64_vector_costs::finish_cost ()
 {
-  if (this->is_loop
-      && this->vec_flags
+  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
+  if (loop_vinfo
+      && m_vec_flags
       && aarch64_use_new_vector_costs_p ())
-    m_costs[vect_body] = aarch64_adjust_body_cost (this, m_costs[vect_body]);
+    m_costs[vect_body] = adjust_body_cost (m_costs[vect_body]);
 
   vector_costs::finish_cost ();
 }
-- 
2.25.1



More information about the Gcc-patches mailing list