[committed] aarch64: Fold aarch64_sve_op_count into aarch64_vec_op_count

Richard Sandiford richard.sandiford@arm.com
Fri Nov 12 17:36:34 GMT 2021


Later patches make aarch64 use the new vector hooks.  We then
only need to track one set of ops for each aarch64_vector_costs
structure.  This in turn means that it's more convenient to merge
aarch64_sve_op_count and aarch64_vec_op_count.

The patch also adds issue info and vec flags to aarch64_vec_op_count,
so that the structure is more self-descriptive.  This simplifies some
things later.

Tested on aarch64-linux-gnu & applied.

Richard


gcc/
	* config/aarch64/aarch64.c (aarch64_sve_op_count): Fold into...
	(aarch64_vec_op_count): ...this.  Add a constructor.
	(aarch64_vec_op_count::vec_flags): New function.
	(aarch64_vec_op_count::base_issue_info): Likewise.
	(aarch64_vec_op_count::simd_issue_info): Likewise.
	(aarch64_vec_op_count::sve_issue_info): Likewise.
	(aarch64_vec_op_count::m_issue_info): New member variable.
	(aarch64_vec_op_count::m_vec_flags): Likewise.
	(aarch64_vector_costs): Add a constructor.
	(aarch64_vector_costs::m_sve_ops): Change type to aarch64_vec_op_count.
	(aarch64_vector_costs::aarch64_vector_costs): New function.
	Initialize m_scalar_ops, m_advsimd_ops and m_sve_ops.
	(aarch64_vector_costs::count_ops): Remove vec_flags and
	issue_info parameters, using the new aarch64_vec_op_count
	functions instead.
	(aarch64_vector_costs::add_stmt_cost): Update call accordingly.
	(aarch64_sve_op_count::dump): Fold into...
	(aarch64_vec_op_count::dump): ...here.
---
 gcc/config/aarch64/aarch64.c | 153 ++++++++++++++++++++++-------------
 1 file changed, 96 insertions(+), 57 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 850288d0e01..c8a3cb38473 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14707,8 +14707,16 @@ aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
 /* Information about how the CPU would issue the scalar, Advanced SIMD
    or SVE version of a vector loop, using the scheme defined by the
    aarch64_base_vec_issue_info hierarchy of structures.  */
-struct aarch64_vec_op_count
+class aarch64_vec_op_count
 {
+public:
+  aarch64_vec_op_count (const aarch64_vec_issue_info *, unsigned int);
+
+  unsigned int vec_flags () const { return m_vec_flags; }
+  const aarch64_base_vec_issue_info *base_issue_info () const;
+  const aarch64_simd_vec_issue_info *simd_issue_info () const;
+  const aarch64_sve_vec_issue_info *sve_issue_info () const;
+
   void dump () const;
 
   /* The number of individual "general" operations.  See the comments
@@ -14724,23 +14732,71 @@ struct aarch64_vec_op_count
      operations, which in the vector code become associated with
      reductions.  */
   unsigned int reduction_latency = 0;
-};
-
-/* Extends aarch64_vec_op_count with SVE-specific information.  */
-struct aarch64_sve_op_count : aarch64_vec_op_count
-{
-  void dump () const;
 
   /* The number of individual predicate operations.  See the comments
      in aarch64_sve_vec_issue_info for details.  */
   unsigned int pred_ops = 0;
+
+private:
+  /* The issue information for the core.  */
+  const aarch64_vec_issue_info *m_issue_info;
+
+  /* - If M_VEC_FLAGS is zero then this structure describes scalar code
+     - If M_VEC_FLAGS & VEC_ADVSIMD is nonzero then this structure describes
+       Advanced SIMD code.
+     - If M_VEC_FLAGS & VEC_ANY_SVE is nonzero then this structure describes
+       SVE code.  */
+  unsigned int m_vec_flags;
 };
 
+aarch64_vec_op_count::
+aarch64_vec_op_count (const aarch64_vec_issue_info *issue_info,
+		      unsigned int vec_flags)
+  : m_issue_info (issue_info),
+    m_vec_flags (vec_flags)
+{
+}
+
+/* Return the base issue information (i.e. the parts that make sense
+   for both scalar and vector code).  Return null if we have no issue
+   information.  */
+const aarch64_base_vec_issue_info *
+aarch64_vec_op_count::base_issue_info () const
+{
+  if (auto *ret = simd_issue_info ())
+    return ret;
+  if (m_issue_info)
+    return m_issue_info->scalar;
+  return nullptr;
+}
+
+/* If the structure describes vector code and we have associated issue
+   information, return that issue information, otherwise return null.  */
+const aarch64_simd_vec_issue_info *
+aarch64_vec_op_count::simd_issue_info () const
+{
+  if (auto *ret = sve_issue_info ())
+    return ret;
+  if (m_issue_info && m_vec_flags)
+    return m_issue_info->advsimd;
+  return nullptr;
+}
+
+/* If the structure describes SVE code and we have associated issue
+   information, return that issue information, otherwise return null.  */
+const aarch64_sve_vec_issue_info *
+aarch64_vec_op_count::sve_issue_info () const
+{
+  if (m_issue_info && (m_vec_flags & VEC_ANY_SVE))
+    return m_issue_info->sve;
+  return nullptr;
+}
+
 /* Information about vector code that we're in the process of costing.  */
 class aarch64_vector_costs : public vector_costs
 {
 public:
-  using vector_costs::vector_costs;
+  aarch64_vector_costs (vec_info *, bool);
 
   unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
 			      stmt_vec_info stmt_info, tree vectype,
@@ -14752,8 +14808,7 @@ private:
   void record_potential_advsimd_unrolling (loop_vec_info);
   void analyze_loop_vinfo (loop_vec_info);
   void count_ops (unsigned int, vect_cost_for_stmt, stmt_vec_info, tree,
-		  unsigned int, aarch64_vec_op_count *,
-		  const aarch64_base_vec_issue_info *, unsigned int);
+		  aarch64_vec_op_count *, unsigned int);
   fractional_cost adjust_body_cost_sve (const aarch64_vec_issue_info *,
 					fractional_cost, fractional_cost,
 					bool, unsigned int, unsigned int *,
@@ -14809,7 +14864,7 @@ private:
 
   /* Used only when vectorizing loops with SVE.  It estimates the number and
      kind of operations that the SVE loop would contain.  */
-  aarch64_sve_op_count m_sve_ops;
+  aarch64_vec_op_count m_sve_ops;
 
   /* Used to detect cases in which we end up costing the same load twice,
      once to account for results that are actually used and once to account
@@ -14817,6 +14872,15 @@ private:
   hash_map<nofree_ptr_hash<_stmt_vec_info>, unsigned int> m_seen_loads;
 };
 
+aarch64_vector_costs::aarch64_vector_costs (vec_info *vinfo,
+					    bool costing_for_scalar)
+  : vector_costs (vinfo, costing_for_scalar),
+    m_scalar_ops (aarch64_tune_params.vec_costs->issue_info, 0),
+    m_advsimd_ops (aarch64_tune_params.vec_costs->issue_info, VEC_ADVSIMD),
+    m_sve_ops (aarch64_tune_params.vec_costs->issue_info, VEC_ANY_SVE)
+{
+}
+
 /* Implement TARGET_VECTORIZE_CREATE_COSTS.  */
 vector_costs *
 aarch64_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
@@ -15484,36 +15548,21 @@ aarch64_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
    body of a vector loop.  Record issue information relating to the vector
    operation in OPS, where OPS is one of m_scalar_ops, m_advsimd_ops
    or m_sve_ops; see the comments above those variables for details.
-   In addition:
 
-   - VEC_FLAGS is zero if OPS is m_scalar_ops.
-
-   - VEC_FLAGS & VEC_ADVSIMD is nonzero if OPS is m_advsimd_ops.
-
-   - VEC_FLAGS & VEC_ANY_SVE is nonzero if OPS is m_sve_ops.
-
-   ISSUE_INFO provides the scalar, Advanced SIMD or SVE issue information
-   associated with OPS and VEC_FLAGS.  FACTOR says how many iterations of
-   the loop described by VEC_FLAGS would be needed to match one iteration
-   of the vector loop in VINFO.  */
+   FACTOR says how many iterations of the loop described by OPS would be
+   needed to match one iteration of the vector loop in VINFO.  */
 void
 aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
 				 stmt_vec_info stmt_info, tree vectype,
-				 unsigned int vec_flags,
 				 aarch64_vec_op_count *ops,
-				 const aarch64_base_vec_issue_info *issue_info,
 				 unsigned int factor)
 {
-  if (!issue_info)
+  const aarch64_base_vec_issue_info *base_issue = ops->base_issue_info ();
+  if (!base_issue)
     return;
-
-  const aarch64_simd_vec_issue_info *simd_issue = nullptr;
-  if (vec_flags)
-    simd_issue = static_cast<const aarch64_simd_vec_issue_info *> (issue_info);
-
-  const aarch64_sve_vec_issue_info *sve_issue = nullptr;
-  if (vec_flags & VEC_ANY_SVE)
-    sve_issue = static_cast<const aarch64_sve_vec_issue_info *> (issue_info);
+  const aarch64_simd_vec_issue_info *simd_issue = ops->simd_issue_info ();
+  const aarch64_sve_vec_issue_info *sve_issue = ops->sve_issue_info ();
+  unsigned int vec_flags = ops->vec_flags ();
 
   /* Calculate the minimum cycles per iteration imposed by a reduction
      operation.  */
@@ -15608,7 +15657,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
 	}
       ops->loads += num_copies;
       if (vec_flags || FLOAT_TYPE_P (vectype))
-	ops->general_ops += issue_info->fp_simd_load_general_ops * num_copies;
+	ops->general_ops += base_issue->fp_simd_load_general_ops * num_copies;
       break;
 
     case vector_store:
@@ -15616,7 +15665,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
     case scalar_store:
       ops->stores += num_copies;
       if (vec_flags || FLOAT_TYPE_P (vectype))
-	ops->general_ops += issue_info->fp_simd_store_general_ops * num_copies;
+	ops->general_ops += base_issue->fp_simd_store_general_ops * num_copies;
       break;
     }
 
@@ -15644,7 +15693,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
 	unsigned int base = (FLOAT_TYPE_P (type)
 			     ? sve_issue->fp_cmp_pred_ops
 			     : sve_issue->int_cmp_pred_ops);
-	m_sve_ops.pred_ops += base * num_copies;
+	ops->pred_ops += base * num_copies;
       }
 
   /* Add any extra overhead associated with LD[234] and ST[234] operations.  */
@@ -15670,7 +15719,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
       && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
     {
       unsigned int pairs = CEIL (count, 2);
-      m_sve_ops.pred_ops += sve_issue->gather_scatter_pair_pred_ops * pairs;
+      ops->pred_ops += sve_issue->gather_scatter_pair_pred_ops * pairs;
       ops->general_ops += sve_issue->gather_scatter_pair_general_ops * pairs;
     }
 }
@@ -15740,9 +15789,7 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
       /* If we're recording a nonzero vector loop body cost for the
 	 innermost loop, also estimate the operations that would need
 	 to be issued by all relevant implementations of the loop.  */
-      auto *issue_info = aarch64_tune_params.vec_costs->issue_info;
       if (loop_vinfo
-	  && issue_info
 	  && m_vec_flags
 	  && where == vect_body
 	  && (!LOOP_VINFO_LOOP (loop_vinfo)->inner || in_inner_loop_p)
@@ -15750,26 +15797,24 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 	  && stmt_cost != 0)
 	{
 	  /* Record estimates for the scalar code.  */
-	  count_ops (count, kind, stmt_info, vectype, 0, &m_scalar_ops,
-		     issue_info->scalar, vect_nunits_for_cost (vectype));
+	  count_ops (count, kind, stmt_info, vectype, &m_scalar_ops,
+		     vect_nunits_for_cost (vectype));
 
-	  if (aarch64_sve_mode_p (m_vinfo->vector_mode) && issue_info->sve)
+	  if (aarch64_sve_mode_p (m_vinfo->vector_mode)
+	      && m_sve_ops.base_issue_info ())
 	    {
 	      /* Record estimates for a possible Advanced SIMD version
 		 of the SVE code.  */
-	      count_ops (count, kind, stmt_info, vectype, VEC_ADVSIMD,
-			 &m_advsimd_ops, issue_info->advsimd,
+	      count_ops (count, kind, stmt_info, vectype, &m_advsimd_ops,
 			 aarch64_estimated_sve_vq ());
 
 	      /* Record estimates for the SVE code itself.  */
-	      count_ops (count, kind, stmt_info, vectype, VEC_ANY_SVE,
-			 &m_sve_ops, issue_info->sve, 1);
+	      count_ops (count, kind, stmt_info, vectype, &m_sve_ops, 1);
 	    }
 	  else
 	    /* Record estimates for the Advanced SIMD code.  Treat SVE like
 	       Advanced SIMD if the CPU has no specific SVE costs.  */
-	    count_ops (count, kind, stmt_info, vectype, VEC_ADVSIMD,
-		       &m_advsimd_ops, issue_info->advsimd, 1);
+	    count_ops (count, kind, stmt_info, vectype, &m_advsimd_ops, 1);
 	}
 
       /* If we're applying the SVE vs. Advanced SIMD unrolling heuristic,
@@ -15793,19 +15838,13 @@ aarch64_vec_op_count::dump () const
 		   "  store operations = %d\n", stores);
   dump_printf_loc (MSG_NOTE, vect_location,
 		   "  general operations = %d\n", general_ops);
+  if (sve_issue_info ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "  predicate operations = %d\n", pred_ops);
   dump_printf_loc (MSG_NOTE, vect_location,
 		   "  reduction latency = %d\n", reduction_latency);
 }
 
-/* Dump information about the structure.  */
-void
-aarch64_sve_op_count::dump () const
-{
-  aarch64_vec_op_count::dump ();
-  dump_printf_loc (MSG_NOTE, vect_location,
-		   "  predicate operations = %d\n", pred_ops);
-}
-
 /* Use ISSUE_INFO to estimate the minimum number of cycles needed to issue
    the operations described by OPS.  This is a very simplistic model!  */
 static fractional_cost
-- 
2.25.1



More information about the Gcc-patches mailing list