[PATCH 2/5] AArch64 sve: combine nested if predicates

Tamar Christina Tamar.Christina@arm.com
Mon Nov 15 10:47:01 GMT 2021


Bootstrapped and regtested on aarch64-none-linux-gnu and x86_64-linux-gnu with no issues.

gcc/ChangeLog:

	* tree-vect-stmts.c (prepare_load_store_mask): Rename to...
	(prepare_vec_mask): ...This and record operations that have already been
	masked.
	(vectorizable_call): Use it.
	(vectorizable_operation): Likewise.
	(vectorizable_store): Likewise.
	(vectorizable_load): Likewise.
	* tree-vectorizer.c (vec_cond_masked_key::get_cond_ops_from_tree): New.
	* tree-vectorizer.h (struct vec_cond_masked_key): New.
	(class _loop_vec_info): Add vec_cond_masked_set.
	(vec_cond_masked_set_type): New.
	(struct default_hash_traits<vec_cond_masked_key>): New.


gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve/pred-combine-and.c: New test.

--- inline copy of patch ---

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
new file mode 100644
index 0000000000000000000000000000000000000000..ee927346abe518caa3cba397b11dfd1ee7e93630
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+void f5(float * restrict z0, float * restrict z1, float *restrict x, float * restrict y, float c, int n)
+{
+    for (int i = 0; i < n; i++) {
+        float a = x[i];
+        float b = y[i];
+        if (a > b) {
+            z0[i] = a + b;
+            if (a > c) {
+                z1[i] = a - b;
+            }
+        }
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s} 2 } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2284ad069e4d521f4e0bd43d34181a258cd636ef..b1946b589043312a9b29d832f9b8398e24787a5f 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1796,23 +1796,30 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
    that needs to be applied to all loads and stores in a vectorized loop.
-   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
+   Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
+   otherwise return VEC_MASK & LOOP_MASK.
 
    MASK_TYPE is the type of both masks.  If new statements are needed,
    insert them before GSI.  */
 
 static tree
-prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
-			 gimple_stmt_iterator *gsi)
+prepare_vec_mask (tree mask_type, loop_vec_info loop_vinfo, tree loop_mask,
+		  tree vec_mask, gimple_stmt_iterator *gsi)
 {
   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
   if (!loop_mask)
     return vec_mask;
 
   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
+
+  vec_cond_masked_key cond (vec_mask, loop_mask);
+  if (loop_vinfo->vec_cond_masked_set.contains (cond))
+    return vec_mask;
+
   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
 					  vec_mask, loop_mask);
+
   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
   return and_res;
 }
@@ -3526,8 +3533,9 @@ vectorizable_call (vec_info *vinfo,
 			  gcc_assert (ncopies == 1);
 			  tree mask = vect_get_loop_mask (gsi, masks, vec_num,
 							  vectype_out, i);
-			  vargs[mask_opno] = prepare_load_store_mask
-			    (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
+			  vargs[mask_opno] = prepare_vec_mask
+			    (TREE_TYPE (mask), loop_vinfo, mask,
+			     vargs[mask_opno], gsi);
 			}
 
 		      gcall *call;
@@ -3564,8 +3572,8 @@ vectorizable_call (vec_info *vinfo,
 	      tree mask = vect_get_loop_mask (gsi, masks, ncopies,
 					      vectype_out, j);
 	      vargs[mask_opno]
-		= prepare_load_store_mask (TREE_TYPE (mask), mask,
-					   vargs[mask_opno], gsi);
+		= prepare_vec_mask (TREE_TYPE (mask), loop_vinfo, mask,
+				    vargs[mask_opno], gsi);
 	    }
 
 	  gimple *new_stmt;
@@ -6302,10 +6310,46 @@ vectorizable_operation (vec_info *vinfo,
 	}
       else
 	{
+	  tree mask = NULL_TREE;
+	  /* When combining two masks check if either of them is elsewhere
+	     combined with a loop mask, if that's the case we can mark that the
+	     new combined mask doesn't need to be combined with a loop mask.  */
+	  if (masked_loop_p && code == BIT_AND_EXPR)
+	    {
+	      scalar_cond_masked_key cond0 (op0, ncopies);
+	      if (loop_vinfo->scalar_cond_masked_set.contains (cond0))
+		{
+		  mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+						  vectype, i);
+
+		  vop0 = prepare_vec_mask (TREE_TYPE (mask), loop_vinfo, mask,
+					   vop0, gsi);
+		}
+
+	      scalar_cond_masked_key cond1 (op1, ncopies);
+	      if (loop_vinfo->scalar_cond_masked_set.contains (cond1))
+		{
+		  mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+						  vectype, i);
+
+		  vop1 = prepare_vec_mask (TREE_TYPE (mask), loop_vinfo, mask,
+					   vop1, gsi);
+		}
+	    }
+
 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
 	  new_temp = make_ssa_name (vec_dest, new_stmt);
 	  gimple_assign_set_lhs (new_stmt, new_temp);
 	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+
+	  /* Enter the combined value into the vector cond hash so we don't
+	     AND it with a loop mask again.  */
+	  if (mask)
+	    {
+	      vec_cond_masked_key cond (new_temp, mask);
+	      loop_vinfo->vec_cond_masked_set.add (cond);
+	    }
+
 	  if (vec_cvt_dest)
 	    {
 	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
@@ -8166,8 +8210,8 @@ vectorizable_store (vec_info *vinfo,
 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
 					     vectype, j);
 	  if (vec_mask)
-	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
-						  vec_mask, gsi);
+	    final_mask = prepare_vec_mask (mask_vectype, loop_vinfo,
+					   final_mask, vec_mask, gsi);
 
 	  gcall *call;
 	  if (final_mask)
@@ -8221,8 +8265,8 @@ vectorizable_store (vec_info *vinfo,
 						 vec_num * ncopies,
 						 vectype, vec_num * j + i);
 	      if (vec_mask)
-		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
-						      vec_mask, gsi);
+		final_mask = prepare_vec_mask (mask_vectype, loop_vinfo,
+					       final_mask, vec_mask, gsi);
 
 	      if (memory_access_type == VMAT_GATHER_SCATTER)
 		{
@@ -9442,8 +9486,8 @@ vectorizable_load (vec_info *vinfo,
 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
 					     vectype, j);
 	  if (vec_mask)
-	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
-						  vec_mask, gsi);
+	    final_mask = prepare_vec_mask (mask_vectype, loop_vinfo,
+					   final_mask, vec_mask, gsi);
 
 	  gcall *call;
 	  if (final_mask)
@@ -9494,8 +9538,8 @@ vectorizable_load (vec_info *vinfo,
 						 vec_num * ncopies,
 						 vectype, vec_num * j + i);
 	      if (vec_mask)
-		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
-						      vec_mask, gsi);
+		final_mask = prepare_vec_mask (mask_vectype, loop_vinfo,
+					       final_mask, vec_mask, gsi);
 
 	      if (i > 0)
 		dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index d466be6ffcb7b2f7724332367cce2eee75245240..894f30b35fa5491e31c58ecb8a46717ef3598a9a 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -327,6 +327,80 @@ struct default_hash_traits<scalar_cond_masked_key>
 
 typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;
 
+/* Key for map that records association between
+   vector conditions and corresponding loop mask, and
+   is populated by prepare_vec_mask.  */
+
+struct vec_cond_masked_key
+{
+  vec_cond_masked_key (tree t, tree loop_mask_)
+    : loop_mask (loop_mask_)
+  {
+    get_cond_ops_from_tree (t);
+  }
+
+  void get_cond_ops_from_tree (tree);
+
+  tree loop_mask;
+  tree_code code;
+  tree op0;
+  tree op1;
+};
+
+template<>
+struct default_hash_traits<vec_cond_masked_key>
+{
+  typedef vec_cond_masked_key compare_type;
+  typedef vec_cond_masked_key value_type;
+
+  static inline hashval_t
+  hash (value_type v)
+  {
+    inchash::hash h;
+    h.add_int (v.code);
+    inchash::add_expr (v.op0, h, 0);
+    inchash::add_expr (v.op1, h, 0);
+    h.add_ptr (v.loop_mask);
+    return h.end ();
+  }
+
+  static inline bool
+  equal (value_type existing, value_type candidate)
+  {
+    return (existing.code == candidate.code
+	    && existing.loop_mask == candidate.loop_mask
+	    && operand_equal_p (existing.op0, candidate.op0, 0)
+	    && operand_equal_p (existing.op1, candidate.op1, 0));
+  }
+
+  static const bool empty_zero_p = true;
+
+  static inline void
+  mark_empty (value_type &v)
+  {
+    v.loop_mask = NULL_TREE;
+  }
+
+  static inline bool
+  is_empty (value_type v)
+  {
+    return v.loop_mask == NULL_TREE;
+  }
+
+  static inline void mark_deleted (value_type &) {}
+
+  static inline bool is_deleted (const value_type &)
+  {
+    return false;
+  }
+
+  static inline void remove (value_type &) {}
+};
+
+typedef hash_set<vec_cond_masked_key> vec_cond_masked_set_type;
+
+
+
 /* Describes two objects whose addresses must be unequal for the vectorized
    loop to be valid.  */
 typedef std::pair<tree, tree> vec_object_pair;
@@ -643,6 +717,9 @@ public:
   /* Set of scalar conditions that have loop mask applied.  */
   scalar_cond_masked_set_type scalar_cond_masked_set;
 
+  /* Set of vector conditions that have loop mask applied.  */
+  vec_cond_masked_set_type vec_cond_masked_set;
+
   /* If we are using a loop mask to align memory addresses, this variable
      contains the number of vector elements that we should skip in the
      first iteration of the vector loop (i.e. the number of leading
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 5c93961869db08e5ef4e2d22deffdaba8554d78f..4e4d3ca49b46dc051c9084372e8501457e23c632 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -1720,6 +1720,40 @@ scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
   this->inverted_p = false;
 }
 
+/* If the condition represented by T is a comparison or the SSA name
+   result of a comparison, extract the comparison's operands.  Represent
+   T as NE_EXPR <T, 0> otherwise.  */
+
+void
+vec_cond_masked_key::get_cond_ops_from_tree (tree t)
+{
+  if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
+    {
+      this->code = TREE_CODE (t);
+      this->op0 = TREE_OPERAND (t, 0);
+      this->op1 = TREE_OPERAND (t, 1);
+      return;
+    }
+
+  if (TREE_CODE (t) == SSA_NAME)
+    if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
+      {
+	tree_code code = gimple_assign_rhs_code (stmt);
+	if (TREE_CODE_CLASS (code) == tcc_comparison)
+	  {
+	    this->code = code;
+	    this->op0 = gimple_assign_rhs1 (stmt);
+	    this->op1 = gimple_assign_rhs2 (stmt);
+	    return;
+	  }
+      }
+
+  this->code = NE_EXPR;
+  this->op0 = t;
+  this->op1 = build_zero_cst (TREE_TYPE (t));
+}
+
+
 /* See the comment above the declaration for details.  */
 
 unsigned int
-------------- next part --------------
A non-text attachment was scrubbed...
Name: rb14777.patch
Type: application/octet-stream
Size: 10413 bytes
Desc: rb14777.patch
URL: <https://gcc.gnu.org/pipermail/gcc-patches/attachments/20211115/52881aa4/attachment-0001.obj>


More information about the Gcc-patches mailing list