[PATCH 2/5] AArch64 sve: combine nested if predicates

Richard Sandiford richard.sandiford@arm.com
Tue Nov 30 16:24:42 GMT 2021


Tamar Christina <Tamar.Christina@arm.com> writes:
> Bootstrapped and regtested on aarch64-none-linux-gnu and x86_64-linux-gnu with no issues.
>
> gcc/ChangeLog:
>
> 	* tree-vect-stmts.c (prepare_load_store_mask): Rename to...
> 	(prepare_vec_mask): ...This and record operations that have already been
> 	masked.
> 	(vectorizable_call): Use it.
> 	(vectorizable_operation): Likewise.
> 	(vectorizable_store): Likewise.
> 	(vectorizable_load): Likewise.
> 	* tree-vectorizer.c (vec_cond_masked_key::get_cond_ops_from_tree): New.
> 	* tree-vectorizer.h (struct vec_cond_masked_key): New.
> 	(class _loop_vec_info): Add vec_cond_masked_set.
> 	(vec_cond_masked_set_type): New.
> 	(struct default_hash_traits<vec_cond_masked_key>): New.
>
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/sve/pred-combine-and.c: New test.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..ee927346abe518caa3cba397b11dfd1ee7e93630
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +void f5(float * restrict z0, float * restrict z1, float *restrict x, float * restrict y, float c, int n)
> +{
> +    for (int i = 0; i < n; i++) {
> +        float a = x[i];
> +        float b = y[i];
> +        if (a > b) {
> +            z0[i] = a + b;
> +            if (a > c) {
> +                z1[i] = a - b;
> +            }
> +        }
> +    }
> +}
> +
> +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s} 2 } } */
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 2284ad069e4d521f4e0bd43d34181a258cd636ef..b1946b589043312a9b29d832f9b8398e24787a5f 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -1796,23 +1796,30 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>  /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
>     form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
>     that needs to be applied to all loads and stores in a vectorized loop.
> -   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
> +   Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
> +   otherwise return VEC_MASK & LOOP_MASK.
>  
>     MASK_TYPE is the type of both masks.  If new statements are needed,
>     insert them before GSI.  */
>  
>  static tree
> -prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
> -			 gimple_stmt_iterator *gsi)
> +prepare_vec_mask (tree mask_type, loop_vec_info loop_vinfo, tree loop_mask,
> +		  tree vec_mask, gimple_stmt_iterator *gsi)

Minor, but: loop_vinfo normally comes first when present.

>  {
>    gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
>    if (!loop_mask)
>      return vec_mask;
>  
>    gcc_assert (TREE_TYPE (loop_mask) == mask_type);
> +
> +  vec_cond_masked_key cond (vec_mask, loop_mask);
> +  if (loop_vinfo->vec_cond_masked_set.contains (cond))
> +    return vec_mask;
> +
>    tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
>    gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
>  					  vec_mask, loop_mask);
> +
>    gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
>    return and_res;
>  }
> @@ -3526,8 +3533,9 @@ vectorizable_call (vec_info *vinfo,
>  			  gcc_assert (ncopies == 1);
>  			  tree mask = vect_get_loop_mask (gsi, masks, vec_num,
>  							  vectype_out, i);
> -			  vargs[mask_opno] = prepare_load_store_mask
> -			    (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
> +			  vargs[mask_opno] = prepare_vec_mask
> +			    (TREE_TYPE (mask), loop_vinfo, mask,
> +			     vargs[mask_opno], gsi);
>  			}
>  
>  		      gcall *call;
> @@ -3564,8 +3572,8 @@ vectorizable_call (vec_info *vinfo,
>  	      tree mask = vect_get_loop_mask (gsi, masks, ncopies,
>  					      vectype_out, j);
>  	      vargs[mask_opno]
> -		= prepare_load_store_mask (TREE_TYPE (mask), mask,
> -					   vargs[mask_opno], gsi);
> +		= prepare_vec_mask (TREE_TYPE (mask), loop_vinfo, mask,
> +				    vargs[mask_opno], gsi);
>  	    }
>  
>  	  gimple *new_stmt;
> @@ -6302,10 +6310,46 @@ vectorizable_operation (vec_info *vinfo,
>  	}
>        else
>  	{
> +	  tree mask = NULL_TREE;
> +	  /* When combining two masks, check if either of them is elsewhere
> +	     combined with a loop mask; if that's the case, we can mark that the
> +	     new combined mask doesn't need to be combined with a loop mask.  */
> +	  if (masked_loop_p && code == BIT_AND_EXPR)
> +	    {
> +	      scalar_cond_masked_key cond0 (op0, ncopies);
> +	      if (loop_vinfo->scalar_cond_masked_set.contains (cond0))
> +		{
> +		  mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
> +						  vectype, i);
> +
> +		  vop0 = prepare_vec_mask (TREE_TYPE (mask), loop_vinfo, mask,
> +					   vop0, gsi);
> +		}
> +
> +	      scalar_cond_masked_key cond1 (op1, ncopies);
> +	      if (loop_vinfo->scalar_cond_masked_set.contains (cond1))
> +		{
> +		  mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
> +						  vectype, i);
> +
> +		  vop1 = prepare_vec_mask (TREE_TYPE (mask), loop_vinfo, mask,
> +					   vop1, gsi);
> +		}
> +	    }
> +
>  	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
>  	  new_temp = make_ssa_name (vec_dest, new_stmt);
>  	  gimple_assign_set_lhs (new_stmt, new_temp);
>  	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
> +
> +	  /* Enter the combined value into the vector cond hash so we don't
> +	     AND it with a loop mask again.  */
> +	  if (mask)
> +	    {
> +	      vec_cond_masked_key cond (new_temp, mask);
> +	      loop_vinfo->vec_cond_masked_set.add (cond);
> +	    }
> +
>  	  if (vec_cvt_dest)
>  	    {
>  	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
> @@ -8166,8 +8210,8 @@ vectorizable_store (vec_info *vinfo,
>  	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
>  					     vectype, j);
>  	  if (vec_mask)
> -	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
> -						  vec_mask, gsi);
> +	    final_mask = prepare_vec_mask (mask_vectype, loop_vinfo,
> +					   final_mask, vec_mask, gsi);
>  
>  	  gcall *call;
>  	  if (final_mask)
> @@ -8221,8 +8265,8 @@ vectorizable_store (vec_info *vinfo,
>  						 vec_num * ncopies,
>  						 vectype, vec_num * j + i);
>  	      if (vec_mask)
> -		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
> -						      vec_mask, gsi);
> +		final_mask = prepare_vec_mask (mask_vectype, loop_vinfo,
> +					       final_mask, vec_mask, gsi);
>  
>  	      if (memory_access_type == VMAT_GATHER_SCATTER)
>  		{
> @@ -9442,8 +9486,8 @@ vectorizable_load (vec_info *vinfo,
>  	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
>  					     vectype, j);
>  	  if (vec_mask)
> -	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
> -						  vec_mask, gsi);
> +	    final_mask = prepare_vec_mask (mask_vectype, loop_vinfo,
> +					   final_mask, vec_mask, gsi);
>  
>  	  gcall *call;
>  	  if (final_mask)
> @@ -9494,8 +9538,8 @@ vectorizable_load (vec_info *vinfo,
>  						 vec_num * ncopies,
>  						 vectype, vec_num * j + i);
>  	      if (vec_mask)
> -		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
> -						      vec_mask, gsi);
> +		final_mask = prepare_vec_mask (mask_vectype, loop_vinfo,
> +					       final_mask, vec_mask, gsi);
>  
>  	      if (i > 0)
>  		dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index d466be6ffcb7b2f7724332367cce2eee75245240..894f30b35fa5491e31c58ecb8a46717ef3598a9a 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -327,6 +327,80 @@ struct default_hash_traits<scalar_cond_masked_key>
>  
>  typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;
>  
> +/* Key for map that records association between
> +   vector conditions and corresponding loop mask, and
> +   is populated by prepare_vec_mask.  */
> +
> +struct vec_cond_masked_key
> +{
> +  vec_cond_masked_key (tree t, tree loop_mask_)
> +    : loop_mask (loop_mask_)
> +  {
> +    get_cond_ops_from_tree (t);
> +  }
> +
> +  void get_cond_ops_from_tree (tree);
> +
> +  tree loop_mask;
> +  tree_code code;
> +  tree op0;
> +  tree op1;
> +};
> +
> +template<>
> +struct default_hash_traits<vec_cond_masked_key>
> +{
> +  typedef vec_cond_masked_key compare_type;
> +  typedef vec_cond_masked_key value_type;
> +
> +  static inline hashval_t
> +  hash (value_type v)
> +  {
> +    inchash::hash h;
> +    h.add_int (v.code);
> +    inchash::add_expr (v.op0, h, 0);
> +    inchash::add_expr (v.op1, h, 0);
> +    h.add_ptr (v.loop_mask);
> +    return h.end ();
> +  }
> +
> +  static inline bool
> +  equal (value_type existing, value_type candidate)
> +  {
> +    return (existing.code == candidate.code
> +	    && existing.loop_mask == candidate.loop_mask
> +	    && operand_equal_p (existing.op0, candidate.op0, 0)
> +	    && operand_equal_p (existing.op1, candidate.op1, 0));
> +  }
> +
> +  static const bool empty_zero_p = true;
> +
> +  static inline void
> +  mark_empty (value_type &v)
> +  {
> +    v.loop_mask = NULL_TREE;
> +  }
> +
> +  static inline bool
> +  is_empty (value_type v)
> +  {
> +    return v.loop_mask == NULL_TREE;
> +  }
> +
> +  static inline void mark_deleted (value_type &) {}
> +
> +  static inline bool is_deleted (const value_type &)
> +  {
> +    return false;
> +  }
> +
> +  static inline void remove (value_type &) {}
> +};
> +
> +typedef hash_set<vec_cond_masked_key> vec_cond_masked_set_type;
> +
> +
> +
>  /* Describes two objects whose addresses must be unequal for the vectorized
>     loop to be valid.  */
>  typedef std::pair<tree, tree> vec_object_pair;
> @@ -643,6 +717,9 @@ public:
>    /* Set of scalar conditions that have loop mask applied.  */
>    scalar_cond_masked_set_type scalar_cond_masked_set;
>  
> +  /* Set of vector conditions that have loop mask applied.  */
> +  vec_cond_masked_set_type vec_cond_masked_set;
> +
>    /* If we are using a loop mask to align memory addresses, this variable
>       contains the number of vector elements that we should skip in the
>       first iteration of the vector loop (i.e. the number of leading
> diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
> index 5c93961869db08e5ef4e2d22deffdaba8554d78f..4e4d3ca49b46dc051c9084372e8501457e23c632 100644
> --- a/gcc/tree-vectorizer.c
> +++ b/gcc/tree-vectorizer.c
> @@ -1720,6 +1720,40 @@ scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
>    this->inverted_p = false;
>  }
>  
> +/* If the condition represented by T is a comparison or the SSA name
> +   result of a comparison, extract the comparison's operands.  Represent
> +   T as NE_EXPR <T, 0> otherwise.  */
> +
> +void
> +vec_cond_masked_key::get_cond_ops_from_tree (tree t)
> +{
> +  if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
> +    {
> +      this->code = TREE_CODE (t);
> +      this->op0 = TREE_OPERAND (t, 0);
> +      this->op1 = TREE_OPERAND (t, 1);
> +      return;
> +    }
> +
> +  if (TREE_CODE (t) == SSA_NAME)
> +    if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
> +      {
> +	tree_code code = gimple_assign_rhs_code (stmt);
> +	if (TREE_CODE_CLASS (code) == tcc_comparison)
> +	  {
> +	    this->code = code;
> +	    this->op0 = gimple_assign_rhs1 (stmt);
> +	    this->op1 = gimple_assign_rhs2 (stmt);
> +	    return;
> +	  }
> +      }
> +
> +  this->code = NE_EXPR;
> +  this->op0 = t;
> +  this->op1 = build_zero_cst (TREE_TYPE (t));
> +}
> +
> +

This hashing looks unnecessarily complex.  The values we're hashing are
vector SSA_NAMEs, so I think we should be able to hash and compare them
as a plain pair of pointers.

The type could then be std::pair and the hashing could be done using
pair_hash from hash-traits.h.

Thanks,
Richard


More information about the Gcc-patches mailing list