[SVE] PR86753


Hi,
The attached patch tries to fix PR86753.

For the following test:
void
f1 (int *restrict x, int *restrict y, int *restrict z)
{
  for (int i = 0; i < 100; ++i)
    x[i] = y[i] ? z[i] : 10;
}

the vect dump shows:
  vect_cst__42 = { 0, ... };
  vect_cst__48 = { 0, ... };

  vect__4.7_41 = .MASK_LOAD (vectp_y.5_38, 4B, loop_mask_40);
  _4 = *_3;
  _5 = z_12(D) + _2;
  mask__35.8_43 = vect__4.7_41 != vect_cst__42;
  _35 = _4 != 0;
  vec_mask_and_46 = mask__35.8_43 & loop_mask_40;
  vect_iftmp.11_47 = .MASK_LOAD (vectp_z.9_44, 4B, vec_mask_and_46);
  iftmp.0_13 = 0;
  vect_iftmp.12_50 = VEC_COND_EXPR <vect__4.7_41 != vect_cst__48,
				    vect_iftmp.11_47, vect_cst__49>;

and the following code-gen:
L2:
        ld1w    z0.s, p2/z, [x1, x3, lsl 2]
        cmpne   p1.s, p3/z, z0.s, #0
        cmpne   p0.s, p2/z, z0.s, #0
        ld1w    z0.s, p0/z, [x2, x3, lsl 2]
        sel     z0.s, p1, z0.s, z1.s

We could reuse vec_mask_and_46 in the vec_cond_expr, since the conditions
vect__4.7_41 != vect_cst__48 and vect__4.7_41 != vect_cst__42
are equivalent, and vect_iftmp.11_47 depends on vect__4.7_41 != vect_cst__48.

I suppose that, in general, for vec_cond_expr <C, T, E>, if T comes from a
masked load that is conditional on C, then we could reuse the mask used in
the load as the predicate of the vec_cond_expr?
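
To make that concrete, here is an illustrative sketch (made-up names,
not taken from the dump above):

  mask_1 = C;                              // vector comparison
  vec_mask_and_2 = mask_1 & loop_mask_3;
  T = .MASK_LOAD (ptr, 4B, vec_mask_and_2);
  x = VEC_COND_EXPR <C, T, E>;
  // can become:
  x = VEC_COND_EXPR <vec_mask_and_2, T, E>;

which should be safe because the two predicates only differ in lanes
where loop_mask_3 is false, and those lanes are inactive anyway.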

The patch maintains a hash_map cond_to_vec_mask from <cond, loop_mask>
to vec_mask (with the loop predicate applied).
In prepare_load_store_mask, we record <cond, loop_mask> -> vec_mask & loop_mask,
and in vectorizable_condition we check whether <cond, loop_mask> exists in
cond_to_vec_mask; if found, the corresponding vec_mask is used as the first
operand of the vec_cond_expr.
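
In pseudocode, the flow is roughly (a simplified sketch of the hunks
below, with details elided):

  /* prepare_load_store_mask:  */
  cond_vmask_key key (scalar_mask, loop_mask);
  tree *slot = &cond_to_vec_mask->get_or_insert (key);
  if (*slot)
    return *slot;                /* Reuse the cached mask.  */
  *slot = vec_mask & loop_mask;  /* Otherwise record it.  */

  /* vectorizable_condition:  */
  if (tree *m = cond_to_vec_mask->get (cond_vmask_key (cond_expr, loop_mask)))
    vec_compare = *m;            /* Reuse instead of emitting a compare.  */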

<cond, loop_mask> is represented by cond_vmask_key, and the patch adds
tree_cond_ops to represent the condition's operator and operands, coming
either from a cond_expr or from a gimple comparison stmt. If the stmt is
not a comparison, it returns <NE_EXPR, lhs, 0>, and that is what gets
inserted into cond_to_vec_mask.
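
For example (the first stmt is from the dump above; _b is a made-up name):

  _35 = _4 != 0;   -> <NE_EXPR, _4, 0>
  _b used directly -> <NE_EXPR, _b, 0>   (fallback for a bare SSA name)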

With the patch, the redundant p1 is eliminated and sel uses p0 for the above test.
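
For reference, the code-gen for f1 then becomes (hand-edited from the
listing above to show the effect, so indicative only):

L2:
        ld1w    z0.s, p2/z, [x1, x3, lsl 2]
        cmpne   p0.s, p2/z, z0.s, #0
        ld1w    z0.s, p0/z, [x2, x3, lsl 2]
        sel     z0.s, p0, z0.s, z1.s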

For the following test:
void
f2 (int *restrict x, int *restrict y, int *restrict z, int fallback)
{
  for (int i = 0; i < 100; ++i)
    x[i] = y[i] ? z[i] : fallback;
}

the input to the vectorizer has the operands swapped in the cond_expr:
  _36 = _4 != 0;
  iftmp.0_14 = .MASK_LOAD (_5, 32B, _36);
  iftmp.0_8 = _4 == 0 ? fallback_12(D) : iftmp.0_14;

So we also need to check for the inverted condition in cond_to_vec_mask
and, if it is found, swap the then/else operands, as sketched below.
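
Roughly (a sketch of the added vectorizable_condition logic, with
"lookup" standing in for the hash_map queries):

  cond_vmask_key cond (cond_expr, loop_mask);
  if (!lookup (cond))
    {
      cond.cond_ops.code = invert_tree_comparison (cond.cond_ops.code, true);
      if (tree *m = lookup (cond))
	{
	  vec_mask = *m;
	  std::swap (then_clause, else_clause);  /* C ? T : E == !C ? E : T  */
	}
    }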
Does the patch look OK so far?

One major issue remaining with the patch is value numbering.
Currently, it does value numbering for the entire function using SCCVN
at the start of the vect pass, which is too expensive, since we only need
block-based VN. I am looking into that.

Thanks,
Prathamesh
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index b0cbbac0cb5..bf54f80dd8b 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -8608,6 +8608,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
     {
       basic_block bb = bbs[i];
       stmt_vec_info stmt_info;
+      loop_vinfo->cond_to_vec_mask = new cond_vmask_map_type (8);
 
       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
 	   gsi_next (&si))
@@ -8717,6 +8718,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
 		}
 	    }
 	}
+
+      delete loop_vinfo->cond_to_vec_mask;
+      loop_vinfo->cond_to_vec_mask = 0;
     }				/* BBs in loop */
 
   /* The vectorization factor is always > 1, so if we use an IV increment of 1.
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 1e2dfe5d22d..862206b3256 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1989,17 +1989,31 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
 
 static tree
 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
-			 gimple_stmt_iterator *gsi)
+			 gimple_stmt_iterator *gsi, tree mask,
+			 cond_vmask_map_type *cond_to_vec_mask)
 {
   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
   if (!loop_mask)
     return vec_mask;
 
   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
+
+  tree *slot = 0;
+  if (cond_to_vec_mask)
+    {
+      cond_vmask_key cond (mask, loop_mask);
+      slot = &cond_to_vec_mask->get_or_insert (cond);
+      if (*slot)
+	return *slot;
+    }
+
   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
 					  vec_mask, loop_mask);
   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
+
+  if (slot)
+    *slot = and_res;
   return and_res;
 }
 
@@ -3514,8 +3528,10 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 			  gcc_assert (ncopies == 1);
 			  tree mask = vect_get_loop_mask (gsi, masks, vec_num,
 							  vectype_out, i);
+			  tree scalar_mask = gimple_call_arg (gsi_stmt (*gsi), mask_opno);
 			  vargs[mask_opno] = prepare_load_store_mask
-			    (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
+			    (TREE_TYPE (mask), mask, vargs[mask_opno], gsi,
+			     scalar_mask, vinfo->cond_to_vec_mask);
 			}
 
 		      gcall *call;
@@ -3564,9 +3580,11 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 	    {
 	      tree mask = vect_get_loop_mask (gsi, masks, ncopies,
 					      vectype_out, j);
+	      tree scalar_mask = gimple_call_arg (gsi_stmt (*gsi), mask_opno);
 	      vargs[mask_opno]
 		= prepare_load_store_mask (TREE_TYPE (mask), mask,
-					   vargs[mask_opno], gsi);
+					   vargs[mask_opno], gsi,
+					   scalar_mask, vinfo->cond_to_vec_mask);
 	    }
 
 	  if (cfn == CFN_GOMP_SIMD_LANE)
@@ -8109,7 +8127,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 					     vectype, j);
 	  if (vec_mask)
 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
-						  vec_mask, gsi);
+						  vec_mask, gsi, mask,
+						  vinfo->cond_to_vec_mask);
 
 	  gcall *call;
 	  if (final_mask)
@@ -8163,7 +8182,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 						 vectype, vec_num * j + i);
 	      if (vec_mask)
 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
-						      vec_mask, gsi);
+						      vec_mask, gsi, mask,
+						      vinfo->cond_to_vec_mask);
 
 	      if (memory_access_type == VMAT_GATHER_SCATTER)
 		{
@@ -9304,7 +9324,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 					     vectype, j);
 	  if (vec_mask)
 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
-						  vec_mask, gsi);
+						  vec_mask, gsi, mask,
+						  vinfo->cond_to_vec_mask);
 
 	  gcall *call;
 	  if (final_mask)
@@ -9355,7 +9376,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 						 vectype, vec_num * j + i);
 	      if (vec_mask)
 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
-						      vec_mask, gsi);
+						      vec_mask, gsi, mask,
+						      vinfo->cond_to_vec_mask);
 
 	      if (i > 0)
 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
@@ -9975,6 +9997,38 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   /* Handle cond expr.  */
   for (j = 0; j < ncopies; j++)
     {
+      tree vec_mask = NULL_TREE;
+
+      if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+	  && TREE_CODE_CLASS (TREE_CODE (cond_expr)) == tcc_comparison
+	  && loop_vinfo->cond_to_vec_mask)
+	{
+	  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+	  if (masks)
+	    {
+	      tree loop_mask = vect_get_loop_mask (gsi, masks,
+						   ncopies, vectype, j);
+
+	      cond_vmask_key cond (cond_expr, loop_mask);
+	      tree *slot = loop_vinfo->cond_to_vec_mask->get (cond);
+	      if (slot && *slot)
+		vec_mask = *slot;
+	      else
+		{
+		  cond.cond_ops.code
+		    = invert_tree_comparison (cond.cond_ops.code, true);
+		  slot = loop_vinfo->cond_to_vec_mask->get (cond);
+		  if (slot && *slot)
+		    {
+		      vec_mask = *slot;
+		      tree tmp = then_clause;
+		      then_clause = else_clause;
+		      else_clause = tmp;
+		    }
+		}
+	    }
+	}
+
       stmt_vec_info new_stmt_info = NULL;
       if (j == 0)
 	{
@@ -10054,6 +10108,8 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
 	  if (masked)
 	    vec_compare = vec_cond_lhs;
+	  else if (vec_mask)
+	    vec_compare = vec_mask;
 	  else
 	    {
 	      vec_cond_rhs = vec_oprnds1[i];
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index dc181524744..9cf36049a4c 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -82,6 +82,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "opt-problem.h"
 #include "internal-fn.h"
 
+#include "dominance.h"
+#include "tree-ssa-sccvn.h"
 
 /* Loop or bb location, with hotness information.  */
 dump_user_location_t vect_location;
@@ -461,7 +463,8 @@ vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in,
 		    vec_info_shared *shared_)
   : kind (kind_in),
     shared (shared_),
-    target_cost_data (target_cost_data_in)
+    target_cost_data (target_cost_data_in),
+    cond_to_vec_mask (0)
 {
   stmt_vec_infos.create (50);
 }
@@ -1033,7 +1036,6 @@ try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
 			       vect_loop_dist_alias_call (loop));
 }
 
-
 /* Function vectorize_loops.
 
    Entry point to loop vectorization phase.  */
@@ -1050,6 +1052,9 @@ vectorize_loops (void)
   bool any_ifcvt_loops = false;
   unsigned ret = 0;
 
+  calculate_dominance_info (CDI_DOMINATORS);
+  run_rpo_vn (VN_WALK);
+
   vect_loops_num = number_of_loops (cfun);
 
   /* Bail out if there are no loops.  */
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 1456cde4c2c..72e35f8b751 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -26,6 +26,8 @@ typedef class _stmt_vec_info *stmt_vec_info;
 #include "tree-data-ref.h"
 #include "tree-hash-traits.h"
 #include "target.h"
+#include "tree-ssa-sccvn.h"
+#include "hash-map.h"
 
 /* Used for naming of new temporaries.  */
 enum vect_var_kind {
@@ -193,6 +195,92 @@ public:
   poly_uint64 min_value;
 };
 
+struct cond_vmask_key
+{
+  cond_vmask_key (tree t, tree loop_mask_)
+    : cond_ops (t), loop_mask (loop_mask_)
+  {}
+
+  static unsigned get_value_id (tree x)
+  {
+    if (TREE_CONSTANT (x))
+      return get_constant_value_id (x);
+    return VN_INFO (x)->value_id;
+  }
+
+  hashval_t hash () const
+  {
+    inchash::hash h;
+    h.add_int (cond_ops.code);
+    h.add_int (get_value_id (cond_ops.op0));
+    h.add_int (get_value_id (cond_ops.op1));
+    h.add_int (SSA_NAME_VERSION (loop_mask));
+    return h.end ();
+  }
+
+  void mark_empty ()
+  {
+    loop_mask = NULL_TREE;
+  }
+
+  bool is_empty ()
+  {
+    return loop_mask == NULL_TREE;
+  }
+
+  tree_cond_ops cond_ops;
+  tree loop_mask;
+};
+
+inline bool operator== (const cond_vmask_key &c1, const cond_vmask_key &c2)
+{
+  return (c1.loop_mask == c2.loop_mask
+	  && c1.cond_ops.code == c2.cond_ops.code
+	  && (cond_vmask_key::get_value_id (c1.cond_ops.op0)
+	      == cond_vmask_key::get_value_id (c2.cond_ops.op0))
+	  && (cond_vmask_key::get_value_id (c1.cond_ops.op1)
+	      == cond_vmask_key::get_value_id (c2.cond_ops.op1)));
+}
+
+struct cond_vmask_key_traits
+{
+  typedef cond_vmask_key value_type;
+  typedef cond_vmask_key compare_type;
+
+  static inline hashval_t hash (value_type v)
+  {
+    return v.hash ();
+  }
+
+  static inline bool equal (value_type existing, value_type candidate)
+  {
+    return existing == candidate;
+  }
+
+  static inline void mark_empty (value_type& v)
+  {
+    v.mark_empty ();
+  }
+
+  static inline bool is_empty (value_type v)
+  {
+    return v.is_empty ();
+  }
+
+  static void mark_deleted (value_type&) {}
+
+  static inline bool is_deleted (value_type)
+  {
+    return false;
+  }
+
+  static inline void remove (value_type &) {}
+};
+
+typedef hash_map<cond_vmask_key, tree,
+		 simple_hashmap_traits <cond_vmask_key_traits, tree> >
+  cond_vmask_map_type;
+
 /* Vectorizer state shared between different analyses like vector sizes
    of the same CFG region.  */
 class vec_info_shared {
@@ -255,6 +343,8 @@ public:
   /* Cost data used by the target cost model.  */
   void *target_cost_data;
 
+  cond_vmask_map_type *cond_to_vec_mask;
+
 private:
   stmt_vec_info new_stmt_vec_info (gimple *stmt);
   void set_vinfo_for_stmt (gimple *, stmt_vec_info);
diff --git a/gcc/tree.c b/gcc/tree.c
index 8f80012c6e8..32a8fcf1eb8 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -15204,6 +15204,44 @@ max_object_size (void)
   return TYPE_MAX_VALUE (ptrdiff_type_node);
 }
 
+/* If code (T) is a comparison op, or T is the def of a comparison
+   stmt, extract its operands.
+   Else return <NE_EXPR, T, 0>.  */
+
+tree_cond_ops::tree_cond_ops (tree t)
+{
+  if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
+    {
+      this->code = TREE_CODE (t);
+      this->op0 = TREE_OPERAND (t, 0);
+      this->op1 = TREE_OPERAND (t, 1);
+      return;
+    }
+
+  else if (TREE_CODE (t) == SSA_NAME)
+    {
+      gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t));
+      if (stmt)
+	{
+	  tree_code code = gimple_assign_rhs_code (stmt);
+	  if (TREE_CODE_CLASS (code) == tcc_comparison)
+	    {
+	      this->code = code;
+	      this->op0 = gimple_assign_rhs1 (stmt);
+	      this->op1 = gimple_assign_rhs2 (stmt);
+	      return;
+	    }
+	}
+
+      this->code = NE_EXPR;
+      this->op0 = t;
+      this->op1 = build_zero_cst (TREE_TYPE (t));
+    }
+
+  else
+    gcc_unreachable ();
+}
+
 #if CHECKING_P
 
 namespace selftest {
diff --git a/gcc/tree.h b/gcc/tree.h
index 94dbb95a78a..6b9385129a8 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -6141,4 +6141,14 @@ public:
   operator location_t () const { return m_combined_loc; }
 };
 
+struct tree_cond_ops
+{
+  tree_code code;
+  tree op0;
+  tree op1;
+
+  tree_cond_ops (tree);
+};
+
+
 #endif  /* GCC_TREE_H  */
