[PATCH] tree-optimization/117559 - avoid hybrid SLP for masked load/store lanes

Richard Biener rguenther@suse.de
Wed Nov 13 13:07:12 GMT 2024


Hybrid analysis is confused by the mask_conversion pattern making a
uniform mask non-uniform.  As load/store lanes only uses a single
lane to mask all data lanes the SLP graph doesn't cover the alternate
(redundant) mask lanes and thus their pattern defs.  The following adds
a hack to mark them covered.

Fixes gcc.target/aarch64/sve/mask_struct_store_?.c with forced SLP.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

	PR tree-optimization/117559
	* tree-vect-slp.cc (vect_mark_slp_stmts): Pass in vinfo,
	mark all mask defs of a load/store-lane .MASK_LOAD/STORE
	as pure.
	(vect_make_slp_decision): Adjust.
	(vect_slp_analyze_bb_1): Likewise.
---
 gcc/tree-vect-slp.cc | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 02ec3bc28b4..99f305bcf48 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3200,7 +3200,8 @@ debug (slp_instance instance)
 /* Mark the tree rooted at NODE with PURE_SLP.  */
 
 static void
-vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
+vect_mark_slp_stmts (vec_info *vinfo, slp_tree node,
+		     hash_set<slp_tree> &visited)
 {
   int i;
   stmt_vec_info stmt_info;
@@ -3214,18 +3215,40 @@ vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
 
   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
     if (stmt_info)
-      STMT_SLP_TYPE (stmt_info) = pure_slp;
+      {
+	STMT_SLP_TYPE (stmt_info) = pure_slp;
+	/* ???  For .MASK_LOAD and .MASK_STORE detected as load/store-lanes
+	   when there is the mask_conversion pattern applied we have lost the
+	   alternate lanes of the uniform mask which nevertheless
+	   have separate pattern defs.  To not confuse hybrid
+	   analysis we mark those as covered as well here.  */
+	if (node->ldst_lanes)
+	  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
+	    if (gimple_call_internal_p (call, IFN_MASK_LOAD)
+		|| gimple_call_internal_p (call, IFN_MASK_STORE))
+	      {
+		tree mask = gimple_call_arg (call,
+					     internal_fn_mask_index
+					     (gimple_call_internal_fn (call)));
+		if (TREE_CODE (mask) == SSA_NAME)
+		  if (stmt_vec_info mask_info = vinfo->lookup_def (mask))
+		    {
+		      mask_info = vect_stmt_to_vectorize (mask_info);
+		      STMT_SLP_TYPE (mask_info) = pure_slp;
+		    }
+	      }
+      }
 
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
     if (child)
-      vect_mark_slp_stmts (child, visited);
+      vect_mark_slp_stmts (vinfo, child, visited);
 }
 
 static void
-vect_mark_slp_stmts (slp_tree node)
+vect_mark_slp_stmts (vec_info *vinfo, slp_tree node)
 {
   hash_set<slp_tree> visited;
-  vect_mark_slp_stmts (node, visited);
+  vect_mark_slp_stmts (vinfo, node, visited);
 }
 
 /* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
@@ -7407,7 +7430,7 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
 	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
 	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
-      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
+      vect_mark_slp_stmts (loop_vinfo, SLP_INSTANCE_TREE (instance));
       decided_to_slp++;
     }
 
@@ -9341,7 +9364,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
 
       /* Mark all the statements that we want to vectorize as pure SLP and
 	 relevant.  */
-      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
+      vect_mark_slp_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance));
       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
       unsigned j;
       stmt_vec_info root;
-- 
2.43.0


More information about the Gcc-patches mailing list