[PATCH] tree-optimization/117559 - avoid hybrid SLP for masked load/store lanes
Richard Biener
rguenther@suse.de
Wed Nov 13 13:07:12 GMT 2024
Hybrid analysis is confused by the mask_conversion pattern making a
uniform mask non-uniform. As load/store-lanes only uses a single
lane to mask all data lanes, the SLP graph doesn't cover the alternate
(redundant) mask lanes and thus their pattern defs. The following adds
a hack to mark them as covered.
Fixes gcc.target/aarch64/sve/mask_struct_store_?.c with forced SLP.
Bootstrap and regtest running on x86_64-unknown-linux-gnu.
PR tree-optimization/117559
* tree-vect-slp.cc (vect_mark_slp_stmts): Pass in vinfo,
mark all mask defs of a load/store-lane .MASK_LOAD/STORE
as pure.
(vect_make_slp_decision): Adjust.
(vect_slp_analyze_bb_1): Likewise.
---
gcc/tree-vect-slp.cc | 37 ++++++++++++++++++++++++++++++-------
1 file changed, 30 insertions(+), 7 deletions(-)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 02ec3bc28b4..99f305bcf48 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3200,7 +3200,8 @@ debug (slp_instance instance)
/* Mark the tree rooted at NODE with PURE_SLP. */
static void
-vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
+vect_mark_slp_stmts (vec_info *vinfo, slp_tree node,
+ hash_set<slp_tree> &visited)
{
int i;
stmt_vec_info stmt_info;
@@ -3214,18 +3215,40 @@ vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
if (stmt_info)
- STMT_SLP_TYPE (stmt_info) = pure_slp;
+ {
+ STMT_SLP_TYPE (stmt_info) = pure_slp;
+ /* ??? For .MASK_LOAD and .MASK_STORE detected as load/store-lanes
+ when there is the mask_conversion pattern applied we have lost the
+ alternate lanes of the uniform mask which nevertheless
+ have separate pattern defs. To not confuse hybrid
+ analysis we mark those as covered as well here. */
+ if (node->ldst_lanes)
+ if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
+ if (gimple_call_internal_p (call, IFN_MASK_LOAD)
+ || gimple_call_internal_p (call, IFN_MASK_STORE))
+ {
+ tree mask = gimple_call_arg (call,
+ internal_fn_mask_index
+ (gimple_call_internal_fn (call)));
+ if (TREE_CODE (mask) == SSA_NAME)
+ if (stmt_vec_info mask_info = vinfo->lookup_def (mask))
+ {
+ mask_info = vect_stmt_to_vectorize (mask_info);
+ STMT_SLP_TYPE (mask_info) = pure_slp;
+ }
+ }
+ }
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
if (child)
- vect_mark_slp_stmts (child, visited);
+ vect_mark_slp_stmts (vinfo, child, visited);
}
static void
-vect_mark_slp_stmts (slp_tree node)
+vect_mark_slp_stmts (vec_info *vinfo, slp_tree node)
{
hash_set<slp_tree> visited;
- vect_mark_slp_stmts (node, visited);
+ vect_mark_slp_stmts (vinfo, node, visited);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
@@ -7407,7 +7430,7 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
/* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
loop-based vectorization. Such stmts will be marked as HYBRID. */
- vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
+ vect_mark_slp_stmts (loop_vinfo, SLP_INSTANCE_TREE (instance));
decided_to_slp++;
}
@@ -9341,7 +9364,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
/* Mark all the statements that we want to vectorize as pure SLP and
relevant. */
- vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
+ vect_mark_slp_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance));
vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
unsigned j;
stmt_vec_info root;
--
2.43.0
More information about the Gcc-patches
mailing list