[PATCH 2/4]middle-end: lower COND_EXPR into gimple form in vect_recog_bool_pattern
Richard Biener
rguenther@suse.de
Fri Sep 6 13:08:57 GMT 2024
On Tue, 3 Sep 2024, Tamar Christina wrote:
> Hi All,
>
> Currently the vectorizer cheats when lowering COND_EXPR during bool recog.
> In the cases where the conditonal is loop invariant or non-boolean it instead
> converts the operation back into GENERIC and hides much of the operation from
> the analysis part of the vectorizer.
>
> i.e.
>
> a ? b : c
>
> is transformed into:
>
> a != 0 ? b : c
>
> however by doing so we can't perform any optimization on the mask as they aren't
> explicit until quite late during codegen.
>
> To fix this this patch lowers booleans earlier and so ensures that we are always
> in GIMPLE.
>
> For when the value is a loop invariant boolean we have to generate an additional
> conversion from bool to the integer mask form.
>
> This is done by creating a loop invariant a ? -1 : 0 with the target mask
> precision and then doing a normal != 0 comparison on that.
>
> To support this the patch also adds the ability to during pattern matching
> create a loop invariant pattern that won't be seen by the vectorizer and will
> instead me materialized inside the loop preheader in the case of loops, or in
> the case of BB vectorization it materializes it in the first BB in the region.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf,
> x86_64-pc-linux-gnu -m32, -m64 and no issues.
>
> Ok for master?
OK, but can you clarify a question below?
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> * tree-vect-patterns.cc (append_inv_pattern_def_seq): New.
> (vect_recog_bool_pattern): Lower COND_EXPRs.
> * tree-vect-slp.cc (vect_schedule_slp): Materialize loop invariant
> statements.
> * tree-vect-loop.cc (vect_transform_loop): Likewise.
> * tree-vect-stmts.cc (vectorizable_comparison_1): Remove
> VECT_SCALAR_BOOLEAN_TYPE_P handling for vectype.
> * tree-vectorizer.cc (vec_info::vec_info): Initialize
> inv_pattern_def_seq.
> * tree-vectorizer.h (LOOP_VINFO_INV_PATTERN_DEF_SEQ): New.
> (class vec_info): Add inv_pattern_def_seq.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/vect/bb-slp-conditional_store_1.c: New test.
> * gcc.dg/vect/vect-conditional_store_5.c: New test.
> * gcc.dg/vect/vect-conditional_store_6.c: New test.
>
> ---
> diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..650a3bfbfb1dd44afc2d58bbe85f75f1d28b9bd0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-conditional_store_1.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-require-effective-target vect_float } */
> +
> +/* { dg-additional-options "-mavx2" { target avx2 } } */
> +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
> +
> +void foo3 (float *restrict a, int *restrict c)
> +{
> +#pragma GCC unroll 8
> + for (int i = 0; i < 8; i++)
> + c[i] = a[i] > 1.0;
> +}
> +
> +/* { dg-final { scan-tree-dump "vectorized using SLP" "slp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..37d60fa76351c13980427751be4450c14617a9a9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_5.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-require-effective-target vect_masked_store } */
> +
> +/* { dg-additional-options "-mavx2" { target avx2 } } */
> +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
> +
> +#include <stdbool.h>
> +
> +void foo3 (float *restrict a, int *restrict b, int *restrict c, int n, int stride)
> +{
> + if (stride <= 1)
> + return;
> +
> + bool ai = a[0];
> +
> + for (int i = 0; i < n; i++)
> + {
> + int res = c[i];
> + int t = b[i+stride];
> + if (ai)
> + t = res;
> + c[i] = t;
> + }
> +}
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..5e1aedf3726b073c132bb64a9b474592ceb8e9b9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-conditional_store_6.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-require-effective-target vect_masked_store } */
> +
> +/* { dg-additional-options "-mavx2" { target avx2 } } */
> +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
> +
> +void foo3 (unsigned long long *restrict a, int *restrict b, int *restrict c, int n, int stride)
> +{
> + if (stride <= 1)
> + return;
> +
> + for (int i = 0; i < n; i++)
> + {
> + int res = c[i];
> + int t = b[i+stride];
> + if (a[i])
> + t = res;
> + c[i] = t;
> + }
> +}
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +/* { dg-final { scan-tree-dump-not "VEC_COND_EXPR " "vect" { target aarch64-*-* } } } */
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 6456220cdc9bb0ba35baf04c555060ea38d13bbc..e4bed1f88435cb6ebad5651a266a9c74106500c0 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -12404,6 +12404,18 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
> vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
> }
>
> + /* Generate the loop invariant statements. */
> + if (!gimple_seq_empty_p (LOOP_VINFO_INV_PATTERN_DEF_SEQ (loop_vinfo)))
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location,
> + "------>generating loop invariant statements\n");
> + gimple_stmt_iterator gsi;
> + gsi = gsi_after_labels (loop_preheader_edge (loop)->src);
> + gsi_insert_seq_before (&gsi, LOOP_VINFO_INV_PATTERN_DEF_SEQ (loop_vinfo),
> + GSI_CONTINUE_LINKING);
> + }
> +
So this is one instance ...
> /* FORNOW: the vectorizer supports only loops which body consist
> of one basic block (header + empty latch). When the vectorizer will
> support more involved loop forms, the order by which the BBs are
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 4b112910df357e9f2783f7173b71812085126389..3aae6a066954d9e1e0d41803dd43307fa297af67 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -182,6 +182,17 @@ append_pattern_def_seq (vec_info *vinfo,
> new_stmt);
> }
>
> +
> +/* Add NEW_STMT to VINFO's invariant pattern definition statements. These
> + statements are not vectorized but are materialized as scalar in the loop
> + preheader. */
> +
> +static inline void
> +append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt)
> +{
> + gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt);
> +}
> +
> /* The caller wants to perform new operations on vect_external variable
> VAR, so that the result of the operations would also be vect_external.
> Return the edge on which the operations can be performed, if one exists.
> @@ -5983,12 +5994,34 @@ vect_recog_bool_pattern (vec_info *vinfo,
> var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
> else if (integer_type_for_mask (var, vinfo))
> return NULL;
> + else if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
> + && !vect_get_internal_def (vinfo, var))
> + {
> + /* If the condition is already a boolean then manually convert it to a
> + mask of the given integer type but don't set a vectype. */
> + tree lhs_ivar = vect_recog_temp_ssa_var (type, NULL);
> + pattern_stmt = gimple_build_assign (lhs_ivar, COND_EXPR, var,
> + build_all_ones_cst (type),
> + build_zero_cst (type));
> + append_inv_pattern_def_seq (vinfo, pattern_stmt);
> + var = lhs_ivar;
> + }
> +
> + tree lhs_var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
> + pattern_stmt = gimple_build_assign (lhs_var, NE_EXPR, var,
> + build_zero_cst (TREE_TYPE (var)));
> +
> + tree new_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (var));
> + if (!new_vectype)
> + return NULL;
> +
> + new_vectype = truth_type_for (new_vectype);
> + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype,
> + TREE_TYPE (var));
>
> lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
> pattern_stmt
> - = gimple_build_assign (lhs, COND_EXPR,
> - build2 (NE_EXPR, boolean_type_node,
> - var, build_int_cst (TREE_TYPE (var), 0)),
> + = gimple_build_assign (lhs, COND_EXPR, lhs_var,
> gimple_assign_rhs2 (last_stmt),
> gimple_assign_rhs3 (last_stmt));
> *type_out = vectype;
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 43ecd2689701451b706b41d73ba60773af4cf8a5..1ea836ca8fe1d4867504c6da42a40c22b6638385 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -10393,4 +10393,23 @@ vect_schedule_slp (vec_info *vinfo, const vec<slp_instance> &slp_instances)
> SLP_TREE_REPRESENTATIVE (root) = NULL;
> }
> }
> +
> + /* Generate the invariant statements. */
> + if (!gimple_seq_empty_p (vinfo->inv_pattern_def_seq))
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location,
> + "------>generating invariant statements\n");
> + gimple_stmt_iterator gsi;
> + basic_block bb_inv = vinfo->bbs[0];
> + loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
> + if (loop_vinfo)
> + bb_inv = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo))->src;
> +
> + gsi = gsi_after_labels (bb_inv);
> + gsi_insert_seq_before (&gsi, vinfo->inv_pattern_def_seq,
> + GSI_CONTINUE_LINKING);
> + vinfo->inv_pattern_def_seq = NULL;
> + }
... and this another. This one is only required for BB vectorization?
In that context it should probably best be done from vect_slp_region
before the vect_schedule_slp call or at least guarded with
BB vect?
I suspect you ran into this only for BB vectorization from loop
vect as that runs on if-converted code?
Thanks for clarifying.
Richard.
> +
> }
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 385e63163c2450b1cee9f3c2e5df11636116ec2d..c761360d2e80bbf25f55c00ee69bbadfcc9128e1 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -12652,11 +12652,7 @@ vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
> /* Invariant comparison. */
> if (!vectype)
> {
> - if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
> - vectype = mask_type;
> - else
> - vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
> - slp_node);
> + vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), slp_node);
> if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
> return false;
> }
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index df6c8ada2f7814ac1ea89913e881dd659bd2da62..d3389767325229e953212236c248c30697bbce0d 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -509,6 +509,12 @@ public:
> /* The count of the basic blocks in the vectorization region. */
> unsigned int nbbs;
>
> + /* Used to keep a sequence of def stmts of a pattern stmt that are loop
> + invariant if they exists.
> + The sequence is emitted in the loop preheader should the loop be vectorized
> + and are reset when undoing patterns. */
> + gimple_seq inv_pattern_def_seq;
> +
> private:
> stmt_vec_info new_stmt_vec_info (gimple *stmt);
> void set_vinfo_for_stmt (gimple *, stmt_vec_info, bool = true);
> @@ -1039,6 +1045,7 @@ public:
> #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
> #define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond
> #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
> +#define LOOP_VINFO_INV_PATTERN_DEF_SEQ(L) (L)->inv_pattern_def_seq
>
> #define LOOP_VINFO_FULLY_MASKED_P(L) \
> (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \
> diff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc
> index 1fb4fb36ed44d11baea2d3db8eedf9685be344e8..70b83720fe28b8c8c567f68df1ceb308d49043dc 100644
> --- a/gcc/tree-vectorizer.cc
> +++ b/gcc/tree-vectorizer.cc
> @@ -465,7 +465,8 @@ vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_)
> shared (shared_),
> stmt_vec_info_ro (false),
> bbs (NULL),
> - nbbs (0)
> + nbbs (0),
> + inv_pattern_def_seq (NULL)
> {
> stmt_vec_infos.create (50);
> }
>
>
>
>
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
More information about the Gcc-patches
mailing list