Re: [mask-load, patch 1/2] Use boolean predicate for masked loads and store
- From: Richard Biener <richard.guenther@gmail.com>
- To: Ilya Enkovich <enkovich.gnu@gmail.com>
- Cc: GCC Patches <gcc-patches@gcc.gnu.org>
- Date: Tue, 3 Nov 2015 14:40:50 +0100
- Subject: Re: [mask-load, patch 1/2] Use boolean predicate for masked loads and store
On Wed, Oct 28, 2015 at 4:23 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
> On 23 Oct 13:36, Ilya Enkovich wrote:
>> 2015-10-23 13:32 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>> >
>> > No, we'd get
>> >
>> > mask_1 = bool != 1;
>> >
>> > and the 'mask' variable should have been simplified to 'bool'
>> > (yes, we'd insert a dead stmt). gimple_build simplifies
>> > stmts via the match-and-simplify machinery and match.pd
>> > knows how to invert conditions.
>> >
>>
>> Thanks! I'll try it.
>>
>> Ilya
>
> Hi,
>
> Here is a new version. The changes you suggested cause BIT_NOT_EXPR to be used for the generated mask (instead of the '!= 1' comparison used before). It required a small fix to get it vectorized and avoid regressions. Is this version OK?
Ok.
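
(For the record, my mental model of what the new if-converted code computes,
as a plain scalar C sketch rather than actual GIMPLE -- all names here are
made up for illustration:

  static void
  masked_clear (int *e, const int *a, const int *b,
                const int *c, const int *d, int n)
  {
    for (int i = 0; i < n; i++)
      {
        _Bool cond = a[i] > b[i] && c[i] < d[i];
        _Bool mask = !cond;   /* now a BIT_NOT_EXPR on a boolean mask */
        if (mask)             /* becomes a MASK_STORE predicated on mask */
          e[i] = 0;
      }
  }

so the only real difference on the GIMPLE side is that the mask is a boolean
negated with BIT_NOT_EXPR instead of an integer mask compared against 1.)
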
Thanks,
Richard.
> Thanks,
> Ilya
> --
> gcc/
>
> 2015-10-28 Ilya Enkovich <enkovich.gnu@gmail.com>
>
> * internal-fn.c (expand_MASK_LOAD): Adjust to maskload optab changes.
> (expand_MASK_STORE): Adjust to maskstore optab changes.
> * optabs-query.c (can_vec_mask_load_store_p): Add MASK_MODE arg.
> Adjust to maskload, maskstore optab changes.
> * optabs-query.h (can_vec_mask_load_store_p): Add MASK_MODE arg.
> * optabs.def (maskload_optab): Transform into convert optab.
> (maskstore_optab): Likewise.
> * tree-if-conv.c (ifcvt_can_use_mask_load_store): Adjust to
> can_vec_mask_load_store_p signature change.
> (predicate_mem_writes): Use boolean mask.
> * tree-vect-stmts.c (vectorizable_mask_load_store): Adjust to
> can_vec_mask_load_store_p signature change. Allow invariant masks.
> (vectorizable_operation): Ignore type precision for boolean vectors.
>
> gcc/testsuite/
>
> 2015-10-28 Ilya Enkovich <enkovich.gnu@gmail.com>
>
> * gcc.target/i386/avx2-vec-mask-bit-not.c: New test.
>
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index f12d3af..2317e20 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -1901,7 +1901,9 @@ expand_MASK_LOAD (gcall *stmt)
> create_output_operand (&ops[0], target, TYPE_MODE (type));
> create_fixed_operand (&ops[1], mem);
> create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
> - expand_insn (optab_handler (maskload_optab, TYPE_MODE (type)), 3, ops);
> + expand_insn (convert_optab_handler (maskload_optab, TYPE_MODE (type),
> + TYPE_MODE (TREE_TYPE (maskt))),
> + 3, ops);
> }
>
> static void
> @@ -1924,7 +1926,9 @@ expand_MASK_STORE (gcall *stmt)
> create_fixed_operand (&ops[0], mem);
> create_input_operand (&ops[1], reg, TYPE_MODE (type));
> create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
> - expand_insn (optab_handler (maskstore_optab, TYPE_MODE (type)), 3, ops);
> + expand_insn (convert_optab_handler (maskstore_optab, TYPE_MODE (type),
> + TYPE_MODE (TREE_TYPE (maskt))),
> + 3, ops);
> }
>
> static void
> diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
> index 254089f..c20597c 100644
> --- a/gcc/optabs-query.c
> +++ b/gcc/optabs-query.c
> @@ -466,7 +466,9 @@ can_mult_highpart_p (machine_mode mode, bool uns_p)
> /* Return true if target supports vector masked load/store for mode. */
>
> bool
> -can_vec_mask_load_store_p (machine_mode mode, bool is_load)
> +can_vec_mask_load_store_p (machine_mode mode,
> + machine_mode mask_mode,
> + bool is_load)
> {
> optab op = is_load ? maskload_optab : maskstore_optab;
> machine_mode vmode;
> @@ -474,7 +476,7 @@ can_vec_mask_load_store_p (machine_mode mode, bool is_load)
>
> /* If mode is vector mode, check it directly. */
> if (VECTOR_MODE_P (mode))
> - return optab_handler (op, mode) != CODE_FOR_nothing;
> + return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing;
>
> /* Otherwise, return true if there is some vector mode with
> the mask load/store supported. */
> @@ -485,7 +487,12 @@ can_vec_mask_load_store_p (machine_mode mode, bool is_load)
> if (!VECTOR_MODE_P (vmode))
> return false;
>
> - if (optab_handler (op, vmode) != CODE_FOR_nothing)
> + mask_mode = targetm.vectorize.get_mask_mode (GET_MODE_NUNITS (vmode),
> + GET_MODE_SIZE (vmode));
> + if (mask_mode == VOIDmode)
> + return false;
> +
> + if (convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
> return true;
>
> vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
> @@ -496,8 +503,10 @@ can_vec_mask_load_store_p (machine_mode mode, bool is_load)
> if (cur <= GET_MODE_SIZE (mode))
> continue;
> vmode = mode_for_vector (mode, cur / GET_MODE_SIZE (mode));
> + mask_mode = targetm.vectorize.get_mask_mode (GET_MODE_NUNITS (vmode),
> + cur);
> if (VECTOR_MODE_P (vmode)
> - && optab_handler (op, vmode) != CODE_FOR_nothing)
> + && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
> return true;
> }
> return false;
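
Just to spell out the new interface as I read it: a caller that queries with
the scalar mode of the accessed element (as if-conversion does below) can
pass VOIDmode for the mask mode and let this function derive one via
targetm.vectorize.get_mask_mode, while a caller that already knows the mask
vector type passes its mode.  A purely hypothetical caller (not from this
patch) would look like

  /* MODE is the scalar mode of the element being accessed;
     REF is a placeholder for the memory reference.  */
  machine_mode mode = TYPE_MODE (TREE_TYPE (ref));
  if (!can_vec_mask_load_store_p (mode, VOIDmode, /*is_load=*/true))
    return false;
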
> diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
> index 81ac362..162d2e9 100644
> --- a/gcc/optabs-query.h
> +++ b/gcc/optabs-query.h
> @@ -140,7 +140,7 @@ enum insn_code find_widening_optab_handler_and_mode (optab, machine_mode,
> machine_mode, int,
> machine_mode *);
> int can_mult_highpart_p (machine_mode, bool);
> -bool can_vec_mask_load_store_p (machine_mode, bool);
> +bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool);
> bool can_compare_and_swap_p (machine_mode, bool);
> bool can_atomic_exchange_p (machine_mode, bool);
> bool lshift_cheap_p (bool);
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 1f9c1cf..9804378 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -63,6 +63,8 @@ OPTAB_CD(vcond_optab, "vcond$a$b")
> OPTAB_CD(vcondu_optab, "vcondu$a$b")
> OPTAB_CD(vec_cmp_optab, "vec_cmp$a$b")
> OPTAB_CD(vec_cmpu_optab, "vec_cmpu$a$b")
> +OPTAB_CD(maskload_optab, "maskload$a$b")
> +OPTAB_CD(maskstore_optab, "maskstore$a$b")
>
> OPTAB_NL(add_optab, "add$P$a3", PLUS, "add", '3', gen_int_fp_fixed_libfunc)
> OPTAB_NX(add_optab, "add$F$a3")
> @@ -266,8 +268,6 @@ OPTAB_D (udot_prod_optab, "udot_prod$I$a")
> OPTAB_D (usum_widen_optab, "widen_usum$I$a3")
> OPTAB_D (usad_optab, "usad$I$a")
> OPTAB_D (ssad_optab, "ssad$I$a")
> -OPTAB_D (maskload_optab, "maskload$a")
> -OPTAB_D (maskstore_optab, "maskstore$a")
> OPTAB_D (vec_extract_optab, "vec_extract$a")
> OPTAB_D (vec_init_optab, "vec_init$a")
> OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a")
> diff --git a/gcc/testsuite/gcc.target/i386/avx2-vec-mask-bit-not.c b/gcc/testsuite/gcc.target/i386/avx2-vec-mask-bit-not.c
> new file mode 100644
> index 0000000..0c946ca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx2-vec-mask-bit-not.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target avx2 } */
> +/* { dg-options "-mavx2 -O3 -fopenmp-simd -fdump-tree-vect-details" } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> +
> +#define N 1024
> +
> +int a[N], b[N], c[N], d[N], e[N];
> +
> +void
> +test (void)
> +{
> + int i;
> + #pragma omp simd
> + for (i = 0; i < N; i++)
> + if (!(a[i] > b[i] && c[i] < d[i]))
> + e[i] = 0;
> +}
> diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
> index f201ab5..50e959f 100644
> --- a/gcc/tree-if-conv.c
> +++ b/gcc/tree-if-conv.c
> @@ -811,7 +811,7 @@ ifcvt_can_use_mask_load_store (gimple *stmt)
> || VECTOR_MODE_P (mode))
> return false;
>
> - if (can_vec_mask_load_store_p (mode, is_load))
> + if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
> return true;
>
> return false;
> @@ -2068,8 +2068,9 @@ predicate_mem_writes (loop_p loop)
> {
> tree lhs = gimple_assign_lhs (stmt);
> tree rhs = gimple_assign_rhs1 (stmt);
> - tree ref, addr, ptr, masktype, mask_op0, mask_op1, mask;
> + tree ref, addr, ptr, mask;
> gimple *new_stmt;
> + gimple_seq stmts = NULL;
> int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
> ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
> mark_addressable (ref);
> @@ -2082,16 +2083,27 @@ predicate_mem_writes (loop_p loop)
> mask = vect_masks[index];
> else
> {
> - masktype = build_nonstandard_integer_type (bitsize, 1);
> - mask_op0 = build_int_cst (masktype, swap ? 0 : -1);
> - mask_op1 = build_int_cst (masktype, swap ? -1 : 0);
> - cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
> - is_gimple_condexpr,
> - NULL_TREE,
> - true, GSI_SAME_STMT);
> - mask = fold_build_cond_expr (masktype, unshare_expr (cond),
> - mask_op0, mask_op1);
> - mask = ifc_temp_var (masktype, mask, &gsi);
> + if (COMPARISON_CLASS_P (cond))
> + mask = gimple_build (&stmts, TREE_CODE (cond),
> + boolean_type_node,
> + TREE_OPERAND (cond, 0),
> + TREE_OPERAND (cond, 1));
> + else
> + {
> + gcc_assert (TREE_CODE (cond) == SSA_NAME);
> + mask = cond;
> + }
> +
> + if (swap)
> + {
> + tree true_val
> + = constant_boolean_node (true, TREE_TYPE (mask));
> + mask = gimple_build (&stmts, BIT_XOR_EXPR,
> + TREE_TYPE (mask), mask, true_val);
> + }
> + gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
> +
> + mask = ifc_temp_var (TREE_TYPE (mask), mask, &gsi);
> /* Save mask and its size for further use. */
> vect_sizes.safe_push (bitsize);
> vect_masks.safe_push (mask);
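
This matches what I had in mind earlier in the thread: going through
gimple_build gives match-and-simplify a chance to fold the BIT_XOR with
true, so a comparison should come out as the inverted comparison directly,
and an SSA-name mask ends up as a BIT_NOT_EXPR (hence the name of the new
testcase).  A rough sketch of the idea, with placeholder operands rather
than code from the patch:

  gimple_seq stmts = NULL;
  /* Build the comparison; op0 and op1 are placeholders.  */
  tree mask = gimple_build (&stmts, GT_EXPR, boolean_type_node, op0, op1);
  /* The XOR with true is folded via match.pd; for integer operands this
     becomes op0 <= op1 instead of a separate negation statement.  */
  mask = gimple_build (&stmts, BIT_XOR_EXPR, boolean_type_node,
                       mask, boolean_true_node);
  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
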
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 9413197..195ecf8 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -1708,6 +1708,7 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
> bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
> struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> + tree mask_vectype;
> tree elem_type;
> gimple *new_stmt;
> tree dummy;
> @@ -1734,8 +1735,8 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>
> is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
> mask = gimple_call_arg (stmt, 2);
> - if (TYPE_PRECISION (TREE_TYPE (mask))
> - != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
> +
> + if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
> return false;
>
> /* FORNOW. This restriction should be relaxed. */
> @@ -1764,6 +1765,18 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
> if (STMT_VINFO_STRIDED_P (stmt_info))
> return false;
>
> + if (TREE_CODE (mask) != SSA_NAME)
> + return false;
> +
> + if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
> + return false;
> +
> + if (!mask_vectype)
> + mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
> +
> + if (!mask_vectype)
> + return false;
> +
> if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> {
> gimple *def_stmt;
> @@ -1795,13 +1808,9 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
> : DR_STEP (dr), size_zero_node) <= 0)
> return false;
> else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
> - || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
> - return false;
> -
> - if (TREE_CODE (mask) != SSA_NAME)
> - return false;
> -
> - if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt))
> + || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
> + TYPE_MODE (mask_vectype),
> + !is_store))
> return false;
>
> if (is_store)
> @@ -4702,8 +4711,9 @@ vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
>
> /* Most operations cannot handle bit-precision types without extra
> truncations. */
> - if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
> - != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
> + if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
> + && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
> + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
> /* Exception are bitwise binary operations. */
> && code != BIT_IOR_EXPR
> && code != BIT_XOR_EXPR