[RFC] Partial vectors for s390
Richard Sandiford
richard.sandiford@arm.com
Tue Oct 26 14:18:34 GMT 2021
Robin Dapp <rdapp@linux.ibm.com> writes:
> Hi Richard,
>
>> We already have code to probe the predicates of the underlying
>> define_expands/insns to see whether they support certain constant
>> IFN arguments; see e.g. internal_gather_scatter_fn_supported_p.
>> We could do something similar here: add an extra operand to the optab,
>> and an extra argument to the IFN, that gives a bias amount.
>> The PowerPC version would require 0, the System Z version would
>> require -1. The vectoriser would probe to see which value
>> it should use.
>>
>> Doing it that way ensures that the gimple is still self-describing.
>> It avoids gimple semantics depending on target hooks.
>
> As I don't have much previous exposure to the vectoriser code, I cobbled
> together something pretty ad-hoc (attached). Does this come reasonably
> close to what you have in mind?
Yeah, looks good.
> internal_len_load_supported_p should rather be called
> internal_len_load_bias_supported_p or so, I guess, and the part where we
> exclude multiple loop_lens is still missing.
Since we only support one bias, it might be better to make the
internal-fn.c function return the bias as an int (with some marker
value for “not supported”), so that the caller doesn't need to probe
both values.
> Would we also check for a viable bias there and then either accept
> multiple lens or not?
Yeah, I think so.
Thanks,
Richard
>
> Regards
> Robin
>
> commit 2320dbfdfe1477b15a2ac59847d2a52e68de49ab
> Author: Robin Dapp <rdapp@linux.ibm.com>
> Date: Tue Oct 26 14:36:08 2021 +0200
>
> bias1
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 8312d08aab2..bf97d3e471a 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -2696,9 +2696,9 @@ expand_call_mem_ref (tree type, gcall *stmt, int index)
> static void
> expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
> {
> - class expand_operand ops[3];
> - tree type, lhs, rhs, maskt;
> - rtx mem, target, mask;
> + class expand_operand ops[4];
> + tree type, lhs, rhs, maskt, biast;
> + rtx mem, target, mask, bias;
> insn_code icode;
>
> maskt = gimple_call_arg (stmt, 2);
> @@ -2727,7 +2727,18 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
> TYPE_UNSIGNED (TREE_TYPE (maskt)));
> else
> create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
> - expand_insn (icode, 3, ops);
> + if (optab == len_load_optab)
> + {
> + biast = gimple_call_arg (stmt, 3);
> + bias = expand_normal (biast);
> + create_input_operand (&ops[3], bias, SImode);
> + }
> +
> + if (optab != len_load_optab)
> + expand_insn (icode, 3, ops);
> + else
> + expand_insn (icode, 4, ops);
> +
> if (!rtx_equal_p (target, ops[0].value))
> emit_move_insn (target, ops[0].value);
> }
> @@ -2741,9 +2752,9 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
> static void
> expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
> {
> - class expand_operand ops[3];
> - tree type, lhs, rhs, maskt;
> - rtx mem, reg, mask;
> + class expand_operand ops[4];
> + tree type, lhs, rhs, maskt, biast;
> + rtx mem, reg, mask, bias;
> insn_code icode;
>
> maskt = gimple_call_arg (stmt, 2);
> @@ -2770,7 +2781,17 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
> TYPE_UNSIGNED (TREE_TYPE (maskt)));
> else
> create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
> - expand_insn (icode, 3, ops);
> + if (optab == len_store_optab)
> + {
> + biast = gimple_call_arg (stmt, 4);
> + bias = expand_normal (biast);
> + create_input_operand (&ops[3], bias, SImode);
> + }
> +
> + if (optab != len_store_optab)
> + expand_insn (icode, 3, ops);
> + else
> + expand_insn (icode, 4, ops);
> }
>
> #define expand_mask_store_optab_fn expand_partial_store_optab_fn
> @@ -4154,6 +4175,25 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
> && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)));
> }
>
> +bool
> +internal_len_load_supported_p (internal_fn ifn, tree load_type, int bias)
> +{
> + if (bias > 0 || bias < -1)
> + return false;
> +
> + machine_mode mode = TYPE_MODE (load_type);
> +
> + optab optab = direct_internal_fn_optab (ifn);
> + insn_code icode = direct_optab_handler (optab, mode);
> + int output_ops = internal_load_fn_p (ifn) ? 1 : 0;
> +
> + if (icode != CODE_FOR_nothing
> + && insn_operand_matches (icode, 2 + output_ops, GEN_INT (bias)))
> + return true;
> +
> + return false;
> +}
> +
> /* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN
> for pointers of type TYPE when the accesses have LENGTH bytes and their
> common byte alignment is ALIGN. */
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index 19d0f849a5a..d0bf9941bcc 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -225,6 +225,7 @@ extern int internal_fn_mask_index (internal_fn);
> extern int internal_fn_stored_value_index (internal_fn);
> extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
> tree, tree, int);
> +extern bool internal_len_load_supported_p (internal_fn ifn, tree, int);
> extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
> poly_uint64, unsigned int);
>
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index d7723b1a92a..50537763ace 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -8272,12 +8272,14 @@ vectorizable_store (vec_info *vinfo,
> opt_machine_mode new_ovmode
> = get_len_load_store_mode (vmode, false);
> machine_mode new_vmode = new_ovmode.require ();
> + tree vtype = vectype;
> /* Need conversion if it's wrapped with VnQI. */
> if (vmode != new_vmode)
> {
> tree new_vtype
> = build_vector_type_for_mode (unsigned_intQI_type_node,
> - new_vmode);
> + new_vmode);
> + vtype = new_vtype;
> tree var
> = vect_get_new_ssa_name (new_vtype, vect_simple_var);
> vec_oprnd
> @@ -8289,9 +8291,29 @@ vectorizable_store (vec_info *vinfo,
> gsi);
> vec_oprnd = var;
> }
> +
> + /* Check which bias value to use. Default is 0. */
> + tree bias = build_int_cst (intSI_type_node, 0);
> + tree new_len = final_len;
> + if (!internal_len_load_supported_p (IFN_LEN_LOAD, vtype, 0)
> + && internal_len_load_supported_p (IFN_LEN_LOAD,
> + vtype, -1))
> + {
> + bias = build_int_cst (intSI_type_node, -1);
> + new_len = make_ssa_name (TREE_TYPE (final_len));
> + gassign *m1 = gimple_build_assign (new_len,
> + MINUS_EXPR,
> + final_len,
> + build_one_cst
> + (TREE_TYPE
> + (final_len)));
> + vect_finish_stmt_generation (vinfo, stmt_info, m1,
> + gsi);
> + }
> gcall *call
> - = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
> - ptr, final_len, vec_oprnd);
> + = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
> + ptr, new_len, vec_oprnd,
> + bias);
> gimple_call_set_nothrow (call, true);
> vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
> new_stmt = call;
> @@ -9588,24 +9610,50 @@ vectorizable_load (vec_info *vinfo,
> vec_num * j + i);
> tree ptr = build_int_cst (ref_type,
> align * BITS_PER_UNIT);
> +
> + machine_mode vmode = TYPE_MODE (vectype);
> + opt_machine_mode new_ovmode
> + = get_len_load_store_mode (vmode, true);
> + machine_mode new_vmode = new_ovmode.require ();
> + tree qi_type = unsigned_intQI_type_node;
> + tree new_vtype
> + = build_vector_type_for_mode (qi_type, new_vmode);
> +
> + tree vtype = vectype;
> + if (vmode != new_vmode)
> + vtype = new_vtype;
> +
> + /* Check which bias value to use. Default is 0. */
> + tree bias = build_int_cst (intSI_type_node, 0);
> + tree new_len = final_len;
> + if (!internal_len_load_supported_p (IFN_LEN_LOAD,
> + vtype, 0)
> + && internal_len_load_supported_p (IFN_LEN_LOAD,
> + vtype, -1))
> + {
> + bias = build_int_cst (intSI_type_node, -1);
> + new_len = make_ssa_name (TREE_TYPE (final_len));
> + gassign *m1 = gimple_build_assign (new_len,
> + MINUS_EXPR,
> + final_len,
> + build_one_cst
> + (TREE_TYPE
> + (final_len)));
> + vect_finish_stmt_generation (vinfo, stmt_info, m1,
> + gsi);
> + }
> +
> gcall *call
> - = gimple_build_call_internal (IFN_LEN_LOAD, 3,
> + = gimple_build_call_internal (IFN_LEN_LOAD, 4,
> dataref_ptr, ptr,
> - final_len);
> + new_len, bias);
> gimple_call_set_nothrow (call, true);
> new_stmt = call;
> data_ref = NULL_TREE;
>
> /* Need conversion if it's wrapped with VnQI. */
> - machine_mode vmode = TYPE_MODE (vectype);
> - opt_machine_mode new_ovmode
> - = get_len_load_store_mode (vmode, true);
> - machine_mode new_vmode = new_ovmode.require ();
> if (vmode != new_vmode)
> {
> - tree qi_type = unsigned_intQI_type_node;
> - tree new_vtype
> - = build_vector_type_for_mode (qi_type, new_vmode);
> tree var = vect_get_new_ssa_name (new_vtype,
> vect_simple_var);
> gimple_set_lhs (call, var);
More information about the Gcc-patches mailing list