This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [04/13] Refactor expand_vec_perm
- From: Richard Biener <richard dot guenther at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>, Richard Sandiford <richard dot sandiford at linaro dot org>
- Date: Tue, 12 Dec 2017 16:17:08 +0100
- Subject: Re: [04/13] Refactor expand_vec_perm
- Authentication-results: sourceware.org; auth=none
- References: <87indfmrgt.fsf@linaro.org> <87zi6rlclc.fsf@linaro.org>
On Sun, Dec 10, 2017 at 12:13 AM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> This patch splits the variable handling out of expand_vec_perm into
> a subroutine, so that the next patch can use a different interface
> for expanding constant permutes. expand_vec_perm now does all the
> CONST_VECTOR handling directly and defers to expand_vec_perm_var
> for other rtx codes. Handling CONST_VECTORs includes handling the
> fallback to variable permutes.
>
> The patch also adds an assert for valid optab modes to expand_vec_perm_1,
> so that we get it when using optabs for CONST_VECTORs. The MODE_VECTOR_INT
> part was previously in expand_vec_perm and the mode_for_int_vector part
> is new.
>
> Most of the patch is just reindentation, so I've attached a -b version.
Ok.
>
> 2017-12-06 Richard Sandiford <richard.sandiford@linaro.org>
>
> gcc/
> * optabs.c (expand_vec_perm_1): Assert that SEL has an integer
> vector mode and that that mode matches the mode of the data
> being permuted.
> (expand_vec_perm): Split handling of non-CONST_VECTOR selectors
> out into expand_vec_perm_var. Do all CONST_VECTOR handling here,
> directly using expand_vec_perm_1 when forcing selectors into
> registers.
> (expand_vec_perm_var): New function, split out from expand_vec_perm.
>
> Index: gcc/optabs.c
> ===================================================================
> --- gcc/optabs.c 2017-12-09 22:47:14.731310077 +0000
> +++ gcc/optabs.c 2017-12-09 22:47:23.878315657 +0000
> @@ -5405,6 +5405,8 @@ expand_vec_perm_1 (enum insn_code icode,
> machine_mode smode = GET_MODE (sel);
> struct expand_operand ops[4];
>
> + gcc_assert (GET_MODE_CLASS (smode) == MODE_VECTOR_INT
> + || mode_for_int_vector (tmode).require () == smode);
> create_output_operand (&ops[0], target, tmode);
> create_input_operand (&ops[3], sel, smode);
>
> @@ -5431,8 +5433,13 @@ expand_vec_perm_1 (enum insn_code icode,
> return NULL_RTX;
> }
>
> -/* Generate instructions for vec_perm optab given its mode
> - and three operands. */
> +static rtx expand_vec_perm_var (machine_mode, rtx, rtx, rtx, rtx);
> +
> +/* Implement a permutation of vectors v0 and v1 using the permutation
> + vector in SEL and return the result. Use TARGET to hold the result
> + if nonnull and convenient.
> +
> + MODE is the mode of the vectors being permuted (V0 and V1). */
>
> rtx
> expand_vec_perm (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
> @@ -5443,6 +5450,9 @@ expand_vec_perm (machine_mode mode, rtx
> rtx tmp, sel_qi = NULL;
> rtvec vec;
>
> + if (GET_CODE (sel) != CONST_VECTOR)
> + return expand_vec_perm_var (mode, v0, v1, sel, target);
> +
> if (!target || GET_MODE (target) != mode)
> target = gen_reg_rtx (mode);
>
> @@ -5455,86 +5465,125 @@ expand_vec_perm (machine_mode mode, rtx
> if (!qimode_for_vec_perm (mode).exists (&qimode))
> qimode = VOIDmode;
>
> - /* If the input is a constant, expand it specially. */
> - gcc_assert (GET_MODE_CLASS (GET_MODE (sel)) == MODE_VECTOR_INT);
> - if (GET_CODE (sel) == CONST_VECTOR)
> - {
> - /* See if this can be handled with a vec_shr. We only do this if the
> - second vector is all zeroes. */
> - enum insn_code shift_code = optab_handler (vec_shr_optab, mode);
> - enum insn_code shift_code_qi = ((qimode != VOIDmode && qimode != mode)
> - ? optab_handler (vec_shr_optab, qimode)
> - : CODE_FOR_nothing);
> - rtx shift_amt = NULL_RTX;
> - if (v1 == CONST0_RTX (GET_MODE (v1))
> - && (shift_code != CODE_FOR_nothing
> - || shift_code_qi != CODE_FOR_nothing))
> + /* See if this can be handled with a vec_shr. We only do this if the
> + second vector is all zeroes. */
> + insn_code shift_code = optab_handler (vec_shr_optab, mode);
> + insn_code shift_code_qi = ((qimode != VOIDmode && qimode != mode)
> + ? optab_handler (vec_shr_optab, qimode)
> + : CODE_FOR_nothing);
> +
> + if (v1 == CONST0_RTX (GET_MODE (v1))
> + && (shift_code != CODE_FOR_nothing
> + || shift_code_qi != CODE_FOR_nothing))
> + {
> + rtx shift_amt = shift_amt_for_vec_perm_mask (sel);
> + if (shift_amt)
> {
> - shift_amt = shift_amt_for_vec_perm_mask (sel);
> - if (shift_amt)
> + struct expand_operand ops[3];
> + if (shift_code != CODE_FOR_nothing)
> {
> - struct expand_operand ops[3];
> - if (shift_code != CODE_FOR_nothing)
> - {
> - create_output_operand (&ops[0], target, mode);
> - create_input_operand (&ops[1], v0, mode);
> - create_convert_operand_from_type (&ops[2], shift_amt,
> - sizetype);
> - if (maybe_expand_insn (shift_code, 3, ops))
> - return ops[0].value;
> - }
> - if (shift_code_qi != CODE_FOR_nothing)
> - {
> - tmp = gen_reg_rtx (qimode);
> - create_output_operand (&ops[0], tmp, qimode);
> - create_input_operand (&ops[1], gen_lowpart (qimode, v0),
> - qimode);
> - create_convert_operand_from_type (&ops[2], shift_amt,
> - sizetype);
> - if (maybe_expand_insn (shift_code_qi, 3, ops))
> - return gen_lowpart (mode, ops[0].value);
> - }
> + create_output_operand (&ops[0], target, mode);
> + create_input_operand (&ops[1], v0, mode);
> + create_convert_operand_from_type (&ops[2], shift_amt, sizetype);
> + if (maybe_expand_insn (shift_code, 3, ops))
> + return ops[0].value;
> + }
> + if (shift_code_qi != CODE_FOR_nothing)
> + {
> + rtx tmp = gen_reg_rtx (qimode);
> + create_output_operand (&ops[0], tmp, qimode);
> + create_input_operand (&ops[1], gen_lowpart (qimode, v0), qimode);
> + create_convert_operand_from_type (&ops[2], shift_amt, sizetype);
> + if (maybe_expand_insn (shift_code_qi, 3, ops))
> + return gen_lowpart (mode, ops[0].value);
> }
> }
> + }
>
> - icode = direct_optab_handler (vec_perm_const_optab, mode);
> - if (icode != CODE_FOR_nothing)
> + icode = direct_optab_handler (vec_perm_const_optab, mode);
> + if (icode != CODE_FOR_nothing)
> + {
> + tmp = expand_vec_perm_1 (icode, target, v0, v1, sel);
> + if (tmp)
> + return tmp;
> + }
> +
> + /* Fall back to a constant byte-based permutation. */
> + if (qimode != VOIDmode)
> + {
> + vec = rtvec_alloc (w);
> + for (i = 0; i < e; ++i)
> {
> - tmp = expand_vec_perm_1 (icode, target, v0, v1, sel);
> - if (tmp)
> - return tmp;
> + unsigned int j, this_e;
> +
> + this_e = INTVAL (CONST_VECTOR_ELT (sel, i));
> + this_e &= 2 * e - 1;
> + this_e *= u;
> +
> + for (j = 0; j < u; ++j)
> + RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j);
> }
> + sel_qi = gen_rtx_CONST_VECTOR (qimode, vec);
>
> - /* Fall back to a constant byte-based permutation. */
> - if (qimode != VOIDmode)
> + icode = direct_optab_handler (vec_perm_const_optab, qimode);
> + if (icode != CODE_FOR_nothing)
> {
> - vec = rtvec_alloc (w);
> - for (i = 0; i < e; ++i)
> - {
> - unsigned int j, this_e;
> + tmp = gen_reg_rtx (qimode);
> + tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
> + gen_lowpart (qimode, v1), sel_qi);
> + if (tmp)
> + return gen_lowpart (mode, tmp);
> + }
> + }
>
> - this_e = INTVAL (CONST_VECTOR_ELT (sel, i));
> - this_e &= 2 * e - 1;
> - this_e *= u;
> + /* Otherwise expand as a fully variable permutation. */
>
> - for (j = 0; j < u; ++j)
> - RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j);
> - }
> - sel_qi = gen_rtx_CONST_VECTOR (qimode, vec);
> + icode = direct_optab_handler (vec_perm_optab, mode);
> + if (icode != CODE_FOR_nothing)
> + {
> + rtx tmp = expand_vec_perm_1 (icode, target, v0, v1, sel);
> + if (tmp)
> + return tmp;
> + }
>
> - icode = direct_optab_handler (vec_perm_const_optab, qimode);
> - if (icode != CODE_FOR_nothing)
> - {
> - tmp = mode != qimode ? gen_reg_rtx (qimode) : target;
> - tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
> - gen_lowpart (qimode, v1), sel_qi);
> - if (tmp)
> - return gen_lowpart (mode, tmp);
> - }
> + if (qimode != VOIDmode)
> + {
> + icode = direct_optab_handler (vec_perm_optab, qimode);
> + if (icode != CODE_FOR_nothing)
> + {
> + rtx tmp = gen_reg_rtx (qimode);
> + tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
> + gen_lowpart (qimode, v1), sel_qi);
> + if (tmp)
> + return gen_lowpart (mode, tmp);
> }
> }
>
> - /* Otherwise expand as a fully variable permuation. */
> + return NULL_RTX;
> +}
> +
> +/* Implement a permutation of vectors v0 and v1 using the permutation
> + vector in SEL and return the result. Use TARGET to hold the result
> + if nonnull and convenient.
> +
> + MODE is the mode of the vectors being permuted (V0 and V1).
> + SEL must have the integer equivalent of MODE and is known to be
> + unsuitable for permutes with a constant permutation vector. */
> +
> +static rtx
> +expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
> +{
> + enum insn_code icode;
> + unsigned int i, w, u;
> + rtx tmp, sel_qi;
> + rtvec vec;
> +
> + w = GET_MODE_SIZE (mode);
> + u = GET_MODE_UNIT_SIZE (mode);
> +
> + if (!target || GET_MODE (target) != mode)
> + target = gen_reg_rtx (mode);
> +
> icode = direct_optab_handler (vec_perm_optab, mode);
> if (icode != CODE_FOR_nothing)
> {
> @@ -5545,50 +5594,47 @@ expand_vec_perm (machine_mode mode, rtx
>
> /* As a special case to aid several targets, lower the element-based
> permutation to a byte-based permutation and try again. */
> - if (qimode == VOIDmode)
> + machine_mode qimode;
> + if (!qimode_for_vec_perm (mode).exists (&qimode))
> return NULL_RTX;
> icode = direct_optab_handler (vec_perm_optab, qimode);
> if (icode == CODE_FOR_nothing)
> return NULL_RTX;
>
> - if (sel_qi == NULL)
> + /* Multiply each element by its byte size. */
> + machine_mode selmode = GET_MODE (sel);
> + if (u == 2)
> + sel = expand_simple_binop (selmode, PLUS, sel, sel,
> + NULL, 0, OPTAB_DIRECT);
> + else
> + sel = expand_simple_binop (selmode, ASHIFT, sel, GEN_INT (exact_log2 (u)),
> + NULL, 0, OPTAB_DIRECT);
> + gcc_assert (sel != NULL);
> +
> + /* Broadcast the low byte of each element into each of its bytes. */
> + vec = rtvec_alloc (w);
> + for (i = 0; i < w; ++i)
> {
> - /* Multiply each element by its byte size. */
> - machine_mode selmode = GET_MODE (sel);
> - if (u == 2)
> - sel = expand_simple_binop (selmode, PLUS, sel, sel,
> - NULL, 0, OPTAB_DIRECT);
> - else
> - sel = expand_simple_binop (selmode, ASHIFT, sel,
> - GEN_INT (exact_log2 (u)),
> - NULL, 0, OPTAB_DIRECT);
> - gcc_assert (sel != NULL);
> -
> - /* Broadcast the low byte each element into each of its bytes. */
> - vec = rtvec_alloc (w);
> - for (i = 0; i < w; ++i)
> - {
> - int this_e = i / u * u;
> - if (BYTES_BIG_ENDIAN)
> - this_e += u - 1;
> - RTVEC_ELT (vec, i) = GEN_INT (this_e);
> - }
> - tmp = gen_rtx_CONST_VECTOR (qimode, vec);
> - sel = gen_lowpart (qimode, sel);
> - sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
> - gcc_assert (sel != NULL);
> -
> - /* Add the byte offset to each byte element. */
> - /* Note that the definition of the indicies here is memory ordering,
> - so there should be no difference between big and little endian. */
> - vec = rtvec_alloc (w);
> - for (i = 0; i < w; ++i)
> - RTVEC_ELT (vec, i) = GEN_INT (i % u);
> - tmp = gen_rtx_CONST_VECTOR (qimode, vec);
> - sel_qi = expand_simple_binop (qimode, PLUS, sel, tmp,
> - sel, 0, OPTAB_DIRECT);
> - gcc_assert (sel_qi != NULL);
> + int this_e = i / u * u;
> + if (BYTES_BIG_ENDIAN)
> + this_e += u - 1;
> + RTVEC_ELT (vec, i) = GEN_INT (this_e);
> }
> + tmp = gen_rtx_CONST_VECTOR (qimode, vec);
> + sel = gen_lowpart (qimode, sel);
> + sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
> + gcc_assert (sel != NULL);
> +
> + /* Add the byte offset to each byte element. */
> + /* Note that the definition of the indices here is memory ordering,
> + so there should be no difference between big and little endian. */
> + vec = rtvec_alloc (w);
> + for (i = 0; i < w; ++i)
> + RTVEC_ELT (vec, i) = GEN_INT (i % u);
> + tmp = gen_rtx_CONST_VECTOR (qimode, vec);
> + sel_qi = expand_simple_binop (qimode, PLUS, sel, tmp,
> + sel, 0, OPTAB_DIRECT);
> + gcc_assert (sel_qi != NULL);
>
> tmp = mode != qimode ? gen_reg_rtx (qimode) : target;
> tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
>