[PATCH] aarch64: Add extend-as-extract-with-shift pattern [PR96998]
Richard Sandiford
richard.sandiford@arm.com
Thu Sep 17 07:10:22 GMT 2020
Alex Coplan <alex.coplan@arm.com> writes:
> Hi Richard,
>
> On 10/09/2020 19:18, Richard Sandiford wrote:
>> Alex Coplan <alex.coplan@arm.com> writes:
>> > Hello,
>> >
>> > Since r11-2903-g6b3034eaba83935d9f6dfb20d2efbdb34b5b00bf introduced a
>> > canonicalization from mult to shift on address reloads, a missing
>> > pattern in the AArch64 backend was exposed.
>> >
>> > In particular, we were missing the ashift variant of
>> > *add_<optab><mode>_multp2 (this mult variant is redundant and was
>> > removed in r11-3033-g2f8ae301f6a125f50b0a758047fcddae7b68daa8).
>> >
>> > This patch adds that missing pattern (fixing PR96998), updates
>> > aarch64_is_extend_from_extract() to work for the shift pattern (instead
>> > of the redundant mult variant) and updates callers in the cost
>> > calculations to apply the costing for the shift variant instead.
>>
>> Hmm. I think we should instead fix this in combine.
>
> Ok, thanks for the review. Please find a revised patch attached which
> fixes this in combine instead.
Thanks for doing this, looks like a really nice clean-up.
> @@ -7650,20 +7650,29 @@ make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos,
> is_mode = GET_MODE (SUBREG_REG (inner));
> inner = SUBREG_REG (inner);
> }
> - else if (GET_CODE (inner) == ASHIFT
> + else if ((GET_CODE (inner) == ASHIFT || GET_CODE (inner) == MULT)
> && CONST_INT_P (XEXP (inner, 1))
> - && pos_rtx == 0 && pos == 0
> - && len > UINTVAL (XEXP (inner, 1)))
> - {
> - /* We're extracting the least significant bits of an rtx
> - (ashift X (const_int C)), where LEN > C. Extract the
> - least significant (LEN - C) bits of X, giving an rtx
> - whose mode is MODE, then shift it left C times. */
> - new_rtx = make_extraction (mode, XEXP (inner, 0),
> - 0, 0, len - INTVAL (XEXP (inner, 1)),
> - unsignedp, in_dest, in_compare);
> - if (new_rtx != 0)
> - return gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1));
> + && pos_rtx == 0 && pos == 0)
> + {
> + const HOST_WIDE_INT ci = INTVAL (XEXP (inner, 1));
> + const bool mult = GET_CODE (inner) == MULT;
> + const int shift_amt = mult ? exact_log2 (ci) : ci;
Not going to be a problem in practice but: HOST_WIDE_INT would be better
than int here, so that we don't truncate the value for ASHIFT before it
has been tested. Similarly:
> +
> + if (shift_amt > 0 && len > (unsigned)shift_amt)
unsigned HOST_WIDE_INT here.
> + {
> + /* We're extracting the least significant bits of an rtx
> + (ashift X (const_int C)) or (mult X (const_int (2^C))),
> + where LEN > C. Extract the least significant (LEN - C) bits
> + of X, giving an rtx whose mode is MODE, then shift it left
> + C times. */
> + new_rtx = make_extraction (mode, XEXP (inner, 0),
> + 0, 0, len - shift_amt,
> + unsignedp, in_dest, in_compare);
> + if (new_rtx)
> + return mult
> + ? gen_rtx_MULT (mode, new_rtx, XEXP (inner, 1))
> + : gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1));
Easier as gen_rtx_fmt_ee (GET_CODE (inner), mode, …);
The combine parts LGTM otherwise, but Segher should have the
final say.
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index b251f39..56738ae 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -2811,33 +2811,6 @@ aarch64_is_noplt_call_p (rtx sym)
> return false;
> }
>
> -/* Return true if the offsets to a zero/sign-extract operation
> - represent an expression that matches an extend operation. The
> - operands represent the parameters from
> -
> - (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
> -bool
> -aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
> - rtx extract_imm)
> -{
> - HOST_WIDE_INT mult_val, extract_val;
> -
> - if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
> - return false;
> -
> - mult_val = INTVAL (mult_imm);
> - extract_val = INTVAL (extract_imm);
> -
> - if (extract_val > 8
> - && extract_val < GET_MODE_BITSIZE (mode)
> - && exact_log2 (extract_val & ~7) > 0
> - && (extract_val & 7) <= 4
> - && mult_val == (1 << (extract_val & 7)))
> - return true;
> -
> - return false;
> -}
> -
> /* Emit an insn that's a simple single-set. Both the operands must be
> known to be valid. */
> inline static rtx_insn *
> @@ -8837,22 +8810,6 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x,
> index = XEXP (XEXP (x, 0), 0);
> shift = INTVAL (XEXP (x, 1));
> }
> - /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
> - else if ((GET_CODE (x) == SIGN_EXTRACT
> - || GET_CODE (x) == ZERO_EXTRACT)
> - && GET_MODE (x) == DImode
> - && GET_CODE (XEXP (x, 0)) == MULT
> - && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
> - && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
> - {
> - type = (GET_CODE (x) == SIGN_EXTRACT)
> - ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
> - index = XEXP (XEXP (x, 0), 0);
> - shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
> - if (INTVAL (XEXP (x, 1)) != 32 + shift
> - || INTVAL (XEXP (x, 2)) != 0)
> - shift = -1;
> - }
> /* (and:DI (mult:DI (reg:DI) (const_int scale))
> (const_int 0xffffffff<<shift)) */
> else if (GET_CODE (x) == AND
> @@ -8868,22 +8825,6 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x,
> if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
> shift = -1;
> }
> - /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
> - else if ((GET_CODE (x) == SIGN_EXTRACT
> - || GET_CODE (x) == ZERO_EXTRACT)
> - && GET_MODE (x) == DImode
> - && GET_CODE (XEXP (x, 0)) == ASHIFT
> - && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
> - && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
> - {
> - type = (GET_CODE (x) == SIGN_EXTRACT)
> - ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
> - index = XEXP (XEXP (x, 0), 0);
> - shift = INTVAL (XEXP (XEXP (x, 0), 1));
> - if (INTVAL (XEXP (x, 1)) != 32 + shift
> - || INTVAL (XEXP (x, 2)) != 0)
> - shift = -1;
> - }
> /* (and:DI (ashift:DI (reg:DI) (const_int shift))
> (const_int 0xffffffff<<shift)) */
> else if (GET_CODE (x) == AND
> @@ -11259,16 +11200,6 @@ aarch64_strip_extend (rtx x, bool strip_shift)
> if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
> return op;
>
> - /* Zero and sign extraction of a widened value. */
> - if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
> - && XEXP (op, 2) == const0_rtx
> - && GET_CODE (XEXP (op, 0)) == MULT
> - && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
> - XEXP (op, 1)))
> - return XEXP (XEXP (op, 0), 0);
> -
> - /* It can also be represented (for zero-extend) as an AND with an
> - immediate. */
> if (GET_CODE (op) == AND
> && GET_CODE (XEXP (op, 0)) == MULT
> && CONST_INT_P (XEXP (XEXP (op, 0), 1))
> @@ -11606,31 +11537,11 @@ aarch64_branch_cost (bool speed_p, bool predictable_p)
> /* Return true if the RTX X in mode MODE is a zero or sign extract
> usable in an ADD or SUB (extended register) instruction. */
> static bool
> -aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
> -{
> - /* Catch add with a sign extract.
> - This is add_<optab><mode>_multp2. */
> - if (GET_CODE (x) == SIGN_EXTRACT
> - || GET_CODE (x) == ZERO_EXTRACT)
> - {
> - rtx op0 = XEXP (x, 0);
> - rtx op1 = XEXP (x, 1);
> - rtx op2 = XEXP (x, 2);
> -
> - if (GET_CODE (op0) == MULT
> - && CONST_INT_P (op1)
> - && op2 == const0_rtx
> - && CONST_INT_P (XEXP (op0, 1))
> - && aarch64_is_extend_from_extract (mode,
> - XEXP (op0, 1),
> - op1))
> - {
> - return true;
> - }
> - }
> +aarch64_rtx_arith_op_extract_p (rtx x)
> +{
> /* The simple case <ARITH>, XD, XN, XM, [us]xt.
> No shift. */
> - else if (GET_CODE (x) == SIGN_EXTEND
> + if (GET_CODE (x) == SIGN_EXTEND
> || GET_CODE (x) == ZERO_EXTEND)
Should reindent this line too.
> return REG_P (XEXP (x, 0));
>
> @@ -12319,7 +12230,7 @@ cost_minus:
>
> /* Look for SUB (extended register). */
> if (is_a <scalar_int_mode> (mode, &int_mode)
> - && aarch64_rtx_arith_op_extract_p (op1, int_mode))
> + && aarch64_rtx_arith_op_extract_p (op1))
> {
> if (speed)
> *cost += extra_cost->alu.extend_arith;
> @@ -12399,7 +12310,7 @@ cost_plus:
>
> /* Look for ADD (extended register). */
> if (is_a <scalar_int_mode> (mode, &int_mode)
> - && aarch64_rtx_arith_op_extract_p (op0, int_mode))
> + && aarch64_rtx_arith_op_extract_p (op0))
> {
> if (speed)
> *cost += extra_cost->alu.extend_arith;
int_mode is now unused in both hunks, so the ", &int_mode" arguments can simply be removed.
The aarch64 changes are OK with those (incredibly inconsequential)
comments fixed.
Thanks,
Richard
More information about the Gcc-patches
mailing list