[PATCH] aarch64: Add extend-as-extract-with-shift pattern [PR96998]

Richard Sandiford <richard.sandiford@arm.com>
Thu Sep 17 07:10:22 GMT 2020


Alex Coplan <alex.coplan@arm.com> writes:
> Hi Richard,
>
> On 10/09/2020 19:18, Richard Sandiford wrote:
>> Alex Coplan <alex.coplan@arm.com> writes:
>> > Hello,
>> >
>> > Since r11-2903-g6b3034eaba83935d9f6dfb20d2efbdb34b5b00bf introduced a
>> > canonicalization from mult to shift on address reloads, a missing
>> > pattern in the AArch64 backend was exposed.
>> >
>> > In particular, we were missing the ashift variant of
>> > *add_<optab><mode>_multp2 (this mult variant is redundant and was
>> > removed in r11-3033-g2f8ae301f6a125f50b0a758047fcddae7b68daa8).
>> >
>> > This patch adds that missing pattern (fixing PR96998), updates
>> > aarch64_is_extend_from_extract() to work for the shift pattern (instead
>> > of the redundant mult variant) and updates callers in the cost
>> > calculations to apply the costing for the shift variant instead.
>> 
>> Hmm.  I think we should instead fix this in combine.
>
> Ok, thanks for the review. Please find a revised patch attached which
> fixes this in combine instead.

Thanks for doing this, looks like a really nice clean-up.

> @@ -7650,20 +7650,29 @@ make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos,
>  	is_mode = GET_MODE (SUBREG_REG (inner));
>        inner = SUBREG_REG (inner);
>      }
> -  else if (GET_CODE (inner) == ASHIFT
> +  else if ((GET_CODE (inner) == ASHIFT || GET_CODE (inner) == MULT)
>  	   && CONST_INT_P (XEXP (inner, 1))
> -	   && pos_rtx == 0 && pos == 0
> -	   && len > UINTVAL (XEXP (inner, 1)))
> -    {
> -      /* We're extracting the least significant bits of an rtx
> -	 (ashift X (const_int C)), where LEN > C.  Extract the
> -	 least significant (LEN - C) bits of X, giving an rtx
> -	 whose mode is MODE, then shift it left C times.  */
> -      new_rtx = make_extraction (mode, XEXP (inner, 0),
> -			     0, 0, len - INTVAL (XEXP (inner, 1)),
> -			     unsignedp, in_dest, in_compare);
> -      if (new_rtx != 0)
> -	return gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1));
> +	   && pos_rtx == 0 && pos == 0)
> +    {
> +      const HOST_WIDE_INT ci = INTVAL (XEXP (inner, 1));
> +      const bool mult = GET_CODE (inner) == MULT;
> +      const int shift_amt = mult ? exact_log2 (ci) : ci;

Not going to be a problem in practice but: HOST_WIDE_INT would be better
than int here, so that we don't truncate the value for ASHIFT before it
has been tested.  Similarly:

> +
> +      if (shift_amt > 0 && len > (unsigned)shift_amt)

unsigned HOST_WIDE_INT here.
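
I.e. something like this (untested, just to show the intended types):

      /* Keep the value in HOST_WIDE_INT so the ASHIFT amount isn't
	 truncated before the comparison below.  */
      const HOST_WIDE_INT ci = INTVAL (XEXP (inner, 1));
      const bool mult = GET_CODE (inner) == MULT;
      const HOST_WIDE_INT shift_amt = mult ? exact_log2 (ci) : ci;

      if (shift_amt > 0 && len > (unsigned HOST_WIDE_INT) shift_amt)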

> +	{
> +	  /* We're extracting the least significant bits of an rtx
> +	     (ashift X (const_int C)) or (mult X (const_int (2^C))),
> +	     where LEN > C.  Extract the least significant (LEN - C) bits
> +	     of X, giving an rtx whose mode is MODE, then shift it left
> +	     C times.  */
> +	  new_rtx = make_extraction (mode, XEXP (inner, 0),
> +				 0, 0, len - shift_amt,
> +				 unsignedp, in_dest, in_compare);
> +	  if (new_rtx)
> +	    return mult
> +	    ? gen_rtx_MULT (mode, new_rtx, XEXP (inner, 1))
> +	    : gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1));

Easier as gen_rtx_fmt_ee (GET_CODE (inner), mode, …);
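
I.e. something along the lines of (with the operands from the patch
filled in):

	  if (new_rtx)
	    return gen_rtx_fmt_ee (GET_CODE (inner), mode, new_rtx,
				   XEXP (inner, 1));

which avoids spelling out the two codes separately.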

The combine parts LGTM otherwise, but Segher should have the
final say.

> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index b251f39..56738ae 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -2811,33 +2811,6 @@ aarch64_is_noplt_call_p (rtx sym)
>    return false;
>  }
>  
> -/* Return true if the offsets to a zero/sign-extract operation
> -   represent an expression that matches an extend operation.  The
> -   operands represent the parameters from
> -
> -   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
> -bool
> -aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
> -				rtx extract_imm)
> -{
> -  HOST_WIDE_INT mult_val, extract_val;
> -
> -  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
> -    return false;
> -
> -  mult_val = INTVAL (mult_imm);
> -  extract_val = INTVAL (extract_imm);
> -
> -  if (extract_val > 8
> -      && extract_val < GET_MODE_BITSIZE (mode)
> -      && exact_log2 (extract_val & ~7) > 0
> -      && (extract_val & 7) <= 4
> -      && mult_val == (1 << (extract_val & 7)))
> -    return true;
> -
> -  return false;
> -}
> -
>  /* Emit an insn that's a simple single-set.  Both the operands must be
>     known to be valid.  */
>  inline static rtx_insn *
> @@ -8837,22 +8810,6 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x,
>        index = XEXP (XEXP (x, 0), 0);
>        shift = INTVAL (XEXP (x, 1));
>      }
> -  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
> -  else if ((GET_CODE (x) == SIGN_EXTRACT
> -	    || GET_CODE (x) == ZERO_EXTRACT)
> -	   && GET_MODE (x) == DImode
> -	   && GET_CODE (XEXP (x, 0)) == MULT
> -	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
> -	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
> -    {
> -      type = (GET_CODE (x) == SIGN_EXTRACT)
> -	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
> -      index = XEXP (XEXP (x, 0), 0);
> -      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
> -      if (INTVAL (XEXP (x, 1)) != 32 + shift
> -	  || INTVAL (XEXP (x, 2)) != 0)
> -	shift = -1;
> -    }
>    /* (and:DI (mult:DI (reg:DI) (const_int scale))
>       (const_int 0xffffffff<<shift)) */
>    else if (GET_CODE (x) == AND
> @@ -8868,22 +8825,6 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x,
>        if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
>  	shift = -1;
>      }
> -  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
> -  else if ((GET_CODE (x) == SIGN_EXTRACT
> -	    || GET_CODE (x) == ZERO_EXTRACT)
> -	   && GET_MODE (x) == DImode
> -	   && GET_CODE (XEXP (x, 0)) == ASHIFT
> -	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
> -	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
> -    {
> -      type = (GET_CODE (x) == SIGN_EXTRACT)
> -	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
> -      index = XEXP (XEXP (x, 0), 0);
> -      shift = INTVAL (XEXP (XEXP (x, 0), 1));
> -      if (INTVAL (XEXP (x, 1)) != 32 + shift
> -	  || INTVAL (XEXP (x, 2)) != 0)
> -	shift = -1;
> -    }
>    /* (and:DI (ashift:DI (reg:DI) (const_int shift))
>       (const_int 0xffffffff<<shift)) */
>    else if (GET_CODE (x) == AND
> @@ -11259,16 +11200,6 @@ aarch64_strip_extend (rtx x, bool strip_shift)
>    if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
>      return op;
>  
> -  /* Zero and sign extraction of a widened value.  */
> -  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
> -      && XEXP (op, 2) == const0_rtx
> -      && GET_CODE (XEXP (op, 0)) == MULT
> -      && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
> -					 XEXP (op, 1)))
> -    return XEXP (XEXP (op, 0), 0);
> -
> -  /* It can also be represented (for zero-extend) as an AND with an
> -     immediate.  */
>    if (GET_CODE (op) == AND
>        && GET_CODE (XEXP (op, 0)) == MULT
>        && CONST_INT_P (XEXP (XEXP (op, 0), 1))
> @@ -11606,31 +11537,11 @@ aarch64_branch_cost (bool speed_p, bool predictable_p)
>  /* Return true if the RTX X in mode MODE is a zero or sign extract
>     usable in an ADD or SUB (extended register) instruction.  */
>  static bool
> -aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
> -{
> -  /* Catch add with a sign extract.
> -     This is add_<optab><mode>_multp2.  */
> -  if (GET_CODE (x) == SIGN_EXTRACT
> -      || GET_CODE (x) == ZERO_EXTRACT)
> -    {
> -      rtx op0 = XEXP (x, 0);
> -      rtx op1 = XEXP (x, 1);
> -      rtx op2 = XEXP (x, 2);
> -
> -      if (GET_CODE (op0) == MULT
> -	  && CONST_INT_P (op1)
> -	  && op2 == const0_rtx
> -	  && CONST_INT_P (XEXP (op0, 1))
> -	  && aarch64_is_extend_from_extract (mode,
> -					     XEXP (op0, 1),
> -					     op1))
> -	{
> -	  return true;
> -	}
> -    }
> +aarch64_rtx_arith_op_extract_p (rtx x)
> +{
>    /* The simple case <ARITH>, XD, XN, XM, [us]xt.
>       No shift.  */
> -  else if (GET_CODE (x) == SIGN_EXTEND
> +  if (GET_CODE (x) == SIGN_EXTEND
>  	   || GET_CODE (x) == ZERO_EXTEND)

Should reindent this line too.
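
I.e. so that the "||" lines up under the operand of the new "if":

  if (GET_CODE (x) == SIGN_EXTEND
      || GET_CODE (x) == ZERO_EXTEND)
    return REG_P (XEXP (x, 0));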

>      return REG_P (XEXP (x, 0));
>  
> @@ -12319,7 +12230,7 @@ cost_minus:
>  
>  	/* Look for SUB (extended register).  */
>  	if (is_a <scalar_int_mode> (mode, &int_mode)
> -	    && aarch64_rtx_arith_op_extract_p (op1, int_mode))
> +	    && aarch64_rtx_arith_op_extract_p (op1))
>  	  {
>  	    if (speed)
>  	      *cost += extra_cost->alu.extend_arith;
> @@ -12399,7 +12310,7 @@ cost_plus:
>  
>  	/* Look for ADD (extended register).  */
>  	if (is_a <scalar_int_mode> (mode, &int_mode)
> -	    && aarch64_rtx_arith_op_extract_p (op0, int_mode))
> +	    && aarch64_rtx_arith_op_extract_p (op0))
>  	  {
>  	    if (speed)
>  	      *cost += extra_cost->alu.extend_arith;

int_mode is now unused in both hunks, so can just remove the “, &int_mode”s.
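
I.e. (untested) the SUB caller would just become:

	/* Look for SUB (extended register).  */
	if (is_a <scalar_int_mode> (mode)
	    && aarch64_rtx_arith_op_extract_p (op1))

and similarly with op0 for the ADD case.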

The aarch64 changes are OK with those (incredibly inconsequential)
comments fixed.

Thanks,
Richard

