[PATCH] aarch64: Add extend-as-extract-with-shift pattern [PR96998]
Alex Coplan
alex.coplan@arm.com
Wed Sep 16 17:08:14 GMT 2020
Hi Richard,
On 10/09/2020 19:18, Richard Sandiford wrote:
> Alex Coplan <alex.coplan@arm.com> writes:
> > Hello,
> >
> > Since r11-2903-g6b3034eaba83935d9f6dfb20d2efbdb34b5b00bf introduced a
> > canonicalization from mult to shift on address reloads, a missing
> > pattern in the AArch64 backend was exposed.
> >
> > In particular, we were missing the ashift variant of
> > *add_<optab><mode>_multp2 (this mult variant is redundant and was
> > removed in r11-3033-g2f8ae301f6a125f50b0a758047fcddae7b68daa8).
> >
> > This patch adds that missing pattern (fixing PR96998), updates
> > aarch64_is_extend_from_extract() to work for the shift pattern (instead
> > of the redundant mult variant) and updates callers in the cost
> > calculations to apply the costing for the shift variant instead.
>
> Hmm. I think we should instead fix this in combine.
Ok, thanks for the review. Please find a revised patch attached which
fixes this in combine instead.
> If we do that, we should be able to remove the handling of
> extract-based addresses in aarch64_classify_index & co.
Nice. I've tried to remove all the redundant extract-based address
handling in the AArch64 backend as a result.
The revised patch also fixes up a comment in combine which appears to
have suffered from bitrot.
Testing:
* Bootstrap and regtest on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, and x86_64-pc-linux-gnu.
* Regtested an x64 -> aarch64-none-elf cross with -mabi=ilp32.
* Checked that we no longer ICE when building linux-next on AArch64.
OK for trunk?
Thanks,
Alex
---
gcc/ChangeLog:
* combine.c (expand_compound_operation): Tweak variable name in
comment to match source.
(make_extraction): Handle mult by power of two in addition to
ashift.
* config/aarch64/aarch64.c (aarch64_is_extend_from_extract): Delete.
(aarch64_classify_index): Remove redundant extend-as-extract handling.
(aarch64_strip_extend): Likewise.
(aarch64_rtx_arith_op_extract_p): Likewise, remove now-unused
parameter. Update callers...
(aarch64_rtx_costs): ... here.
gcc/testsuite/ChangeLog:
* gcc.c-torture/compile/pr96998.c: New test.
-------------- next part --------------
diff --git a/gcc/combine.c b/gcc/combine.c
index c88382e..cd8e544 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -7419,8 +7419,8 @@ expand_compound_operation (rtx x)
}
/* If we reach here, we want to return a pair of shifts. The inner
- shift is a left shift of BITSIZE - POS - LEN bits. The outer
- shift is a right shift of BITSIZE - LEN bits. It is arithmetic or
+ shift is a left shift of MODEWIDTH - POS - LEN bits. The outer
+ shift is a right shift of MODEWIDTH - LEN bits. It is arithmetic or
logical depending on the value of UNSIGNEDP.
If this was a ZERO_EXTEND or ZERO_EXTRACT, this pair of shifts will be
@@ -7650,20 +7650,29 @@ make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos,
is_mode = GET_MODE (SUBREG_REG (inner));
inner = SUBREG_REG (inner);
}
- else if (GET_CODE (inner) == ASHIFT
+ else if ((GET_CODE (inner) == ASHIFT || GET_CODE (inner) == MULT)
&& CONST_INT_P (XEXP (inner, 1))
- && pos_rtx == 0 && pos == 0
- && len > UINTVAL (XEXP (inner, 1)))
- {
- /* We're extracting the least significant bits of an rtx
- (ashift X (const_int C)), where LEN > C. Extract the
- least significant (LEN - C) bits of X, giving an rtx
- whose mode is MODE, then shift it left C times. */
- new_rtx = make_extraction (mode, XEXP (inner, 0),
- 0, 0, len - INTVAL (XEXP (inner, 1)),
- unsignedp, in_dest, in_compare);
- if (new_rtx != 0)
- return gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1));
+ && pos_rtx == 0 && pos == 0)
+ {
+ const HOST_WIDE_INT ci = INTVAL (XEXP (inner, 1));
+ const bool mult = GET_CODE (inner) == MULT;
+ const int shift_amt = mult ? exact_log2 (ci) : ci;
+
+ if (shift_amt > 0 && len > (unsigned)shift_amt)
+ {
+ /* We're extracting the least significant bits of an rtx
+ (ashift X (const_int C)) or (mult X (const_int (2^C))),
+ where LEN > C. Extract the least significant (LEN - C) bits
+ of X, giving an rtx whose mode is MODE, then shift it left
+ C times. */
+ new_rtx = make_extraction (mode, XEXP (inner, 0),
+ 0, 0, len - shift_amt,
+ unsignedp, in_dest, in_compare);
+ if (new_rtx)
+ return mult
+ ? gen_rtx_MULT (mode, new_rtx, XEXP (inner, 1))
+ : gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1));
+ }
}
else if (GET_CODE (inner) == TRUNCATE
/* If trying or potentionally trying to extract
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b251f39..56738ae 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2811,33 +2811,6 @@ aarch64_is_noplt_call_p (rtx sym)
return false;
}
-/* Return true if the offsets to a zero/sign-extract operation
- represent an expression that matches an extend operation. The
- operands represent the parameters from
-
- (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
-bool
-aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
- rtx extract_imm)
-{
- HOST_WIDE_INT mult_val, extract_val;
-
- if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
- return false;
-
- mult_val = INTVAL (mult_imm);
- extract_val = INTVAL (extract_imm);
-
- if (extract_val > 8
- && extract_val < GET_MODE_BITSIZE (mode)
- && exact_log2 (extract_val & ~7) > 0
- && (extract_val & 7) <= 4
- && mult_val == (1 << (extract_val & 7)))
- return true;
-
- return false;
-}
-
/* Emit an insn that's a simple single-set. Both the operands must be
known to be valid. */
inline static rtx_insn *
@@ -8837,22 +8810,6 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x,
index = XEXP (XEXP (x, 0), 0);
shift = INTVAL (XEXP (x, 1));
}
- /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
- else if ((GET_CODE (x) == SIGN_EXTRACT
- || GET_CODE (x) == ZERO_EXTRACT)
- && GET_MODE (x) == DImode
- && GET_CODE (XEXP (x, 0)) == MULT
- && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
- && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
- {
- type = (GET_CODE (x) == SIGN_EXTRACT)
- ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
- index = XEXP (XEXP (x, 0), 0);
- shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
- if (INTVAL (XEXP (x, 1)) != 32 + shift
- || INTVAL (XEXP (x, 2)) != 0)
- shift = -1;
- }
/* (and:DI (mult:DI (reg:DI) (const_int scale))
(const_int 0xffffffff<<shift)) */
else if (GET_CODE (x) == AND
@@ -8868,22 +8825,6 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x,
if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
shift = -1;
}
- /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
- else if ((GET_CODE (x) == SIGN_EXTRACT
- || GET_CODE (x) == ZERO_EXTRACT)
- && GET_MODE (x) == DImode
- && GET_CODE (XEXP (x, 0)) == ASHIFT
- && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
- && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
- {
- type = (GET_CODE (x) == SIGN_EXTRACT)
- ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
- index = XEXP (XEXP (x, 0), 0);
- shift = INTVAL (XEXP (XEXP (x, 0), 1));
- if (INTVAL (XEXP (x, 1)) != 32 + shift
- || INTVAL (XEXP (x, 2)) != 0)
- shift = -1;
- }
/* (and:DI (ashift:DI (reg:DI) (const_int shift))
(const_int 0xffffffff<<shift)) */
else if (GET_CODE (x) == AND
@@ -11259,16 +11200,6 @@ aarch64_strip_extend (rtx x, bool strip_shift)
if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
return op;
- /* Zero and sign extraction of a widened value. */
- if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
- && XEXP (op, 2) == const0_rtx
- && GET_CODE (XEXP (op, 0)) == MULT
- && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
- XEXP (op, 1)))
- return XEXP (XEXP (op, 0), 0);
-
- /* It can also be represented (for zero-extend) as an AND with an
- immediate. */
if (GET_CODE (op) == AND
&& GET_CODE (XEXP (op, 0)) == MULT
&& CONST_INT_P (XEXP (XEXP (op, 0), 1))
@@ -11606,31 +11537,11 @@ aarch64_branch_cost (bool speed_p, bool predictable_p)
/* Return true if the RTX X in mode MODE is a zero or sign extract
usable in an ADD or SUB (extended register) instruction. */
static bool
-aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
-{
- /* Catch add with a sign extract.
- This is add_<optab><mode>_multp2. */
- if (GET_CODE (x) == SIGN_EXTRACT
- || GET_CODE (x) == ZERO_EXTRACT)
- {
- rtx op0 = XEXP (x, 0);
- rtx op1 = XEXP (x, 1);
- rtx op2 = XEXP (x, 2);
-
- if (GET_CODE (op0) == MULT
- && CONST_INT_P (op1)
- && op2 == const0_rtx
- && CONST_INT_P (XEXP (op0, 1))
- && aarch64_is_extend_from_extract (mode,
- XEXP (op0, 1),
- op1))
- {
- return true;
- }
- }
+aarch64_rtx_arith_op_extract_p (rtx x)
+{
/* The simple case <ARITH>, XD, XN, XM, [us]xt.
No shift. */
- else if (GET_CODE (x) == SIGN_EXTEND
+ if (GET_CODE (x) == SIGN_EXTEND
|| GET_CODE (x) == ZERO_EXTEND)
return REG_P (XEXP (x, 0));
@@ -12319,7 +12230,7 @@ cost_minus:
/* Look for SUB (extended register). */
if (is_a <scalar_int_mode> (mode, &int_mode)
- && aarch64_rtx_arith_op_extract_p (op1, int_mode))
+ && aarch64_rtx_arith_op_extract_p (op1))
{
if (speed)
*cost += extra_cost->alu.extend_arith;
@@ -12399,7 +12310,7 @@ cost_plus:
/* Look for ADD (extended register). */
if (is_a <scalar_int_mode> (mode, &int_mode)
- && aarch64_rtx_arith_op_extract_p (op0, int_mode))
+ && aarch64_rtx_arith_op_extract_p (op0))
{
if (speed)
*cost += extra_cost->alu.extend_arith;
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96998.c b/gcc/testsuite/gcc.c-torture/compile/pr96998.c
new file mode 100644
index 0000000..a75d5dc
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr96998.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target arm*-*-* aarch64*-*-* } } */
+
+int h(void);
+struct c d;
+struct c {
+ int e[1];
+};
+
+void f(void) {
+ int g;
+ for (;; g = h()) {
+ int *i = &d.e[g];
+ asm("" : "=Q"(*i));
+ }
+}
More information about the Gcc-patches
mailing list