[PATCH] aarch64: Add extend-as-extract-with-shift pattern [PR96998]

Alex Coplan alex.coplan@arm.com
Wed Sep 16 17:08:14 GMT 2020


Hi Richard,

On 10/09/2020 19:18, Richard Sandiford wrote:
> Alex Coplan <alex.coplan@arm.com> writes:
> > Hello,
> >
> > Since r11-2903-g6b3034eaba83935d9f6dfb20d2efbdb34b5b00bf introduced a
> > canonicalization from mult to shift on address reloads, a missing
> > pattern in the AArch64 backend was exposed.
> >
> > In particular, we were missing the ashift variant of
> > *add_<optab><mode>_multp2 (this mult variant is redundant and was
> > removed in r11-3033-g2f8ae301f6a125f50b0a758047fcddae7b68daa8).
> >
> > This patch adds that missing pattern (fixing PR96998), updates
> > aarch64_is_extend_from_extract() to work for the shift pattern (instead
> > of the redundant mult variant) and updates callers in the cost
> > calculations to apply the costing for the shift variant instead.
> 
> Hmm.  I think we should instead fix this in combine.

Ok, thanks for the review. Please find a revised patch attached which
fixes this in combine instead.

> If we do that, we should be able to remove the handling of
> extract-based addresses in aarch64_classify_index & co.

Nice. I've tried to remove all the redundant extract-based address
handling in the AArch64 backend as a result.
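
Concretely, that means dropping aarch64_is_extend_from_extract () together
with the two extract-based cases in aarch64_classify_index () that matched
addresses of the form (as written in the comments being removed):

  (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0)
  (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0)

as well as the corresponding special cases in aarch64_strip_extend () and
in the cost calculations, since combine should no longer hand the backend
addresses in this extract-based form.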

The revised patch also fixes up a comment in combine's
expand_compound_operation () that appears to have suffered from bit rot:
it refers to BITSIZE where the code uses MODEWIDTH.
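
For reference, the new make_extraction () case behaves as described in the
comment added by the patch: when asked for the least significant LEN bits
of (ashift X (const_int C)) or (mult X (const_int 2^C)) with LEN > C, it
extracts the least significant (LEN - C) bits of X and then re-applies the
shift or multiply.  As a schematic example (illustrative only, not literal
combine output), extracting the low 32 bits of

  (mult:DI (reg:DI R) (const_int 4))

becomes, conceptually,

  (mult:DI <low 30 bits of (reg:DI R), in DImode> (const_int 4))

rather than a sign_extract/zero_extract wrapped around the whole
multiplication.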

Testing:
 * Bootstrap and regtest on aarch64-none-linux-gnu,
   arm-none-linux-gnueabihf, and x86_64-pc-linux-gnu.
 * Regtested an x64 -> aarch64-none-elf cross with -mabi=ilp32.
 * Checked that we no longer ICE when building linux-next on AArch64.

OK for trunk?

Thanks,
Alex

---

gcc/ChangeLog:

	* combine.c (expand_compound_operation): Tweak variable name in
	comment to match source.
	(make_extraction): Handle mult by power of two in addition to
	ashift.
	* config/aarch64/aarch64.c (aarch64_is_extend_from_extract): Delete.
	(aarch64_classify_index): Remove redundant extend-as-extract handling.
	(aarch64_strip_extend): Likewise.
	(aarch64_rtx_arith_op_extract_p): Likewise, remove now-unused
	parameter. Update callers...
	(aarch64_rtx_costs): ... here.

gcc/testsuite/ChangeLog:

	* gcc.c-torture/compile/pr96998.c: New test.
diff --git a/gcc/combine.c b/gcc/combine.c
index c88382e..cd8e544 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -7419,8 +7419,8 @@ expand_compound_operation (rtx x)
     }
 
   /* If we reach here, we want to return a pair of shifts.  The inner
-     shift is a left shift of BITSIZE - POS - LEN bits.  The outer
-     shift is a right shift of BITSIZE - LEN bits.  It is arithmetic or
+     shift is a left shift of MODEWIDTH - POS - LEN bits.  The outer
+     shift is a right shift of MODEWIDTH - LEN bits.  It is arithmetic or
      logical depending on the value of UNSIGNEDP.
 
      If this was a ZERO_EXTEND or ZERO_EXTRACT, this pair of shifts will be
@@ -7650,20 +7650,29 @@ make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos,
 	is_mode = GET_MODE (SUBREG_REG (inner));
       inner = SUBREG_REG (inner);
     }
-  else if (GET_CODE (inner) == ASHIFT
+  else if ((GET_CODE (inner) == ASHIFT || GET_CODE (inner) == MULT)
 	   && CONST_INT_P (XEXP (inner, 1))
-	   && pos_rtx == 0 && pos == 0
-	   && len > UINTVAL (XEXP (inner, 1)))
-    {
-      /* We're extracting the least significant bits of an rtx
-	 (ashift X (const_int C)), where LEN > C.  Extract the
-	 least significant (LEN - C) bits of X, giving an rtx
-	 whose mode is MODE, then shift it left C times.  */
-      new_rtx = make_extraction (mode, XEXP (inner, 0),
-			     0, 0, len - INTVAL (XEXP (inner, 1)),
-			     unsignedp, in_dest, in_compare);
-      if (new_rtx != 0)
-	return gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1));
+	   && pos_rtx == 0 && pos == 0)
+    {
+      const HOST_WIDE_INT ci = INTVAL (XEXP (inner, 1));
+      const bool mult = GET_CODE (inner) == MULT;
+      const int shift_amt = mult ? exact_log2 (ci) : ci;
+
+      if (shift_amt > 0 && len > (unsigned) shift_amt)
+	{
+	  /* We're extracting the least significant bits of an rtx
+	     (ashift X (const_int C)) or (mult X (const_int (2^C))),
+	     where LEN > C.  Extract the least significant (LEN - C) bits
+	     of X, giving an rtx whose mode is MODE, then shift it left
+	     C times.  */
+	  new_rtx = make_extraction (mode, XEXP (inner, 0),
+				 0, 0, len - shift_amt,
+				 unsignedp, in_dest, in_compare);
+	  if (new_rtx)
+	    return (mult
+		    ? gen_rtx_MULT (mode, new_rtx, XEXP (inner, 1))
+		    : gen_rtx_ASHIFT (mode, new_rtx, XEXP (inner, 1)));
+	}
     }
   else if (GET_CODE (inner) == TRUNCATE
 	   /* If trying or potentionally trying to extract
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b251f39..56738ae 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2811,33 +2811,6 @@ aarch64_is_noplt_call_p (rtx sym)
   return false;
 }
 
-/* Return true if the offsets to a zero/sign-extract operation
-   represent an expression that matches an extend operation.  The
-   operands represent the parameters from
-
-   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
-bool
-aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
-				rtx extract_imm)
-{
-  HOST_WIDE_INT mult_val, extract_val;
-
-  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
-    return false;
-
-  mult_val = INTVAL (mult_imm);
-  extract_val = INTVAL (extract_imm);
-
-  if (extract_val > 8
-      && extract_val < GET_MODE_BITSIZE (mode)
-      && exact_log2 (extract_val & ~7) > 0
-      && (extract_val & 7) <= 4
-      && mult_val == (1 << (extract_val & 7)))
-    return true;
-
-  return false;
-}
-
 /* Emit an insn that's a simple single-set.  Both the operands must be
    known to be valid.  */
 inline static rtx_insn *
@@ -8837,22 +8810,6 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x,
       index = XEXP (XEXP (x, 0), 0);
       shift = INTVAL (XEXP (x, 1));
     }
-  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
-  else if ((GET_CODE (x) == SIGN_EXTRACT
-	    || GET_CODE (x) == ZERO_EXTRACT)
-	   && GET_MODE (x) == DImode
-	   && GET_CODE (XEXP (x, 0)) == MULT
-	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
-	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
-    {
-      type = (GET_CODE (x) == SIGN_EXTRACT)
-	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
-      index = XEXP (XEXP (x, 0), 0);
-      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
-      if (INTVAL (XEXP (x, 1)) != 32 + shift
-	  || INTVAL (XEXP (x, 2)) != 0)
-	shift = -1;
-    }
   /* (and:DI (mult:DI (reg:DI) (const_int scale))
      (const_int 0xffffffff<<shift)) */
   else if (GET_CODE (x) == AND
@@ -8868,22 +8825,6 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x,
       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
 	shift = -1;
     }
-  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
-  else if ((GET_CODE (x) == SIGN_EXTRACT
-	    || GET_CODE (x) == ZERO_EXTRACT)
-	   && GET_MODE (x) == DImode
-	   && GET_CODE (XEXP (x, 0)) == ASHIFT
-	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
-	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
-    {
-      type = (GET_CODE (x) == SIGN_EXTRACT)
-	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
-      index = XEXP (XEXP (x, 0), 0);
-      shift = INTVAL (XEXP (XEXP (x, 0), 1));
-      if (INTVAL (XEXP (x, 1)) != 32 + shift
-	  || INTVAL (XEXP (x, 2)) != 0)
-	shift = -1;
-    }
   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
      (const_int 0xffffffff<<shift)) */
   else if (GET_CODE (x) == AND
@@ -11259,16 +11200,6 @@ aarch64_strip_extend (rtx x, bool strip_shift)
   if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
     return op;
 
-  /* Zero and sign extraction of a widened value.  */
-  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
-      && XEXP (op, 2) == const0_rtx
-      && GET_CODE (XEXP (op, 0)) == MULT
-      && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
-					 XEXP (op, 1)))
-    return XEXP (XEXP (op, 0), 0);
-
-  /* It can also be represented (for zero-extend) as an AND with an
-     immediate.  */
   if (GET_CODE (op) == AND
       && GET_CODE (XEXP (op, 0)) == MULT
       && CONST_INT_P (XEXP (XEXP (op, 0), 1))
@@ -11606,31 +11537,11 @@ aarch64_branch_cost (bool speed_p, bool predictable_p)
 /* Return true if the RTX X in mode MODE is a zero or sign extract
    usable in an ADD or SUB (extended register) instruction.  */
 static bool
-aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
-{
-  /* Catch add with a sign extract.
-     This is add_<optab><mode>_multp2.  */
-  if (GET_CODE (x) == SIGN_EXTRACT
-      || GET_CODE (x) == ZERO_EXTRACT)
-    {
-      rtx op0 = XEXP (x, 0);
-      rtx op1 = XEXP (x, 1);
-      rtx op2 = XEXP (x, 2);
-
-      if (GET_CODE (op0) == MULT
-	  && CONST_INT_P (op1)
-	  && op2 == const0_rtx
-	  && CONST_INT_P (XEXP (op0, 1))
-	  && aarch64_is_extend_from_extract (mode,
-					     XEXP (op0, 1),
-					     op1))
-	{
-	  return true;
-	}
-    }
+aarch64_rtx_arith_op_extract_p (rtx x)
+{
   /* The simple case <ARITH>, XD, XN, XM, [us]xt.
      No shift.  */
-  else if (GET_CODE (x) == SIGN_EXTEND
+  if (GET_CODE (x) == SIGN_EXTEND
 	   || GET_CODE (x) == ZERO_EXTEND)
     return REG_P (XEXP (x, 0));
 
@@ -12319,7 +12230,7 @@ cost_minus:
 
 	/* Look for SUB (extended register).  */
 	if (is_a <scalar_int_mode> (mode, &int_mode)
-	    && aarch64_rtx_arith_op_extract_p (op1, int_mode))
+	    && aarch64_rtx_arith_op_extract_p (op1))
 	  {
 	    if (speed)
 	      *cost += extra_cost->alu.extend_arith;
@@ -12399,7 +12310,7 @@ cost_plus:
 
 	/* Look for ADD (extended register).  */
 	if (is_a <scalar_int_mode> (mode, &int_mode)
-	    && aarch64_rtx_arith_op_extract_p (op0, int_mode))
+	    && aarch64_rtx_arith_op_extract_p (op0))
 	  {
 	    if (speed)
 	      *cost += extra_cost->alu.extend_arith;
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96998.c b/gcc/testsuite/gcc.c-torture/compile/pr96998.c
new file mode 100644
index 0000000..a75d5dc
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr96998.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target arm*-*-* aarch64*-*-* } } */
+
+int h(void);
+struct c d;
+struct c {
+  int e[1];
+};
+
+void f(void) {
+  int g;
+  for (;; g = h()) {
+    int *i = &d.e[g];
+    asm("" : "=Q"(*i));
+  }
+}

