This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words
Thank you, following is the revised patch that addresses cortex M3
erratum and new diff for ldmstm.md.
Index: thumb2.md
===================================================================
--- thumb2.md (revision 163363)
+++ thumb2.md (working copy)
@@ -1257,3 +1257,205 @@ (define_peephole2
"
operands[2] = GEN_INT (32 - INTVAL (operands[2]));
")
+
+(define_insn "*thumb2_ldrd"
+ [(parallel [(set (match_operand:SI 0 "s_register_operand" "")
+ (mem:SI (plus:SI
+ (match_operand:SI 2 "s_register_operand" "rk")
+ (match_operand:SI 3 "const_int_operand" "Py"))))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (mem:SI (plus:SI (match_dup 2)
+ (match_operand:SI 4 "const_int_operand" "Py"))))])]
+ "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+ operands[2], operands[3], operands[4], 1)"
+ "*
+ {
+ HOST_WIDE_INT offset1 = INTVAL (operands[3]);
+ HOST_WIDE_INT offset2 = INTVAL (operands[4]);
+ if (offset1 > offset2)
+ {
+ /* Swap the operands so that memory [base+offset1] is loaded into
+ operands[0]. */
+ rtx tmp = operands[0];
+ operands[0] = operands[1];
+ operands[1] = tmp;
+ tmp = operands[3];
+ operands[3] = operands[4];
+ operands[4] = tmp;
+ offset1 = INTVAL (operands[3]);
+ offset2 = INTVAL (operands[4]);
+ }
+ if (fix_cm3_ldrd && (operands[2] == operands[0]))
+ {
+ if (offset1 <= -256)
+ {
+ output_asm_insn (\"sub\\t%2, %2, %n3\", operands);
+ output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands);
+ output_asm_insn (\"ldr\\t%0, [%2]\", operands);
+ }
+ else
+ {
+ output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands);
+ output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands);
+ }
+ return \"\";
+ }
+ else
+ return \"ldrd\\t%0, %1, [%2, %3]\";
+ }"
+)
+
+(define_insn "*thumb2_ldrd_reg1"
+ [(parallel [(set (match_operand:SI 0 "s_register_operand" "")
+ (mem:SI (match_operand:SI 2 "s_register_operand" "rk")))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (mem:SI (plus:SI (match_dup 2)
+ (match_operand:SI 3 "const_int_operand" "Py"))))])]
+ "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+ operands[2], 0, operands[3], 1)"
+ "*
+ {
+ HOST_WIDE_INT offset2 = INTVAL (operands[3]);
+ if (offset2 == 4)
+ {
+ if (fix_cm3_ldrd && (operands[2] == operands[0]))
+ {
+ output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands);
+ output_asm_insn (\"ldr\\t%0, [%2]\", operands);
+ return \"\";
+ }
+ return \"ldrd\\t%0, %1, [%2]\";
+ }
+ else
+ {
+ if (fix_cm3_ldrd && (operands[2] == operands[1]))
+ {
+ output_asm_insn (\"ldr\\t%0, [%2]\", operands);
+ output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands);
+ return \"\";
+ }
+ return \"ldrd\\t%1, %0, [%2, %3]\";
+ }
+ }"
+)
+
+(define_insn "*thumb2_ldrd_reg2"
+ [(parallel [(set (match_operand:SI 0 "s_register_operand" "")
+ (mem:SI (plus:SI
+ (match_operand:SI 2 "s_register_operand" "rk")
+ (match_operand:SI 3 "const_int_operand" "Py"))))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (mem:SI (match_dup 2)))])]
+ "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+ operands[2], operands[3], 0, 1)"
+ "*
+ {
+ HOST_WIDE_INT offset1 = INTVAL (operands[3]);
+ if (offset1 == -4)
+ {
+ if (fix_cm3_ldrd && (operands[2] == operands[0]))
+ {
+ output_asm_insn (\"ldr\\t%1, [%2]\", operands);
+ output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands);
+ return \"\";
+ }
+ return \"ldrd\\t%0, %1, [%2, %3]\";
+ }
+ else
+ {
+ if (fix_cm3_ldrd && (operands[2] == operands[1]))
+ {
+ output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands);
+ output_asm_insn (\"ldr\\t%1, [%2]\", operands);
+ return \"\";
+ }
+ return \"ldrd\\t%1, %0, [%2]\";
+ }
+ }"
+)
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 2 "memory_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))]
+ "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1],
+ operands[2], operands[3], 1)"
+ [(parallel [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 2 "memory_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))])]
+ ""
+)
+
+(define_insn "*thumb2_strd"
+ [(parallel [(set (mem:SI
+ (plus:SI (match_operand:SI 2 "s_register_operand" "rk")
+ (match_operand:SI 3 "const_int_operand" "Py")))
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 2)
+ (match_operand:SI 4 "const_int_operand" "Py")))
+ (match_operand:SI 1 "s_register_operand" ""))])]
+ "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+ operands[2], operands[3], operands[4], 0)"
+ "*
+ {
+ HOST_WIDE_INT offset1 = INTVAL (operands[3]);
+ HOST_WIDE_INT offset2 = INTVAL (operands[4]);
+ if (offset1 < offset2 )
+ return \"strd\\t%0, %1, [%2, %3]\";
+ else
+ return \"strd\\t%1, %0, [%2, %4]\";
+ }"
+)
+
+(define_insn "*thumb2_strd_reg1"
+ [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 2)
+ (match_operand:SI 3 "const_int_operand" "Py")))
+ (match_operand:SI 1 "s_register_operand" ""))])]
+ "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+ operands[2], 0, operands[3], 0)"
+ "*
+ {
+ HOST_WIDE_INT offset2 = INTVAL (operands[3]);
+ if (offset2 == 4)
+ return \"strd\\t%0, %1, [%2]\";
+ else
+ return \"strd\\t%1, %0, [%2, %3]\";
+ }"
+)
+
+(define_insn "*thumb2_strd_reg2"
+ [(parallel [(set (mem:SI (plus:SI
+ (match_operand:SI 2 "s_register_operand" "rk")
+ (match_operand:SI 3 "const_int_operand" "Py")))
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (mem:SI (match_dup 2))
+ (match_operand:SI 1 "s_register_operand" ""))])]
+ "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+ operands[2], operands[3], 0, 0)"
+ "*
+ {
+ HOST_WIDE_INT offset1 = INTVAL (operands[3]);
+ if (offset1 == -4)
+ return \"strd\\t%0, %1, [%2, %3]\";
+ else
+ return \"strd\\t%1, %0, [%2]\";
+ }"
+)
+
+(define_peephole2
+ [(set (match_operand:SI 2 "memory_operand" "")
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_operand:SI 1 "s_register_operand" ""))]
+ "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1],
+ operands[2], operands[3], 0)"
+ [(parallel [(set (match_operand:SI 2 "memory_operand" "")
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_operand:SI 1 "s_register_operand" ""))])]
+ ""
+)
Index: arm.c
===================================================================
--- arm.c (revision 163363)
+++ arm.c (working copy)
@@ -22959,4 +22959,85 @@ arm_expand_sync (enum machine_mode mode,
}
}
+/* Check the legality of operands in an ldrd/strd instruction. */
+bool
+thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base,
+ rtx off1, rtx off2, bool ldrd)
+{
+ HOST_WIDE_INT offset1 = 0;
+ HOST_WIDE_INT offset2 = 0;
+
+ if (off1 != NULL)
+ offset1 = INTVAL (off1);
+ if (off2 != NULL)
+ offset2 = INTVAL (off2);
+
+ if (ldrd && (reg1 == reg2))
+ return false;
+
+ if ((offset1 + 4) == offset2)
+ return true;
+ if ((offset2 + 4) == offset1)
+ return true;
+
+ return false;
+}
+
+/* Check if the two memory accesses can be merged to an ldrd/strd instruction.
+ That is they use the same base register, and the gap between constant
+ offsets should be 4. */
+bool
+thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd)
+{
+ rtx base1, base2, op1;
+ rtx addr1 = XEXP (mem1, 0);
+ rtx addr2 = XEXP (mem2, 0);
+ HOST_WIDE_INT offset1 = 0;
+ HOST_WIDE_INT offset2 = 0;
+
+ if (REG_P (addr1))
+ base1 = addr1;
+ else if (GET_CODE (addr1) == PLUS)
+ {
+ base1 = XEXP (addr1, 0);
+ op1 = XEXP (addr1, 1);
+ if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT))
+ return false;
+ offset1 = INTVAL (op1);
+ }
+ else
+ return false;
+
+ if (REG_P (addr2))
+ base2 = addr2;
+ else if (GET_CODE (addr2) == PLUS)
+ {
+ base2 = XEXP (addr2, 0);
+ op1 = XEXP (addr2, 1);
+ if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT))
+ return false;
+ offset2 = INTVAL (op1);
+ }
+ else
+ return false;
+
+ if (base1 != base2)
+ return false;
+
+ if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0))
+ return false;
+ if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0))
+ return false;
+
+ if (ldrd && ((reg1 == reg2) || (reg1 == base1)))
+ return false;
+
+ if ((offset1 + 4) == offset2)
+ return true;
+ if ((offset2 + 4) == offset1)
+ return true;
+
+ return false;
+}
+
#include "gt-arm.h"
Index: arm-protos.h
===================================================================
--- arm-protos.h (revision 163363)
+++ arm-protos.h (working copy)
@@ -149,7 +149,8 @@ extern void arm_expand_sync (enum machin
extern const char *arm_output_memory_barrier (rtx *);
extern const char *arm_output_sync_insn (rtx, rtx *);
extern unsigned int arm_sync_loop_insns (rtx , rtx *);
-
+extern bool thumb2_check_ldrd_operands (rtx, rtx, rtx, rtx, rtx, bool);
+extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool);
extern bool arm_output_addr_const_extra (FILE *, rtx);
#if defined TREE_CODE
Index: ldmstm.md
===================================================================
--- ldmstm.md (revision 163363)
+++ ldmstm.md (working copy)
@@ -852,7 +852,7 @@ (define_insn "*ldm2_ia"
(set (match_operand:SI 2 "arm_hard_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int 4))))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
"ldm%(ia%)\t%3, {%1, %2}"
[(set_attr "type" "load2")
(set_attr "predicable" "yes")])
@@ -901,7 +901,7 @@ (define_insn "*stm2_ia"
(match_operand:SI 1 "arm_hard_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
(match_operand:SI 2 "arm_hard_register_operand" ""))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
"stm%(ia%)\t%3, {%1, %2}"
[(set_attr "type" "store2")
(set_attr "predicable" "yes")])
@@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db"
(set (match_operand:SI 2 "arm_hard_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int -4))))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
"ldm%(db%)\t%3, {%1, %2}"
[(set_attr "type" "load2")
(set_attr "predicable" "yes")])
@@ -1067,7 +1067,7 @@ (define_insn "*stm2_db"
(match_operand:SI 1 "arm_hard_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
(match_operand:SI 2 "arm_hard_register_operand" ""))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
"stm%(db%)\t%3, {%1, %2}"
[(set_attr "type" "store2")
(set_attr "predicable" "yes")])
Index: constraints.md
===================================================================
--- constraints.md (revision 163363)
+++ constraints.md (working copy)
@@ -31,7 +31,7 @@
;; The following multi-letter normal constraints have been used:
;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di
;; in Thumb-1 state: Pa, Pb, Pc, Pd
-;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px
+;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
@@ -189,6 +189,13 @@ (define_constraint "Px"
(and (match_code "const_int")
(match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1")))
+(define_constraint "Py"
+ "@internal In Thumb-2 state a constant that is a multiple of 4 in the
+ range -1020 to 1024"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1024
+ && (ival & 3) == 0")))
+
(define_constraint "G"
"In ARM/Thumb-2 state a valid FPA immediate constant."
(and (match_code "const_double")
Index: pr40457-1.c
===================================================================
--- pr40457-1.c (revision 163363)
+++ pr40457-1.c (working copy)
@@ -1,9 +1,9 @@
-/* { dg-options "-Os" } */
+/* { dg-options "-O2" } */
/* { dg-do compile } */
int bar(int* p)
{
- int x = p[0] + p[1];
+ int x = p[0] + p[1] + p[2];
return x;
}
Index: pr40457-2.c
===================================================================
--- pr40457-2.c (revision 163363)
+++ pr40457-2.c (working copy)
@@ -5,6 +5,7 @@ void foo(int* p)
{
p[0] = 1;
p[1] = 0;
+ p[2] = 2;
}
/* { dg-final { scan-assembler "stm" } } */
Index: pr45335.c
===================================================================
--- pr45335.c (revision 0)
+++ pr45335.c (revision 0)
@@ -0,0 +1,22 @@
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-final { scan-assembler "ldrd" } } */
+/* { dg-final { scan-assembler "strd" } } */
+
+struct S
+{
+ void* p1;
+ void* p2;
+ void* p3;
+ void* p4;
+};
+
+extern printf(char*, ...);
+
+void foo1(struct S* fp, struct S* otherSaveArea)
+{
+ struct S* saveA = fp - 1;
+ printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea);
+ printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n",
+ saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp);
+}
On Tue, Aug 24, 2010 at 9:55 PM, Ramana Radhakrishnan
<ramana.radhakrishnan@arm.com> wrote:
>
> On Tue, 2010-08-24 at 21:09 +0800, Carrot Wei wrote:
>> The patterns in original patch conflict with ldm2/stm2 patterns. In thumb2
>> ldrd/strd instructions are more flexible than ldm2/stm2, we don't have any
>> reason to continue to use ldm2/stm2. In this new patch I removed the thumb2
>> support of ldm2/stm2. The ldm2/stm2 with update patterns are not affected.
>
>
> I can't approve / reject your patch but ...
>
> You need to consider the case for the Cortex M3 where we don't want to
> use ldrd's with overlapping address and destination registers because it
> may trigger a hardware erratum, thus you need to allow ldm2's on the M3
> and *not* generate any ldrd's in such situations. Look at the
> implementation of -mfix-cortexm3-ldrd
>
> Also it's not clear which patterns in ldmstm.md you are modifying
> without applying the patch , could you regenerate your patch with
>
> svn diff --diff-cmd "diff" -x "-aup -F^(define"
>
> so that we know easily which patterns you are modifying.
>
>
> Thanks,
> Ramana
>
>