This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PING][PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words


Ping

On Tue, Oct 5, 2010 at 7:52 PM, Carrot Wei <carrot@google.com> wrote:
> Ping ...
>
> On Sat, Sep 25, 2010 at 9:16 AM, Carrot Wei <carrot@google.com> wrote:
>> Ping again.
>>
>> On Sun, Sep 19, 2010 at 1:59 PM, Carrot Wei <carrot@google.com> wrote:
>>> Ping
>>>
>>> On Sat, Sep 4, 2010 at 8:41 PM, Carrot Wei <carrot@google.com> wrote:
>>>> On Wed, Sep 1, 2010 at 11:22 PM, Richard Earnshaw <rearnsha@arm.com> wrote:
>>>>> If you submit an updated patch, please re-include the changelog entry,
>>>>> even if it's the same.
>>>>>
>>>>> There are two obvious problems with this patch:
>>>>>
>>>>> 1) You presume that ldrd is always cheaper than ldm(2 regs). ?This isn't
>>>>> the case on Cortex-a9. ?I'm not expecting you to work out all the
>>>>> details of when A9 should use LDM and when it should use ldrd, but your
>>>>> code needs to ascertain the costs of each alternative and make a
>>>>> decision based on that answer, not on a static choice.
>>>>>
>>>>> 2) Your code fails to check for volatile mems. ?These must not be
>>>>> transformed and the original load/store instructions must be preserved.
>>>>>
>>>>
>>>> 1. A new function thumb2_prefer_ldmstm is used to choose ldm/stm or ldrd/strd.
>>>> The default behavior is to output ldrd/strd. One should update this function if
>>>> ldm/stm is better.
>>>>
>>>> 2. Function thumb2_legitimate_ldrd_p is updated to check volatile memory access.
>>>>
>>>> Following is the new patch
>>>>
>>>> ChangeLog:
>>>> 2010-09-04 ?Wei Guozhi ?<carrot@google.com>
>>>>
>>>> ? ? ? ?PR target/45335
>>>> ? ? ? ?* gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1,
>>>> ? ? ? ?thumb2_ldrd_reg2 and peephole2): New insn pattern and related
>>>> ? ? ? ?peephole2.
>>>> ? ? ? ?(thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2):
>>>> ? ? ? ?New insn pattern and related peephole2.
>>>> ? ? ? ?* gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function.
>>>> ? ? ? ?(thumb2_check_ldrd_operands): New function.
>>>> ? ? ? ?(thumb2_prefer_ldmstm): New function.
>>>> ? ? ? ?* gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype.
>>>> ? ? ? ?(thumb2_check_ldrd_operands): New prototype.
>>>> ? ? ? ?(thumb2_prefer_ldmstm): New prototype.
>>>> ? ? ? ?* gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db):
>>>> ? ? ? ?Change the ldm/stm patterns with 2 words to ARM only.
>>>> ? ? ? ?* gcc/config/arm/constraints.md (Py): New thumb2 constant constraint
>>>> ? ? ? ?suitable to ldrd/strd instructions.
>>>>
>>>>
>>>> 2010-09-04 ?Wei Guozhi ?<carrot@google.com>
>>>>
>>>> ? ? ? ?PR target/45335
>>>> ? ? ? ?* gcc.target/arm/pr45335.c: New test.
>>>> ? ? ? ?* gcc.target/arm/pr40457-1.c: Changed to load 3 words.
>>>> ? ? ? ?* gcc.target/arm/pr40457-2.c: Changed to store 3 words.
>>>>
>>>>
>>>>
>>>> Index: thumb2.md
>>>> ===================================================================
>>>> --- thumb2.md ? (revision 163853)
>>>> +++ thumb2.md ? (working copy)
>>>> @@ -1257,3 +1257,226 @@ (define_peephole2
>>>> ? "
>>>> ? operands[2] = GEN_INT (32 - INTVAL (operands[2]));
>>>> ? ")
>>>> +
>>>> +(define_insn "*thumb2_ldrd"
>>>> + ?[(parallel [(set (match_operand:SI 0 "s_register_operand" "")
>>>> + ? ? ? ? ? ? ? ? ?(mem:SI (plus:SI
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (match_operand:SI 2 "s_register_operand" "rk")
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (match_operand:SI 3 "const_int_operand" "Py"))))
>>>> + ? ? ? ? ? ? (set (match_operand:SI 1 "s_register_operand" "")
>>>> + ? ? ? ? ? ? ? ? ?(mem:SI (plus:SI (match_dup 2)
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?(match_operand:SI 4 "const_int_operand" "Py"))))])]
>>>> + ?"TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[2], operands[3], operands[4], 1)"
>>>> + ?"*
>>>> + ?{
>>>> + ? ?HOST_WIDE_INT offset1 = INTVAL (operands[3]);
>>>> + ? ?HOST_WIDE_INT offset2 = INTVAL (operands[4]);
>>>> + ? ?if (offset1 > offset2)
>>>> + ? ? ?{
>>>> + ? ? ? /* Swap the operands so that memory [base+offset1] is loaded into
>>>> + ? ? ? ? ?operands[0]. ?*/
>>>> + ? ? ? rtx tmp = operands[0];
>>>> + ? ? ? operands[0] = operands[1];
>>>> + ? ? ? operands[1] = tmp;
>>>> + ? ? ? tmp = operands[3];
>>>> + ? ? ? operands[3] = operands[4];
>>>> + ? ? ? operands[4] = tmp;
>>>> + ? ? ? offset1 = INTVAL (operands[3]);
>>>> + ? ? ? offset2 = INTVAL (operands[4]);
>>>> + ? ? ?}
>>>> + ? ?if (thumb2_prefer_ldmstm (operands[0], operands[1],
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[2], operands[3], operands[4], 1))
>>>> + ? ? ?return \"ldmdb\\t%2, {%0, %1}\";
>>>> + ? ?else if (fix_cm3_ldrd && (operands[2] == operands[0]))
>>>> + ? ? ?{
>>>> + ? ? ? if (offset1 <= -256)
>>>> + ? ? ? ? {
>>>> + ? ? ? ? ? output_asm_insn (\"sub\\t%2, %2, %n3\", operands);
>>>> + ? ? ? ? ? output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands);
>>>> + ? ? ? ? ? output_asm_insn (\"ldr\\t%0, [%2]\", operands);
>>>> + ? ? ? ? }
>>>> + ? ? ? else
>>>> + ? ? ? ? {
>>>> + ? ? ? ? ? output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands);
>>>> + ? ? ? ? ? output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands);
>>>> + ? ? ? ? }
>>>> + ? ? ? return \"\";
>>>> + ? ? ?}
>>>> + ? ?else
>>>> + ? ? ?return \"ldrd\\t%0, %1, [%2, %3]\";
>>>> + ?}"
>>>> +)
>>>> +
>>>> +(define_insn "*thumb2_ldrd_reg1"
>>>> + ?[(parallel [(set (match_operand:SI 0 "s_register_operand" "")
>>>> + ? ? ? ? ? ? ? ? ?(mem:SI (match_operand:SI 2 "s_register_operand" "rk")))
>>>> + ? ? ? ? ? ? (set (match_operand:SI 1 "s_register_operand" "")
>>>> + ? ? ? ? ? ? ? ? ?(mem:SI (plus:SI (match_dup 2)
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?(match_operand:SI 3 "const_int_operand" "Py"))))])]
>>>> + ?"TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[2], 0, operands[3], 1)"
>>>> + ?"*
>>>> + ?{
>>>> + ? ?HOST_WIDE_INT offset2 = INTVAL (operands[3]);
>>>> + ? ?if (offset2 == 4)
>>>> + ? ? ?{
>>>> + ? ? ? if (thumb2_prefer_ldmstm (operands[0], operands[1],
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[2], 0, operands[3], 1))
>>>> + ? ? ? ? return \"ldmia\\t%2, {%0, %1}\";
>>>> + ? ? ? if (fix_cm3_ldrd && (operands[2] == operands[0]))
>>>> + ? ? ? ? {
>>>> + ? ? ? ? ? output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands);
>>>> + ? ? ? ? ? output_asm_insn (\"ldr\\t%0, [%2]\", operands);
>>>> + ? ? ? ? ? return \"\";
>>>> + ? ? ? ? }
>>>> + ? ? ? return \"ldrd\\t%0, %1, [%2]\";
>>>> + ? ? ?}
>>>> + ? ?else
>>>> + ? ? ?{
>>>> + ? ? ? if (fix_cm3_ldrd && (operands[2] == operands[1]))
>>>> + ? ? ? ? {
>>>> + ? ? ? ? ? output_asm_insn (\"ldr\\t%0, [%2]\", operands);
>>>> + ? ? ? ? ? output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands);
>>>> + ? ? ? ? }
>>>> + ? ? ? return \"ldrd\\t%1, %0, [%2, %3]\";
>>>> + ? ? ?}
>>>> + ?}"
>>>> +)
>>>> +
>>>> +(define_insn "*thumb2_ldrd_reg2"
>>>> + ?[(parallel [(set (match_operand:SI 0 "s_register_operand" "")
>>>> + ? ? ? ? ? ? ? ? ?(mem:SI (plus:SI
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (match_operand:SI 2 "s_register_operand" "rk")
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (match_operand:SI 3 "const_int_operand" "Py"))))
>>>> + ? ? ? ? ? ? (set (match_operand:SI 1 "s_register_operand" "")
>>>> + ? ? ? ? ? ? ? ? ?(mem:SI (match_dup 2)))])]
>>>> + ?"TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[2], operands[3], 0, 1)"
>>>> + ?"*
>>>> + ?{
>>>> + ? ?HOST_WIDE_INT offset1 = INTVAL (operands[3]);
>>>> + ? ?if (offset1 == -4)
>>>> + ? ? ?{
>>>> + ? ? ? if (fix_cm3_ldrd && (operands[2] == operands[0]))
>>>> + ? ? ? ? {
>>>> + ? ? ? ? ? output_asm_insn (\"ldr\\t%1, [%2]\", operands);
>>>> + ? ? ? ? ? output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands);
>>>> + ? ? ? ? ? return \"\";
>>>> + ? ? ? ? }
>>>> + ? ? ? return \"ldrd\\t%0, %1, [%2, %3]\";
>>>> + ? ? ?}
>>>> + ? ?else
>>>> + ? ? ?{
>>>> + ? ? ? if (thumb2_prefer_ldmstm (operands[0], operands[1],
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[2], operands[3], 0, 1))
>>>> + ? ? ? ? return \"ldmia\\t%2, {%1, %0}\";
>>>> + ? ? ? if (fix_cm3_ldrd && (operands[2] == operands[1]))
>>>> + ? ? ? ? {
>>>> + ? ? ? ? ? output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands);
>>>> + ? ? ? ? ? output_asm_insn (\"ldr\\t%1, [%2]\", operands);
>>>> + ? ? ? ? ? return \"\";
>>>> + ? ? ? ? }
>>>> + ? ? ? return \"ldrd\\t%1, %0, [%2]\";
>>>> + ? ? ?}
>>>> + ?}"
>>>> +)
>>>> +
>>>> +(define_peephole2
>>>> + ?[(set (match_operand:SI 0 "s_register_operand" "")
>>>> + ? ? ? (match_operand:SI 2 "memory_operand" ""))
>>>> + ? (set (match_operand:SI 1 "s_register_operand" "")
>>>> + ? ? ? (match_operand:SI 3 "memory_operand" ""))]
>>>> + ?"TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1],
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[2], operands[3], 1)"
>>>> + ?[(parallel [(set (match_operand:SI 0 "s_register_operand" "")
>>>> + ? ? ? ? ? ? ? ? ?(match_operand:SI 2 "memory_operand" ""))
>>>> + ? ? ? ? ? ? (set (match_operand:SI 1 "s_register_operand" "")
>>>> + ? ? ? ? ? ? ? ? ?(match_operand:SI 3 "memory_operand" ""))])]
>>>> + ?""
>>>> +)
>>>> +
>>>> +(define_insn "*thumb2_strd"
>>>> + ?[(parallel [(set (mem:SI
>>>> + ? ? ? ? ? ? ? ? ? ? ? (plus:SI (match_operand:SI 2 "s_register_operand" "rk")
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?(match_operand:SI 3 "const_int_operand" "Py")))
>>>> + ? ? ? ? ? ? ? ? ?(match_operand:SI 0 "s_register_operand" ""))
>>>> + ? ? ? ? ? ? (set (mem:SI (plus:SI (match_dup 2)
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?(match_operand:SI 4 "const_int_operand" "Py")))
>>>> + ? ? ? ? ? ? ? ? ?(match_operand:SI 1 "s_register_operand" ""))])]
>>>> + ?"TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[2], operands[3], operands[4], 0)"
>>>> + ?"*
>>>> + ?{
>>>> + ? ?HOST_WIDE_INT offset1 = INTVAL (operands[3]);
>>>> + ? ?HOST_WIDE_INT offset2 = INTVAL (operands[4]);
>>>> + ? ?if (thumb2_prefer_ldmstm (operands[0], operands[1],
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[2], operands[3], operands[4], 0))
>>>> + ? ? ?return \"stmdb\\t%2, {%0, %1}\";
>>>> + ? ?if (offset1 < offset2 )
>>>> + ? ? ?return \"strd\\t%0, %1, [%2, %3]\";
>>>> + ? ?else
>>>> + ? ? ?return \"strd\\t%1, %0, [%2, %4]\";
>>>> + ?}"
>>>> +)
>>>> +
>>>> +(define_insn "*thumb2_strd_reg1"
>>>> + ?[(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))
>>>> + ? ? ? ? ? ? ? ? ?(match_operand:SI 0 "s_register_operand" ""))
>>>> + ? ? ? ? ? ? (set (mem:SI (plus:SI (match_dup 2)
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (match_operand:SI 3 "const_int_operand" "Py")))
>>>> + ? ? ? ? ? ? ? ? ?(match_operand:SI 1 "s_register_operand" ""))])]
>>>> + ?"TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[2], 0, operands[3], 0)"
>>>> + ?"*
>>>> + ?{
>>>> + ? ?HOST_WIDE_INT offset2 = INTVAL (operands[3]);
>>>> + ? ?if (offset2 == 4)
>>>> + ? ? ?{
>>>> + ? ? ? if (thumb2_prefer_ldmstm (operands[0], operands[1],
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[2], 0, operands[3], 0))
>>>> + ? ? ? ? return \"stmia\\t%2, {%0, %1}\";
>>>> + ? ? ? return \"strd\\t%0, %1, [%2]\";
>>>> + ? ? ?}
>>>> + ? ?else
>>>> + ? ? ?return \"strd\\t%1, %0, [%2, %3]\";
>>>> + ?}"
>>>> +)
>>>> +
>>>> +(define_insn "*thumb2_strd_reg2"
>>>> + ?[(parallel [(set (mem:SI (plus:SI
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (match_operand:SI 2 "s_register_operand" "rk")
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (match_operand:SI 3 "const_int_operand" "Py")))
>>>> + ? ? ? ? ? ? ? ? ?(match_operand:SI 0 "s_register_operand" ""))
>>>> + ? ? ? ? ? ? (set (mem:SI (match_dup 2))
>>>> + ? ? ? ? ? ? ? ? ?(match_operand:SI 1 "s_register_operand" ""))])]
>>>> + ?"TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[2], operands[3], 0, 0)"
>>>> + ?"*
>>>> + ?{
>>>> + ? ?HOST_WIDE_INT offset1 = INTVAL (operands[3]);
>>>> + ? ?if (offset1 == -4)
>>>> + ? ? ?return \"strd\\t%0, %1, [%2, %3]\";
>>>> + ? ?else
>>>> + ? ? ?{
>>>> + ? ? ? if (thumb2_prefer_ldmstm (operands[0], operands[1],
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[2], operands[3], 0, 0))
>>>> + ? ? ? ? return \"stmia\\t%2, {%1, %0}\";
>>>> + ? ? ? return \"strd\\t%1, %0, [%2]\";
>>>> + ? ? ?}
>>>> + ?}"
>>>> +)
>>>> +
>>>> +(define_peephole2
>>>> + ?[(set (match_operand:SI 2 "memory_operand" "")
>>>> + ? ? ? (match_operand:SI 0 "s_register_operand" ""))
>>>> + ? (set (match_operand:SI 3 "memory_operand" "")
>>>> + ? ? ? (match_operand:SI 1 "s_register_operand" ""))]
>>>> + ?"TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1],
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? operands[2], operands[3], 0)"
>>>> + ?[(parallel [(set (match_operand:SI 2 "memory_operand" "")
>>>> + ? ? ? ? ? ? ? ? ?(match_operand:SI 0 "s_register_operand" ""))
>>>> + ? ? ? ? ? ? (set (match_operand:SI 3 "memory_operand" "")
>>>> + ? ? ? ? ? ? ? ? ?(match_operand:SI 1 "s_register_operand" ""))])]
>>>> + ?""
>>>> +)
>>>> Index: arm.c
>>>> ===================================================================
>>>> --- arm.c ? ? ? (revision 163853)
>>>> +++ arm.c ? ? ? (working copy)
>>>> @@ -22976,4 +22976,125 @@ arm_expand_sync (enum machine_mode mode,
>>>> ? ? }
>>>> ?}
>>>>
>>>> +/* Check the legality of operands in an ldrd/strd instruction. ?*/
>>>> +bool
>>>> +thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base,
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? rtx off1, rtx off2, bool ldrd)
>>>> +{
>>>> + ?HOST_WIDE_INT offset1 = 0;
>>>> + ?HOST_WIDE_INT offset2 = 0;
>>>> +
>>>> + ?if (off1 != NULL)
>>>> + ? ?offset1 = INTVAL (off1);
>>>> + ?if (off2 != NULL)
>>>> + ? ?offset2 = INTVAL (off2);
>>>> +
>>>> + ?if (ldrd && (reg1 == reg2))
>>>> + ? ?return false;
>>>> +
>>>> + ?if ((offset1 + 4) == offset2)
>>>> + ? ?return true;
>>>> + ?if ((offset2 + 4) == offset1)
>>>> + ? ?return true;
>>>> +
>>>> + ?return false;
>>>> +}
>>>> +
>>>> +/* Check if the two memory accesses can be merged to an ldrd/strd instruction.
>>>> + ? That is they use the same base register, and the gap between constant
>>>> + ? offsets should be 4. ?*/
>>>> +bool
>>>> +thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd)
>>>> +{
>>>> + ?rtx base1, base2, op1;
>>>> + ?rtx addr1 = XEXP (mem1, 0);
>>>> + ?rtx addr2 = XEXP (mem2, 0);
>>>> + ?HOST_WIDE_INT offset1 = 0;
>>>> + ?HOST_WIDE_INT offset2 = 0;
>>>> +
>>>> + ?if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
>>>> + ? ?return false;
>>>> +
>>>> + ?if (REG_P (addr1))
>>>> + ? ?base1 = addr1;
>>>> + ?else if (GET_CODE (addr1) == PLUS)
>>>> + ? ?{
>>>> + ? ? ?base1 = XEXP (addr1, 0);
>>>> + ? ? ?op1 = XEXP (addr1, 1);
>>>> + ? ? ?if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT))
>>>> + ? ? ? return false;
>>>> + ? ? ?offset1 = INTVAL (op1);
>>>> + ? ?}
>>>> + ?else
>>>> + ? ?return false;
>>>> +
>>>> + ?if (REG_P (addr2))
>>>> + ? ?base2 = addr2;
>>>> + ?else if (GET_CODE (addr2) == PLUS)
>>>> + ? ?{
>>>> + ? ? ?base2 = XEXP (addr2, 0);
>>>> + ? ? ?op1 = XEXP (addr2, 1);
>>>> + ? ? ?if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT))
>>>> + ? ? ? return false;
>>>> + ? ? ?offset2 = INTVAL (op1);
>>>> + ? ?}
>>>> + ?else
>>>> + ? ?return false;
>>>> +
>>>> + ?if (base1 != base2)
>>>> + ? ?return false;
>>>> +
>>>> + ?if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0))
>>>> + ? ?return false;
>>>> + ?if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0))
>>>> + ? ?return false;
>>>> +
>>>> + ?if (ldrd && ((reg1 == reg2) || (reg1 == base1)))
>>>> + ? ?return false;
>>>> +
>>>> + ?if ((offset1 + 4) == offset2)
>>>> + ? ?return true;
>>>> + ?if ((offset2 + 4) == offset1)
>>>> + ? ?return true;
>>>> +
>>>> + ?return false;
>>>> +}
>>>> +
>>>> +/* Check if the insn can be expressed as ldm/stm with less cost. ?*/
>>>> +bool
>>>> +thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base,
>>>> + ? ? ? ? ? ? ? ? ? ? rtx off1, rtx off2, bool ldrd)
>>>> +{
>>>> + ?HOST_WIDE_INT offset1 = 0;
>>>> + ?HOST_WIDE_INT offset2 = 0;
>>>> +
>>>> + ?if (off1 != NULL)
>>>> + ? ?offset1 = INTVAL (off1);
>>>> + ?if (off2 != NULL)
>>>> + ? ?offset2 = INTVAL (off2);
>>>> +
>>>> + ?if (offset1 > offset2)
>>>> + ?{
>>>> + ? ?rtx tmp;
>>>> + ? ?HOST_WIDE_INT t = offset1;
>>>> + ? ?offset1 = offset2;
>>>> + ? ?offset2 = t;
>>>> + ? ?tmp = reg1;
>>>> + ? ?reg1 = reg2;
>>>> + ? ?reg2 = tmp;
>>>> + ?}
>>>> +
>>>> + ?/* The offset of ldmdb is -8, the offset of ldmia is 0. ?*/
>>>> + ?if ((offset1 != -8) && (offset1 != 0))
>>>> + ? ?return false;
>>>> +
>>>> + ?/* Lower register corresponds to lower memory. ?*/
>>>> + ?if (REGNO (reg1) > REGNO (reg2))
>>>> + ? ?return false;
>>>> +
>>>> + ?/* Now ldm/stm is possible. Check for special cases ldm/stm has lower
>>>> + ? ? cost. ?*/
>>>> + ?return false;
>>>> +}
>>>> +
>>>> ?#include "gt-arm.h"
>>>>
>>>> Index: arm-protos.h
>>>> ===================================================================
>>>> --- arm-protos.h ? ? ? ?(revision 163853)
>>>> +++ arm-protos.h ? ? ? ?(working copy)
>>>> @@ -149,7 +149,9 @@ extern void arm_expand_sync (enum machin
>>>> ?extern const char *arm_output_memory_barrier (rtx *);
>>>> ?extern const char *arm_output_sync_insn (rtx, rtx *);
>>>> ?extern unsigned int arm_sync_loop_insns (rtx , rtx *);
>>>> -
>>>> +extern bool thumb2_check_ldrd_operands (rtx, rtx, rtx, rtx, rtx, bool);
>>>> +extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool);
>>>> +extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool);
>>>> ?extern bool arm_output_addr_const_extra (FILE *, rtx);
>>>>
>>>> ?#if defined TREE_CODE
>>>> Index: ldmstm.md
>>>> ===================================================================
>>>> --- ldmstm.md ? (revision 163853)
>>>> +++ ldmstm.md ? (working copy)
>>>> @@ -852,7 +852,7 @@ (define_insn "*ldm2_ia"
>>>> ? ? ?(set (match_operand:SI 2 "arm_hard_register_operand" "")
>>>> ? ? ? ? ? (mem:SI (plus:SI (match_dup 3)
>>>> ? ? ? ? ? ? ? ? ? (const_int 4))))])]
>>>> - ?"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
>>>> + ?"TARGET_ARM && XVECLEN (operands[0], 0) == 2"
>>>> ? "ldm%(ia%)\t%3, {%1, %2}"
>>>> ? [(set_attr "type" "load2")
>>>> ? ?(set_attr "predicable" "yes")])
>>>> @@ -901,7 +901,7 @@ (define_insn "*stm2_ia"
>>>> ? ? ? ? ? (match_operand:SI 1 "arm_hard_register_operand" ""))
>>>> ? ? ?(set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
>>>> ? ? ? ? ? (match_operand:SI 2 "arm_hard_register_operand" ""))])]
>>>> - ?"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
>>>> + ?"TARGET_ARM && XVECLEN (operands[0], 0) == 2"
>>>> ? "stm%(ia%)\t%3, {%1, %2}"
>>>> ? [(set_attr "type" "store2")
>>>> ? ?(set_attr "predicable" "yes")])
>>>> @@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db"
>>>> ? ? ?(set (match_operand:SI 2 "arm_hard_register_operand" "")
>>>> ? ? ? ? ? (mem:SI (plus:SI (match_dup 3)
>>>> ? ? ? ? ? ? ? ? ? (const_int -4))))])]
>>>> - ?"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
>>>> + ?"TARGET_ARM && XVECLEN (operands[0], 0) == 2"
>>>> ? "ldm%(db%)\t%3, {%1, %2}"
>>>> ? [(set_attr "type" "load2")
>>>> ? ?(set_attr "predicable" "yes")])
>>>> @@ -1067,7 +1067,7 @@ (define_insn "*stm2_db"
>>>> ? ? ? ? ? (match_operand:SI 1 "arm_hard_register_operand" ""))
>>>> ? ? ?(set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
>>>> ? ? ? ? ? (match_operand:SI 2 "arm_hard_register_operand" ""))])]
>>>> - ?"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
>>>> + ?"TARGET_ARM && XVECLEN (operands[0], 0) == 2"
>>>> ? "stm%(db%)\t%3, {%1, %2}"
>>>> ? [(set_attr "type" "store2")
>>>> ? ?(set_attr "predicable" "yes")])
>>>> Index: constraints.md
>>>> ===================================================================
>>>> --- constraints.md ? ? ?(revision 163853)
>>>> +++ constraints.md ? ? ?(working copy)
>>>> @@ -31,7 +31,7 @@
>>>> ?;; The following multi-letter normal constraints have been used:
>>>> ?;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
>>>> ?;; in Thumb-1 state: Pa, Pb, Pc, Pd
>>>> -;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px
>>>> +;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py
>>>>
>>>> ?;; The following memory constraints have been used:
>>>> ?;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
>>>> @@ -189,6 +189,13 @@ (define_constraint "Px"
>>>> ? (and (match_code "const_int")
>>>> ? ? ? ?(match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1")))
>>>>
>>>> +(define_constraint "Py"
>>>> + ?"@internal In Thumb-2 state a constant that is a multiple of 4 in the
>>>> + ? range -1020 to 1024"
>>>> + ?(and (match_code "const_int")
>>>> + ? ? ? (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1024
>>>> + ? ? ? ? ? ? ? ? ? && (ival & 3) == 0")))
>>>> +
>>>> ?(define_constraint "G"
>>>> ?"In ARM/Thumb-2 state a valid FPA immediate constant."
>>>> ?(and (match_code "const_double")
>>>>
>>>>
>>>> Index: pr40457-1.c
>>>> ===================================================================
>>>> --- pr40457-1.c (revision 163853)
>>>> +++ pr40457-1.c (working copy)
>>>> @@ -1,9 +1,9 @@
>>>> -/* { dg-options "-Os" } ?*/
>>>> +/* { dg-options "-O2" } ?*/
>>>> ?/* { dg-do compile } */
>>>>
>>>> ?int bar(int* p)
>>>> ?{
>>>> - ?int x = p[0] + p[1];
>>>> + ?int x = p[0] + p[1] + p[2];
>>>> ? return x;
>>>> ?}
>>>>
>>>> Index: pr40457-2.c
>>>> ===================================================================
>>>> --- pr40457-2.c (revision 163853)
>>>> +++ pr40457-2.c (working copy)
>>>> @@ -5,6 +5,7 @@ void foo(int* p)
>>>> ?{
>>>> ? p[0] = 1;
>>>> ? p[1] = 0;
>>>> + ?p[2] = 2;
>>>> ?}
>>>>
>>>> ?/* { dg-final { scan-assembler "stm" } } */
>>>> Index: pr45335.c
>>>> ===================================================================
>>>> --- pr45335.c ? (revision 0)
>>>> +++ pr45335.c ? (revision 0)
>>>> @@ -0,0 +1,22 @@
>>>> +/* { dg-options "-mthumb -O2" } */
>>>> +/* { dg-require-effective-target arm_thumb2_ok } */
>>>> +/* { dg-final { scan-assembler "ldrd" } } */
>>>> +/* { dg-final { scan-assembler "strd" } } */
>>>> +
>>>> +struct S
>>>> +{
>>>> + ? ?void* p1;
>>>> + ? ?void* p2;
>>>> + ? ?void* p3;
>>>> + ? ?void* p4;
>>>> +};
>>>> +
>>>> +extern printf(char*, ...);
>>>> +
>>>> +void foo1(struct S* fp, struct S* otherSaveArea)
>>>> +{
>>>> + ? ?struct S* saveA = fp - 1;
>>>> + ? ?printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea);
>>>> + ? ?printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n",
>>>> + ? ? ? ?saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp);
>>>> +}
>>>>
>>>
>>
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]