This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words


On Wed, Sep 1, 2010 at 11:22 PM, Richard Earnshaw <rearnsha@arm.com> wrote:
> If you submit an updated patch, please re-include the changelog entry,
> even if it's the same.
>
> There are two obvious problems with this patch:
>
> 1) You presume that ldrd is always cheaper than ldm(2 regs). ?This isn't
> the case on Cortex-a9. ?I'm not expecting you to work out all the
> details of when A9 should use LDM and when it should use ldrd, but your
> code needs to ascertain the costs of each alternative and make a
> decision based on that answer, not on a static choice.
>
> 2) Your code fails to check for volatile mems. ?These must not be
> transformed and the original load/store instructions must be preserved.
>

1. A new function thumb2_prefer_ldmstm is used to choose ldm/stm or ldrd/strd.
The default behavior is to output ldrd/strd. One should update this function if
ldm/stm is better.

2. Function thumb2_legitimate_ldrd_p is updated to check volatile memory access.

Following is the new patch

ChangeLog:
2010-09-04  Wei Guozhi  <carrot@google.com>

        PR target/45335
        * gcc/config/arm/thumb2.md (thumb2_ldrd, thumb2_ldrd_reg1,
        thumb2_ldrd_reg2 and peephole2): New insn pattern and related
        peephole2.
        (thumb2_strd, thumb2_strd_reg1, thumb2_strd_reg2 and peephole2):
        New insn pattern and related peephole2.
        * gcc/config/arm/arm.c (thumb2_legitimate_ldrd_p): New function.
        (thumb2_check_ldrd_operands): New function.
        (thumb2_prefer_ldmstm): New function.
        * gcc/config/arm/arm-protos.h (thumb2_legitimate_ldrd_p): New prototype.
        (thumb2_check_ldrd_operands): New prototype.
        (thumb2_prefer_ldmstm): New prototype.
        * gcc/config/arm/ldmstm.md (ldm2_ia, stm2_ia, ldm2_db, stm2_db):
        Change the ldm/stm patterns with 2 words to ARM only.
        * gcc/config/arm/constraints.md (Py): New thumb2 constant constraint
        suitable to ldrd/strd instructions.


2010-09-04  Wei Guozhi  <carrot@google.com>

        PR target/45335
        * gcc.target/arm/pr45335.c: New test.
        * gcc.target/arm/pr40457-1.c: Changed to load 3 words.
        * gcc.target/arm/pr40457-2.c: Changed to store 3 words.



Index: thumb2.md
===================================================================
--- thumb2.md   (revision 163853)
+++ thumb2.md   (working copy)
@@ -1257,3 +1257,226 @@ (define_peephole2
   "
   operands[2] = GEN_INT (32 - INTVAL (operands[2]));
   ")
+
+(define_insn "*thumb2_ldrd"
+  [(parallel [(set (match_operand:SI 0 "s_register_operand" "")
+                  (mem:SI (plus:SI
+                               (match_operand:SI 2 "s_register_operand" "rk")
+                               (match_operand:SI 3 "const_int_operand" "Py"))))
+             (set (match_operand:SI 1 "s_register_operand" "")
+                  (mem:SI (plus:SI (match_dup 2)
+                            (match_operand:SI 4 "const_int_operand" "Py"))))])]
+  "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+                               operands[2], operands[3], operands[4], 1)"
+  "*
+  {
+    HOST_WIDE_INT offset1 = INTVAL (operands[3]);
+    HOST_WIDE_INT offset2 = INTVAL (operands[4]);
+    if (offset1 > offset2)
+      {
+       /* Swap the operands so that memory [base+offset1] is loaded into
+          operands[0].  */
+       rtx tmp = operands[0];
+       operands[0] = operands[1];
+       operands[1] = tmp;
+       tmp = operands[3];
+       operands[3] = operands[4];
+       operands[4] = tmp;
+       offset1 = INTVAL (operands[3]);
+       offset2 = INTVAL (operands[4]);
+      }
+    if (thumb2_prefer_ldmstm (operands[0], operands[1],
+                             operands[2], operands[3], operands[4], 1))
+      return \"ldmdb\\t%2, {%0, %1}\";
+    else if (fix_cm3_ldrd && (operands[2] == operands[0]))
+      {
+       if (offset1 <= -256)
+         {
+           output_asm_insn (\"sub\\t%2, %2, %n3\", operands);
+           output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands);
+           output_asm_insn (\"ldr\\t%0, [%2]\", operands);
+         }
+       else
+         {
+           output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands);
+           output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands);
+         }
+       return \"\";
+      }
+    else
+      return \"ldrd\\t%0, %1, [%2, %3]\";
+  }"
+)
+
+(define_insn "*thumb2_ldrd_reg1"
+  [(parallel [(set (match_operand:SI 0 "s_register_operand" "")
+                  (mem:SI (match_operand:SI 2 "s_register_operand" "rk")))
+             (set (match_operand:SI 1 "s_register_operand" "")
+                  (mem:SI (plus:SI (match_dup 2)
+                            (match_operand:SI 3 "const_int_operand" "Py"))))])]
+  "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+                                               operands[2], 0, operands[3], 1)"
+  "*
+  {
+    HOST_WIDE_INT offset2 = INTVAL (operands[3]);
+    if (offset2 == 4)
+      {
+       if (thumb2_prefer_ldmstm (operands[0], operands[1],
+                                 operands[2], 0, operands[3], 1))
+         return \"ldmia\\t%2, {%0, %1}\";
+       if (fix_cm3_ldrd && (operands[2] == operands[0]))
+         {
+           output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands);
+           output_asm_insn (\"ldr\\t%0, [%2]\", operands);
+           return \"\";
+         }
+       return \"ldrd\\t%0, %1, [%2]\";
+      }
+    else
+      {
+       if (fix_cm3_ldrd && (operands[2] == operands[1]))
+         {
+           output_asm_insn (\"ldr\\t%0, [%2]\", operands);
+           output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands);
+         }
+       return \"ldrd\\t%1, %0, [%2, %3]\";
+      }
+  }"
+)
+
+(define_insn "*thumb2_ldrd_reg2"
+  [(parallel [(set (match_operand:SI 0 "s_register_operand" "")
+                  (mem:SI (plus:SI
+                               (match_operand:SI 2 "s_register_operand" "rk")
+                               (match_operand:SI 3 "const_int_operand" "Py"))))
+             (set (match_operand:SI 1 "s_register_operand" "")
+                  (mem:SI (match_dup 2)))])]
+  "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+                                               operands[2], operands[3], 0, 1)"
+  "*
+  {
+    HOST_WIDE_INT offset1 = INTVAL (operands[3]);
+    if (offset1 == -4)
+      {
+       if (fix_cm3_ldrd && (operands[2] == operands[0]))
+         {
+           output_asm_insn (\"ldr\\t%1, [%2]\", operands);
+           output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands);
+           return \"\";
+         }
+       return \"ldrd\\t%0, %1, [%2, %3]\";
+      }
+    else
+      {
+       if (thumb2_prefer_ldmstm (operands[0], operands[1],
+                                 operands[2], operands[3], 0, 1))
+         return \"ldmia\\t%2, {%1, %0}\";
+       if (fix_cm3_ldrd && (operands[2] == operands[1]))
+         {
+           output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands);
+           output_asm_insn (\"ldr\\t%1, [%2]\", operands);
+           return \"\";
+         }
+       return \"ldrd\\t%1, %0, [%2]\";
+      }
+  }"
+)
+
+(define_peephole2
+  [(set (match_operand:SI 0 "s_register_operand" "")
+       (match_operand:SI 2 "memory_operand" ""))
+   (set (match_operand:SI 1 "s_register_operand" "")
+       (match_operand:SI 3 "memory_operand" ""))]
+  "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1],
+                                             operands[2], operands[3], 1)"
+  [(parallel [(set (match_operand:SI 0 "s_register_operand" "")
+                  (match_operand:SI 2 "memory_operand" ""))
+             (set (match_operand:SI 1 "s_register_operand" "")
+                  (match_operand:SI 3 "memory_operand" ""))])]
+  ""
+)
+
+(define_insn "*thumb2_strd"
+  [(parallel [(set (mem:SI
+                       (plus:SI (match_operand:SI 2 "s_register_operand" "rk")
+                                (match_operand:SI 3 "const_int_operand" "Py")))
+                  (match_operand:SI 0 "s_register_operand" ""))
+             (set (mem:SI (plus:SI (match_dup 2)
+                                (match_operand:SI 4 "const_int_operand" "Py")))
+                  (match_operand:SI 1 "s_register_operand" ""))])]
+  "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+                               operands[2], operands[3], operands[4], 0)"
+  "*
+  {
+    HOST_WIDE_INT offset1 = INTVAL (operands[3]);
+    HOST_WIDE_INT offset2 = INTVAL (operands[4]);
+    if (thumb2_prefer_ldmstm (operands[0], operands[1],
+                             operands[2], operands[3], operands[4], 0))
+      return \"stmdb\\t%2, {%0, %1}\";
+    if (offset1 < offset2 )
+      return \"strd\\t%0, %1, [%2, %3]\";
+    else
+      return \"strd\\t%1, %0, [%2, %4]\";
+  }"
+)
+
+(define_insn "*thumb2_strd_reg1"
+  [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))
+                  (match_operand:SI 0 "s_register_operand" ""))
+             (set (mem:SI (plus:SI (match_dup 2)
+                               (match_operand:SI 3 "const_int_operand" "Py")))
+                  (match_operand:SI 1 "s_register_operand" ""))])]
+  "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+                                               operands[2], 0, operands[3], 0)"
+  "*
+  {
+    HOST_WIDE_INT offset2 = INTVAL (operands[3]);
+    if (offset2 == 4)
+      {
+       if (thumb2_prefer_ldmstm (operands[0], operands[1],
+                                 operands[2], 0, operands[3], 0))
+         return \"stmia\\t%2, {%0, %1}\";
+       return \"strd\\t%0, %1, [%2]\";
+      }
+    else
+      return \"strd\\t%1, %0, [%2, %3]\";
+  }"
+)
+
+(define_insn "*thumb2_strd_reg2"
+  [(parallel [(set (mem:SI (plus:SI
+                               (match_operand:SI 2 "s_register_operand" "rk")
+                               (match_operand:SI 3 "const_int_operand" "Py")))
+                  (match_operand:SI 0 "s_register_operand" ""))
+             (set (mem:SI (match_dup 2))
+                  (match_operand:SI 1 "s_register_operand" ""))])]
+  "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+                                               operands[2], operands[3], 0, 0)"
+  "*
+  {
+    HOST_WIDE_INT offset1 = INTVAL (operands[3]);
+    if (offset1 == -4)
+      return \"strd\\t%0, %1, [%2, %3]\";
+    else
+      {
+       if (thumb2_prefer_ldmstm (operands[0], operands[1],
+                                 operands[2], operands[3], 0, 0))
+         return \"stmia\\t%2, {%1, %0}\";
+       return \"strd\\t%1, %0, [%2]\";
+      }
+  }"
+)
+
+(define_peephole2
+  [(set (match_operand:SI 2 "memory_operand" "")
+       (match_operand:SI 0 "s_register_operand" ""))
+   (set (match_operand:SI 3 "memory_operand" "")
+       (match_operand:SI 1 "s_register_operand" ""))]
+  "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1],
+                                             operands[2], operands[3], 0)"
+  [(parallel [(set (match_operand:SI 2 "memory_operand" "")
+                  (match_operand:SI 0 "s_register_operand" ""))
+             (set (match_operand:SI 3 "memory_operand" "")
+                  (match_operand:SI 1 "s_register_operand" ""))])]
+  ""
+)
Index: arm.c
===================================================================
--- arm.c       (revision 163853)
+++ arm.c       (working copy)
@@ -22976,4 +22976,125 @@ arm_expand_sync (enum machine_mode mode,
     }
 }

+/* Check the legality of operands in an ldrd/strd instruction.  */
+bool
+thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base,
+                           rtx off1, rtx off2, bool ldrd)
+{
+  HOST_WIDE_INT offset1 = 0;
+  HOST_WIDE_INT offset2 = 0;
+
+  if (off1 != NULL)
+    offset1 = INTVAL (off1);
+  if (off2 != NULL)
+    offset2 = INTVAL (off2);
+
+  if (ldrd && (reg1 == reg2))
+    return false;
+
+  if ((offset1 + 4) == offset2)
+    return true;
+  if ((offset2 + 4) == offset1)
+    return true;
+
+  return false;
+}
+
+/* Check if the two memory accesses can be merged to an ldrd/strd instruction.
+   That is they use the same base register, and the gap between constant
+   offsets should be 4.  */
+bool
+thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd)
+{
+  rtx base1, base2, op1;
+  rtx addr1 = XEXP (mem1, 0);
+  rtx addr2 = XEXP (mem2, 0);
+  HOST_WIDE_INT offset1 = 0;
+  HOST_WIDE_INT offset2 = 0;
+
+  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
+    return false;
+
+  if (REG_P (addr1))
+    base1 = addr1;
+  else if (GET_CODE (addr1) == PLUS)
+    {
+      base1 = XEXP (addr1, 0);
+      op1 = XEXP (addr1, 1);
+      if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT))
+       return false;
+      offset1 = INTVAL (op1);
+    }
+  else
+    return false;
+
+  if (REG_P (addr2))
+    base2 = addr2;
+  else if (GET_CODE (addr2) == PLUS)
+    {
+      base2 = XEXP (addr2, 0);
+      op1 = XEXP (addr2, 1);
+      if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT))
+       return false;
+      offset2 = INTVAL (op1);
+    }
+  else
+    return false;
+
+  if (base1 != base2)
+    return false;
+
+  if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0))
+    return false;
+  if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0))
+    return false;
+
+  if (ldrd && ((reg1 == reg2) || (reg1 == base1)))
+    return false;
+
+  if ((offset1 + 4) == offset2)
+    return true;
+  if ((offset2 + 4) == offset1)
+    return true;
+
+  return false;
+}
+
+/* Check if the insn can be expressed as ldm/stm with less cost.  */
+bool
+thumb2_prefer_ldmstm (rtx reg1, rtx reg2, rtx base,
+                     rtx off1, rtx off2, bool ldrd)
+{
+  HOST_WIDE_INT offset1 = 0;
+  HOST_WIDE_INT offset2 = 0;
+
+  if (off1 != NULL)
+    offset1 = INTVAL (off1);
+  if (off2 != NULL)
+    offset2 = INTVAL (off2);
+
+  if (offset1 > offset2)
+  {
+    rtx tmp;
+    HOST_WIDE_INT t = offset1;
+    offset1 = offset2;
+    offset2 = t;
+    tmp = reg1;
+    reg1 = reg2;
+    reg2 = tmp;
+  }
+
+  /* The offset of ldmdb is -8, the offset of ldmia is 0.  */
+  if ((offset1 != -8) && (offset1 != 0))
+    return false;
+
+  /* Lower register corresponds to lower memory.  */
+  if (REGNO (reg1) > REGNO (reg2))
+    return false;
+
+  /* Now ldm/stm is possible. Check for special cases ldm/stm has lower
+     cost.  */
+  return false;
+}
+
 #include "gt-arm.h"

Index: arm-protos.h
===================================================================
--- arm-protos.h        (revision 163853)
+++ arm-protos.h        (working copy)
@@ -149,7 +149,9 @@ extern void arm_expand_sync (enum machin
 extern const char *arm_output_memory_barrier (rtx *);
 extern const char *arm_output_sync_insn (rtx, rtx *);
 extern unsigned int arm_sync_loop_insns (rtx , rtx *);
-
+extern bool thumb2_check_ldrd_operands (rtx, rtx, rtx, rtx, rtx, bool);
+extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool);
+extern bool thumb2_prefer_ldmstm (rtx, rtx, rtx, rtx, rtx, bool);
 extern bool arm_output_addr_const_extra (FILE *, rtx);

 #if defined TREE_CODE
Index: ldmstm.md
===================================================================
--- ldmstm.md   (revision 163853)
+++ ldmstm.md   (working copy)
@@ -852,7 +852,7 @@ (define_insn "*ldm2_ia"
      (set (match_operand:SI 2 "arm_hard_register_operand" "")
           (mem:SI (plus:SI (match_dup 3)
                   (const_int 4))))])]
-  "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+  "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
   "ldm%(ia%)\t%3, {%1, %2}"
   [(set_attr "type" "load2")
    (set_attr "predicable" "yes")])
@@ -901,7 +901,7 @@ (define_insn "*stm2_ia"
           (match_operand:SI 1 "arm_hard_register_operand" ""))
      (set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
           (match_operand:SI 2 "arm_hard_register_operand" ""))])]
-  "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+  "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
   "stm%(ia%)\t%3, {%1, %2}"
   [(set_attr "type" "store2")
    (set_attr "predicable" "yes")])
@@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db"
      (set (match_operand:SI 2 "arm_hard_register_operand" "")
           (mem:SI (plus:SI (match_dup 3)
                   (const_int -4))))])]
-  "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+  "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
   "ldm%(db%)\t%3, {%1, %2}"
   [(set_attr "type" "load2")
    (set_attr "predicable" "yes")])
@@ -1067,7 +1067,7 @@ (define_insn "*stm2_db"
           (match_operand:SI 1 "arm_hard_register_operand" ""))
      (set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
           (match_operand:SI 2 "arm_hard_register_operand" ""))])]
-  "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+  "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
   "stm%(db%)\t%3, {%1, %2}"
   [(set_attr "type" "store2")
    (set_attr "predicable" "yes")])
Index: constraints.md
===================================================================
--- constraints.md      (revision 163853)
+++ constraints.md      (working copy)
@@ -31,7 +31,7 @@
 ;; The following multi-letter normal constraints have been used:
 ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd
-;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px
+;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py

 ;; The following memory constraints have been used:
 ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
@@ -189,6 +189,13 @@ (define_constraint "Px"
   (and (match_code "const_int")
        (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1")))

+(define_constraint "Py"
+  "@internal In Thumb-2 state a constant that is a multiple of 4 in the
+   range -1020 to 1024"
+  (and (match_code "const_int")
+       (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1024
+                   && (ival & 3) == 0")))
+
 (define_constraint "G"
  "In ARM/Thumb-2 state a valid FPA immediate constant."
  (and (match_code "const_double")


Index: pr40457-1.c
===================================================================
--- pr40457-1.c (revision 163853)
+++ pr40457-1.c (working copy)
@@ -1,9 +1,9 @@
-/* { dg-options "-Os" }  */
+/* { dg-options "-O2" }  */
 /* { dg-do compile } */

 int bar(int* p)
 {
-  int x = p[0] + p[1];
+  int x = p[0] + p[1] + p[2];
   return x;
 }

Index: pr40457-2.c
===================================================================
--- pr40457-2.c (revision 163853)
+++ pr40457-2.c (working copy)
@@ -5,6 +5,7 @@ void foo(int* p)
 {
   p[0] = 1;
   p[1] = 0;
+  p[2] = 2;
 }

 /* { dg-final { scan-assembler "stm" } } */
Index: pr45335.c
===================================================================
--- pr45335.c   (revision 0)
+++ pr45335.c   (revision 0)
@@ -0,0 +1,22 @@
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-final { scan-assembler "ldrd" } } */
+/* { dg-final { scan-assembler "strd" } } */
+
+struct S
+{
+    void* p1;
+    void* p2;
+    void* p3;
+    void* p4;
+};
+
+extern printf(char*, ...);
+
+void foo1(struct S* fp, struct S* otherSaveArea)
+{
+    struct S* saveA = fp - 1;
+    printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea);
+    printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n",
+        saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp);
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]