This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PING] [AArch64] Peepholes to generate ldp and stp instructions




You are better off CCing the maintainers for such reviews. Let me do that for you. I cannot approve or reject this patch but I have a few comments as below.

On 10/29/13 09:22, Hurugalawadi, Naveen wrote:

diff -uprN '-x*.orig' mainline-orig/gcc/config/aarch64/aarch64.md gcc-4.8.0/gcc/config/aarch64/aarch64.md
--- mainline-orig/gcc/config/aarch64/aarch64.md	2013-10-28 17:15:52.363975264 +0530
+++ gcc-4.8.0/gcc/config/aarch64/aarch64.md	2013-10-29 10:11:09.656082201 +0530
@@ -1068,6 +1068,27 @@
     (set_attr "mode" "<MODE>")]
  )

+(define_peephole2
+  [(set (match_operand:GPI 0 "register_operand")
+	(match_operand:GPI 1 "aarch64_mem_pair_operand"))
+   (set (match_operand:GPI 2 "register_operand")
+	(match_operand:GPI 3 "memory_operand"))]
+  "GET_CODE (operands[1]) == MEM
+   && GET_CODE (XEXP (operands[1], 0)) == PLUS
+   && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == REG

Use the predicates REG_P , CONST_INT_P and so on. Don't do this in this form.

+   && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
+   && GET_MODE (operands[0]) == GET_MODE (XEXP (XEXP (operands[1], 0), 0))
+   && REGNO (operands[0]) != REGNO (operands[2])
+   && REGNO_REG_CLASS (REGNO (operands[0]))
+      == REGNO_REG_CLASS (REGNO (operands[2]))
+   && rtx_equal_p (XEXP (operands[3], 0),
+		   plus_constant (Pmode, XEXP (operands[1], 0),
+				  GET_MODE_SIZE (<MODE>mode)))
+   && optimize_size"

Why use optimize_size here ? I would have thought it was always good to generate ldp and stp instructions. If you want to make it a core specific decision make it a tuning decision but don't disable just based on size.

Additionally if this check is common between all the patterns it would be better to factor this out into a predicate or a function in its own right.

+  [(parallel [(set (match_dup 0) (match_dup 1))
+	      (set (match_dup 2) (match_dup 3))])]
+)
+
  ;; Operands 0 and 2 are tied together by the final condition; so we allow
  ;; fairly lax checking on the second memory operation.
  (define_insn "store_pair<mode>"
@@ -1085,6 +1106,27 @@
     (set_attr "mode" "<MODE>")]
  )

+(define_peephole2
+  [(set (match_operand:GPI 0 "aarch64_mem_pair_operand")
+	(match_operand:GPI 1 "register_operand"))
+   (set (match_operand:GPI 2 "memory_operand")
+	(match_operand:GPI 3 "register_operand"))]
+  "GET_CODE (operands[0]) == MEM
+   && GET_CODE (XEXP (operands[0], 0)) == PLUS
+   && GET_CODE (XEXP (XEXP (operands[0], 0), 0)) == REG
+   && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == CONST_INT
+   && GET_MODE (operands[1]) == GET_MODE (XEXP (XEXP (operands[0], 0), 0))
+   && REGNO (operands[1]) != REGNO (operands[3])
+   && REGNO_REG_CLASS (REGNO (operands[1]))
+      == REGNO_REG_CLASS (REGNO (operands[3]))
+   && rtx_equal_p (XEXP (operands[2], 0),
+		   plus_constant (Pmode, XEXP (operands[0], 0),
+				  GET_MODE_SIZE (<MODE>mode)))
+   && optimize_size"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+	      (set (match_dup 2) (match_dup 3))])]
+)
+
  ;; Operands 1 and 3 are tied together by the final condition; so we allow
  ;; fairly lax checking on the second memory operation.
  (define_insn "load_pair<mode>"
@@ -1102,6 +1144,28 @@
     (set_attr "mode" "<MODE>")]
  )

+(define_peephole2
+  [(set (match_operand:GPF 0 "register_operand")
+	(match_operand:GPF 1 "aarch64_mem_pair_operand"))
+   (set (match_operand:GPF 2 "register_operand")
+	(match_operand:GPF 3 "memory_operand"))]
+  "GET_CODE (operands[1]) == MEM
+   && GET_CODE (XEXP (operands[1], 0)) == PLUS
+   && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == REG
+   && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
+   && GET_MODE (operands[0]) == GET_MODE (XEXP (XEXP (operands[1], 0), 0))
+   && REGNO (operands[0]) != REGNO (operands[2])
+   && REGNO (operands[0]) >= 32 && REGNO (operands[2]) >= 32

I'm not sure what this means here but this check doesn't look right - 32 is V0_REGNUM - why are you checking that here ? If you really need to do that check then use V0_REGNUM or it may be better to see if this really is a SIMD regnum ? Can you not use something like FP_REGNUM_P here instead ?


+   && REGNO_REG_CLASS (REGNO (operands[0]))
+      == REGNO_REG_CLASS (REGNO (operands[2]))
+   && rtx_equal_p (XEXP (operands[3], 0),
+		   plus_constant (Pmode, XEXP (operands[1], 0),
+				  GET_MODE_SIZE (<MODE>mode)))
+   && optimize_size"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+	      (set (match_dup 2) (match_dup 3))])]
+)
+
  ;; Operands 0 and 2 are tied together by the final condition; so we allow
  ;; fairly lax checking on the second memory operation.
  (define_insn "store_pair<mode>"
@@ -1119,6 +1183,28 @@
     (set_attr "mode" "<MODE>")]
  )

+(define_peephole2
+  [(set (match_operand:GPF 0 "aarch64_mem_pair_operand")
+	(match_operand:GPF 1 "register_operand"))
+   (set (match_operand:GPF 2 "memory_operand")
+	(match_operand:GPF 3 "register_operand"))]
+  "GET_CODE (operands[0]) == MEM
+   && GET_CODE (XEXP (operands[0], 0)) == PLUS
+   && GET_CODE (XEXP (XEXP (operands[0], 0), 0)) == REG
+   && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == CONST_INT
+   && GET_MODE (operands[1]) == GET_MODE (XEXP (XEXP (operands[0], 0), 0))
+   && REGNO (operands[1]) != REGNO (operands[3])
+   && REGNO (operands[1]) >= 32 && REGNO (operands[3]) >= 32
+   && REGNO_REG_CLASS (REGNO (operands[1]))
+      == REGNO_REG_CLASS (REGNO (operands[3]))
+   && rtx_equal_p (XEXP (operands[2], 0),
+		   plus_constant (Pmode, XEXP (operands[0], 0),
+				  GET_MODE_SIZE (<MODE>mode)))
+   && optimize_size"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+	      (set (match_dup 2) (match_dup 3))])]
+)
+
  ;; Load pair with writeback.  This is primarily used in function epilogues
  ;; when restoring [fp,lr]
  (define_insn "loadwb_pair<GPI:mode>_<P:mode>"
diff -uprN '-x*.orig' mainline-orig/gcc/testsuite/gcc.target/aarch64/ldp-stp.c gcc-4.8.0/gcc/testsuite/gcc.target/aarch64/ldp-stp.c
--- mainline-orig/gcc/testsuite/gcc.target/aarch64/ldp-stp.c	1970-01-01 05:30:00.000000000 +0530
+++ gcc-4.8.0/gcc/testsuite/gcc.target/aarch64/ldp-stp.c	2013-10-28 19:01:11.695986357 +0530
@@ -0,0 +1,33 @@
+/* { dg-options "-Os" } */
+
+extern void abort (void);
+
+typedef struct
+{
+  long int x, y;
+} ldst;
+
+void
+f (ldst p0, ldst p1, ldst p2, ldst p3, ldst p4, ldst p5)
+{
+  if (p2.x != 1 || p2.y != -1
+      || p3.x != -1 || p3.y != 1 || p4.x != 0 || p4.y != -1)
+    abort ();
+}
+
+void
+foo ()
+{
+  ldst p0, p1, p2, p3, p4, p5;
+
+  p4.x = 0;
+  p4.y = -1;
+
+  p5.x = 1;
+  p5.y = 0;
+
+  f (p0, p1, p2, p3, p4, p5);
+}
+
+/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\]" 3 } } */



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]