This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] ARM/NEON: vld1q_dup_s64 builtin


On 11.05.2012 16:48, Ramana Radhakrishnan wrote:
I would change the iterator from VQX to VQ in the pattern above (you
can also simplify the setting of neon_type in that case as well as
change that to be a vec_duplicate as below and get rid of any
lingering definitions of UNSPEC_VLD1_DUP if they exist), define a
separate pattern that expressed this as a define_insn_and_split as
below.

  (define_insn_and_split "neon_vld1_dupv2di"
    [(set (match_operand:V2DI 0 "s_register_operand" "=w")
      (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
    "TARGET_NEON"
    "#"
    "&&  reload_completed"
    [(const_int 0)]
    {
     rtx tmprtx = gen_lowpart (DImode, operands[0]);
     emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
     emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
     DONE;
     }
[(set_attr "length" "8")
 (set_attr "neon_type" "<fromearlierpattern>")]
)

Do you want to try this and see what you get ?

Thanks for this example and suggestion, it does work.


I'd rather have an extra regression test in gcc.target/arm that is a run-time test; e.g., take a look at gcc.target/arm/neon-vadds64.c.

Here is an updated patch:


2012-05-16 Christophe Lyon <christophe.lyon@st.com>

    * gcc/config/arm/neon.md (neon_vld1_dup): Restrict to VQ
    operands.
    (neon_vld1_dupv2di): New, fixes vld1q_dup_s64.
    * gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c: New test.

Index: gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c
===================================================================
--- gcc.orig/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c    (revision 0)
+++ gcc.new/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c    (revision 0)
@@ -0,0 +1,24 @@
+/* Test the `vld1q_dup_s64' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O0" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include <stdlib.h>
+
+int main (void)
+{
+  int64x1_t input[2] = {(int64x1_t)0x0123456776543210LL,
+            (int64x1_t)0x89abcdeffedcba90LL};
+  int64x1_t output[2] = {0, 0};
+  int64x2_t var = vld1q_dup_s64(input);
+
+  vst1q_s64(output, var);
+  if (output[0] != (int64x1_t)0x0123456776543210LL)
+    abort();
+  if (output[1] != (int64x1_t)0x0123456776543210LL)
+    abort();
+  return 0;
+}
Index: gcc/config/arm/neon.md
===================================================================
--- gcc.orig/gcc/config/arm/neon.md    (revision 2659)
+++ gcc.new/gcc/config/arm/neon.md    (working copy)
@@ -4195,20 +4195,32 @@
 )

 (define_insn "neon_vld1_dup<mode>"
-  [(set (match_operand:VQX 0 "s_register_operand" "=w")
-        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
+  [(set (match_operand:VQ 0 "s_register_operand" "=w")
+        (unspec:VQ [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
                     UNSPEC_VLD1_DUP))]
   "TARGET_NEON"
 {
-  if (GET_MODE_NUNITS (<MODE>mode) > 2)
     return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
-  else
-    return "vld1.<V_sz_elem>\t%h0, %A1";
 }
   [(set (attr "neon_type")
-      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
-                    (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")
-                    (const_string "neon_vld1_1_2_regs")))]
+      (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))]
+)
+
+(define_insn_and_split "neon_vld1_dupv2di"
+   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
+    (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
+   "TARGET_NEON"
+   "#"
+   "&& reload_completed"
+   [(const_int 0)]
+   {
+    rtx tmprtx = gen_lowpart (DImode, operands[0]);
+    emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
+    emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
+    DONE;
+    }
+  [(set_attr "length" "8")
+   (set (attr "neon_type") (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))]
 )

(define_expand "vec_store_lanes<mode><mode>"




Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]