This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][ARM] Fix PR 55426


Hi all,

In this PR the 128-bit load-duplicate intrinsics in neon.exp ICE on big-endian with an unrecognisable insn error:

neon-vld1_dupQ.c:24:1: error: unrecognizable insn:
(insn 94 93 31 (set (subreg:DI (reg:V2DI 95 d16 [orig:137 D.14400 ] [137]) 0)
        (subreg:DI (reg:V2DI 95 d16 [orig:137 D.14400 ] [137]) 8))

The problem seems to be that the neon_vld1_dupv2di splitter generates subregs after reload with gen_lowpart and gen_highpart. Since that splitter always matches after reload, we already know the hard register numbers, so we can just manipulate those directly to extract the two doubleword parts of a quadword reg.

While we're at it, we might as well use a more general move instruction when the alignment is natural to potentially take advantage of more complex addressing modes. We're allowed to do that because the vld1Q_dup*64 intrinsics describe a behaviour and do not guarantee that a particular instruction will be used.

Therefore the vld1Q_dup*64 tests are updated to be run-time tests instead to test the functionality. New *_misaligned tests are added, however, to make sure that we still generate vld1.64 when the address is explicitly unaligned, since vld1.64 is the only instruction that can handle that.

Did an armeb-none-linux-gnueabihf build.
The vld1Q_dup*64* tests now pass on big and little endian.
arm-none-linux-gnueabihf bootstrap on Chromebook successful.


This is a regression since 4.7. I've tested this on trunk. Will test this on the 4.8 and 4.7 branches.

Ok for those branches if no regressions?

Thanks,
Kyrill


2014-02-11  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

    PR target/55426
    * config/arm/neon.md (neon_vld1_dupv2di): Do not generate
    low and high part subregs, use hard reg numbers.
    * config/arm/arm.c (arm_mem_aligned_p): New function.
    (arm_init_neon_builtins): Allow for memory operands
    in load operations.
    * config/arm/arm-protos.h (arm_mem_aligned_p): Declare
    extern.
    * config/arm/constraints.md (Uo): New constraint.

2014-02-11  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

    PR target/55426
    * gcc.target/arm/neon/vld1Q_dupp64.c: Change to run-time test.
    * gcc.target/arm/neon/vld1Q_dups64.c: Likewise.
    * gcc.target/arm/neon/vld1Q_dupu64.c: Likewise.
    * gcc.target/arm/neon/vld1Q_dupp64_misaligned.c: New test.
    * gcc.target/arm/neon/vld1Q_dups64_misaligned.c: Likewise.
    * gcc.target/arm/neon/vld1Q_dupu64_misaligned.c: Likewise.
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 13874ee..56f46e3 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -95,6 +95,7 @@ extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx,
 extern bool arm_tls_referenced_p (rtx);
 
 extern int arm_coproc_mem_operand (rtx, bool);
+extern bool arm_mem_aligned_p (rtx, unsigned int);
 extern int neon_vector_mem_operand (rtx, int, bool);
 extern int neon_struct_mem_operand (rtx);
 
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index fc81bf6..33c829d 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -12499,6 +12499,14 @@ arm_coproc_mem_operand (rtx op, bool wb)
   return FALSE;
 }
 
+/* Return true if the MEM RTX x has the given alignment.  */
+bool
+arm_mem_aligned_p (rtx x, unsigned int alignment)
+{
+  gcc_assert (MEM_P (x));
+  return MEM_ALIGN (x) == alignment;
+}
+
 /* Return TRUE if OP is a memory operand which we can load or store a vector
    to/from. TYPE is one of the following values:
     0 - Vector load/stor (vldr)
@@ -23644,7 +23652,9 @@ arm_init_neon_builtins (void)
 		    /* Neon load patterns always have the memory
 		       operand in the operand 1 position.  */
 		    gcc_assert (insn_data[d->code].operand[k].predicate
-				== neon_struct_operand);
+				  == neon_struct_operand
+			        || insn_data[d->code].operand[k].predicate
+			             == memory_operand);
 
 		    switch (d->mode)
 		      {
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 85dd116..86947dd 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -381,6 +381,14 @@
  (and (match_code "mem")
       (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, true)")))
 
+(define_memory_constraint "Uo"
+ "@internal
+  In ARM/Thumb-2 state a valid address for Neon element and structure
+  load/store instructions or normal load on doubleword alignment."
+ (and (match_code "mem")
+      (match_test "TARGET_32BIT && (arm_mem_aligned_p (op, DOUBLEWORD_ALIGNMENT)
+                                   || neon_vector_mem_operand (op, 2, true))")))
+
 (define_memory_constraint "Us"
  "@internal
   In ARM/Thumb-2 state a valid address for non-offset loads/stores of
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 2f06e42..e4490ba 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -4406,19 +4406,28 @@
 
 (define_insn_and_split "neon_vld1_dupv2di"
    [(set (match_operand:V2DI 0 "s_register_operand" "=w")
-    (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
+    (vec_duplicate:V2DI (match_operand:DI 1 "memory_operand" "Uo")))]
    "TARGET_NEON"
    "#"
    "&& reload_completed"
    [(const_int 0)]
    {
-    rtx tmprtx = gen_lowpart (DImode, operands[0]);
-    emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
-    emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
-    DONE;
-    }
-  [(set_attr "length" "8")
-   (set_attr "type" "neon_load1_all_lanes_q")]
+     rtx lo_reg = gen_rtx_REG (DImode, REGNO (operands[0]));
+     rtx hi_reg = gen_rtx_REG (DImode, REGNO (operands[0]) + 2);
+
+     /* If the alignment is not natural, we have to use vld1.
+        In all other cases we can try to take advantage of a more general
+        move operation.  */
+
+     if (arm_mem_aligned_p (operands[1], DOUBLEWORD_ALIGNMENT))
+       emit_move_insn (lo_reg, operands[1]);
+     else
+       emit_insn (gen_neon_vld1_dupdi (lo_reg, operands[1]));
+
+     emit_move_insn (hi_reg, lo_reg);
+     DONE;
+   }
+  [(set_attr "length" "8")]
 )
 
 (define_expand "vec_store_lanes<mode><mode>"
diff --git a/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp64.c b/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp64.c
index 2d504c1..a606eef 100644
--- a/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp64.c
+++ b/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp64.c
@@ -1,19 +1,35 @@
-/* Test the `vld1Q_dupp64' ARM Neon intrinsic.  */
-/* This file was autogenerated by neon-testgen.  */
+/* Test the `vld1Q_dup_p64' ARM Neon intrinsic.
+   If the argument to vld1q_dup_p64 is naturally aligned the compiler has
+   the freedom to choose any instruction equivalent to vld1.64, therefore we
+   do not scan for it explicitly but instead test the runtime functionality.
+   */
 
-/* { dg-do assemble } */
+/* { dg-do run } */
 /* { dg-require-effective-target arm_crypto_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
 /* { dg-add-options arm_crypto } */
 
 #include "arm_neon.h"
 
-void test_vld1Q_dupp64 (void)
-{
-  poly64x2_t out_poly64x2_t;
+extern void abort (void);
 
-  out_poly64x2_t = vld1q_dup_p64 (0);
+poly64x2_t
+test_vld1Q_dupp64 (poly64_t* p)
+{
+   return vld1q_dup_p64 (p);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { cleanup-saved-temps } } */
+int
+main (void)
+{
+  uint64_t c = 0xf00ba1;
+  poly64_t pol = vcreate_p64 (c);
+  poly64x2_t b = test_vld1Q_dupp64 (&pol);
+
+  if (vreinterpret_u64_p64 (vget_high_p64 (b)) != c
+      || vreinterpret_u64_p64 (vget_low_p64 (b) != c))
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp64_misaligned.c b/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp64_misaligned.c
new file mode 100644
index 0000000..3a916ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp64_misaligned.c
@@ -0,0 +1,43 @@
+/* Test the `vld1Q_dup_p64' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_crypto } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+struct __attribute__ ((__packed__)) my_struct
+{
+  int i;
+  poly64_t str;
+};
+
+poly64x2_t
+test_vld1Q_dupp64 (struct my_struct* s)
+{
+   return vld1q_dup_p64 (&(s->str));
+}
+
+int
+main (void)
+{
+  uint64_t c = 0xf00ba1;
+  poly64_t pol = vcreate_p64 (c);
+  struct my_struct st;
+  st.i = -1;
+  st.str = pol;
+  poly64x2_t b = test_vld1Q_dupp64 (&st);
+
+  if (vreinterpret_u64_p64 (vget_high_p64 (b)) != c
+      || vreinterpret_u64_p64 (vget_low_p64 (b) != c))
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c b/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c
index 4fceee8..a7e57f4 100644
--- a/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c
+++ b/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c
@@ -1,19 +1,33 @@
-/* Test the `vld1Q_dups64' ARM Neon intrinsic.  */
-/* This file was autogenerated by neon-testgen.  */
+/* Test the `vld1Q_dup_s64' ARM Neon intrinsic.
+   If the argument to vld1q_dup_s64 is naturally aligned the compiler has
+   the freedom to choose any instruction equivalent to vld1.64, therefore we
+   do not scan for it explicitly but instead test the runtime functionality.
+   */
 
-/* { dg-do assemble } */
-/* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -save-temps" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
-void test_vld1Q_dups64 (void)
-{
-  int64x2_t out_int64x2_t;
+extern void abort (void);
 
-  out_int64x2_t = vld1q_dup_s64 (0);
+int64x2_t
+test_vld1Q_dups64 (int64_t* p)
+{
+   return vld1q_dup_s64 (p);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { cleanup-saved-temps } } */
+int
+main (void)
+{
+  int64_t c = 0xf00ba1;
+  int64x2_t b = test_vld1Q_dups64 (&c);
+
+  if (vget_high_s64 (b) != c
+      || vget_low_s64 (b) != c)
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64_misaligned.c b/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64_misaligned.c
new file mode 100644
index 0000000..2079a66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64_misaligned.c
@@ -0,0 +1,41 @@
+/* Test the `vld1Q_dup_s64' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+struct __attribute__ ((__packed__)) my_struct
+{
+  int i;
+  int64_t str;
+};
+
+int64x2_t
+test_vld1Q_dups64 (struct my_struct* s)
+{
+   return vld1q_dup_s64 (&(s->str));
+}
+
+int
+main (void)
+{
+  int64_t c = 0xf00ba1;
+  struct my_struct st;
+  st.i = -1;
+  st.str = c;
+  int64x2_t b = test_vld1Q_dups64 (&st);
+
+  if (vget_high_s64 (b) != c
+      || vget_low_s64 (b) != c)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64.c b/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64.c
index ef0a382..985670f 100644
--- a/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64.c
+++ b/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64.c
@@ -1,19 +1,33 @@
-/* Test the `vld1Q_dupu64' ARM Neon intrinsic.  */
-/* This file was autogenerated by neon-testgen.  */
+/* Test the `vld1Q_dup_u64' ARM Neon intrinsic.
+   If the argument to vld1q_dup_u64 is naturally aligned the compiler has
+   the freedom to choose any instruction equivalent to vld1.64, therefore we
+   do not scan for it explicitly but instead test the runtime functionality.
+   */
 
-/* { dg-do assemble } */
-/* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -save-temps" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
-void test_vld1Q_dupu64 (void)
-{
-  uint64x2_t out_uint64x2_t;
+extern void abort (void);
 
-  out_uint64x2_t = vld1q_dup_u64 (0);
+uint64x2_t
+test_vld1Q_dupu64 (uint64_t* p)
+{
+   return vld1q_dup_u64 (p);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { cleanup-saved-temps } } */
+int
+main (void)
+{
+  uint64_t c = 0xf00ba1;
+  uint64x2_t b = test_vld1Q_dupu64 (&c);
+
+  if (vget_high_u64 (b) != c
+      || vget_low_u64 (b) != c)
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64_misaligned.c b/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64_misaligned.c
new file mode 100644
index 0000000..5468792
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64_misaligned.c
@@ -0,0 +1,42 @@
+/* Test the `vld1Q_dup_u64' ARM Neon intrinsic.  */
+/* This file was autogenerated by neon-testgen.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+struct __attribute__ ((__packed__)) my_struct
+{
+  int i;
+  uint64_t str;
+};
+
+uint64x2_t
+test_vld1Q_dupu64 (struct my_struct* s)
+{
+   return vld1q_dup_u64 (&(s->str));
+}
+
+int
+main (void)
+{
+  uint64_t c = 0xf00ba1;
+  struct my_struct st;
+  st.i = -1;
+  st.str = c;
+  uint64x2_t b = test_vld1Q_dupu64 (&st);
+
+  if (vget_high_u64 (b) != c
+      || vget_low_u64 (b) != c)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]