[AArch64] vmov_n changes

author Alex Velenko <Alex.Velenko@arm.com>

Fri, 22 Nov 2013 15:39:45 +0000 (15:39 +0000)

committer James Greenhalgh <jgreenhalgh@gcc.gnu.org>

Fri, 22 Nov 2013 15:39:45 +0000 (15:39 +0000)
author Alex Velenko <Alex.Velenko@arm.com>
Fri, 22 Nov 2013 15:39:45 +0000 (15:39 +0000)
committer James Greenhalgh <jgreenhalgh@gcc.gnu.org>
Fri, 22 Nov 2013 15:39:45 +0000 (15:39 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 600da98e87870e227e95dbfed1a9bb2de4028e4e..fcfae83e0cd4790a8400a1fa010645a0e42c4a83 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,30 @@
+2013-11-22  Alex Velenko  <Alex.Velenko@arm.com>
+
+       * config/aarch64/arm_neon.h (vmov_n_f32): Implemented in C.
+       (vmov_n_f64): Likewise.
+       (vmov_n_p8): Likewise.
+       (vmov_n_p16): Likewise.
+       (vmov_n_s8): Likewise.
+       (vmov_n_s16): Likewise.
+       (vmov_n_s32): Likewise.
+       (vmov_n_s64): Likewise.
+       (vmov_n_u8): Likewise.
+       (vmov_n_u16): Likewise.
+       (vmov_n_u32): Likewise.
+       (vmov_n_u64): Likewise.
+       (vmovq_n_f32): Likewise.
+       (vmovq_n_f64): Likewise.
+       (vmovq_n_p8): Likewise.
+       (vmovq_n_p16): Likewise.
+       (vmovq_n_s8): Likewise.
+       (vmovq_n_s16): Likewise.
+       (vmovq_n_s32): Likewise.
+       (vmovq_n_s64): Likewise.
+       (vmovq_n_u8): Likewise.
+       (vmovq_n_u16): Likewise.
+       (vmovq_n_u32): Likewise.
+       (vmovq_n_u64): Likewise.
+
  2013-11-22  Tejas Belagod  <tejas.belagod@arm.com>
  
         * config/aarch64/aarch64-simd.md (vec_pack_trunc_<mode>,
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h

index f03d001013a0e4af18121b9b3e90e00001e17b93..dc561701e97cd5133b070ce2c74f3b8194a43e60 100644 (file)
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -8314,127 +8314,6 @@ vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
    return result;
  }
  
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vmov_n_f32 (float32_t a)
-{
-  float32x2_t result;
-  __asm__ ("dup %0.2s, %w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vmov_n_p8 (uint32_t a)
-{
-  poly8x8_t result;
-  __asm__ ("dup %0.8b,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vmov_n_p16 (uint32_t a)
-{
-  poly16x4_t result;
-  __asm__ ("dup %0.4h,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vmov_n_s8 (int32_t a)
-{
-  int8x8_t result;
-  __asm__ ("dup %0.8b,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vmov_n_s16 (int32_t a)
-{
-  int16x4_t result;
-  __asm__ ("dup %0.4h,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vmov_n_s32 (int32_t a)
-{
-  int32x2_t result;
-  __asm__ ("dup %0.2s,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vmov_n_s64 (int64_t a)
-{
-  int64x1_t result;
-  __asm__ ("ins %0.d[0],%x1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vmov_n_u8 (uint32_t a)
-{
-  uint8x8_t result;
-  __asm__ ("dup %0.8b,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vmov_n_u16 (uint32_t a)
-{
-  uint16x4_t result;
-  __asm__ ("dup %0.4h,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vmov_n_u32 (uint32_t a)
-{
-  uint32x2_t result;
-  __asm__ ("dup %0.2s,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vmov_n_u64 (uint64_t a)
-{
-  uint64x1_t result;
-  __asm__ ("ins %0.d[0],%x1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
  __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
  vmovl_high_s8 (int8x16_t a)
  {
@@ -8699,133 +8578,6 @@ vmovn_u64 (uint64x2_t a)
    return result;
  }
  
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vmovq_n_f32 (float32_t a)
-{
-  float32x4_t result;
-  __asm__ ("dup %0.4s, %w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vmovq_n_f64 (float64_t a)
-{
-  return (float64x2_t) {a, a};
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vmovq_n_p8 (uint32_t a)
-{
-  poly8x16_t result;
-  __asm__ ("dup %0.16b,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vmovq_n_p16 (uint32_t a)
-{
-  poly16x8_t result;
-  __asm__ ("dup %0.8h,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vmovq_n_s8 (int32_t a)
-{
-  int8x16_t result;
-  __asm__ ("dup %0.16b,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vmovq_n_s16 (int32_t a)
-{
-  int16x8_t result;
-  __asm__ ("dup %0.8h,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vmovq_n_s32 (int32_t a)
-{
-  int32x4_t result;
-  __asm__ ("dup %0.4s,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vmovq_n_s64 (int64_t a)
-{
-  int64x2_t result;
-  __asm__ ("dup %0.2d,%x1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vmovq_n_u8 (uint32_t a)
-{
-  uint8x16_t result;
-  __asm__ ("dup %0.16b,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vmovq_n_u16 (uint32_t a)
-{
-  uint16x8_t result;
-  __asm__ ("dup %0.8h,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vmovq_n_u32 (uint32_t a)
-{
-  uint32x4_t result;
-  __asm__ ("dup %0.4s,%w1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vmovq_n_u64 (uint64_t a)
-{
-  uint64x2_t result;
-  __asm__ ("dup %0.2d,%x1"
-           : "=w"(result)
-           : "r"(a)
-           : /* No clobbers */);
-  return result;
-}
-
  __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
  vmul_n_f32 (float32x2_t a, float32_t b)
  {
@@ -20983,6 +20735,152 @@ vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
    return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
  }
  
+/* vmov_n_  */
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmov_n_f32 (float32_t __a)
+{
+  return vdup_n_f32 (__a);
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vmov_n_f64 (float64_t __a)
+{
+  return __a;
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vmov_n_p8 (poly8_t __a)
+{
+  return vdup_n_p8 (__a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vmov_n_p16 (poly16_t __a)
+{
+  return vdup_n_p16 (__a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vmov_n_s8 (int8_t __a)
+{
+  return vdup_n_s8 (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmov_n_s16 (int16_t __a)
+{
+  return vdup_n_s16 (__a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmov_n_s32 (int32_t __a)
+{
+  return vdup_n_s32 (__a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vmov_n_s64 (int64_t __a)
+{
+  return __a;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vmov_n_u8 (uint8_t __a)
+{
+  return vdup_n_u8 (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmov_n_u16 (uint16_t __a)
+{
+    return vdup_n_u16 (__a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmov_n_u32 (uint32_t __a)
+{
+   return vdup_n_u32 (__a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vmov_n_u64 (uint64_t __a)
+{
+   return __a;
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmovq_n_f32 (float32_t __a)
+{
+  return vdupq_n_f32 (__a);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vmovq_n_f64 (float64_t __a)
+{
+  return vdupq_n_f64 (__a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vmovq_n_p8 (poly8_t __a)
+{
+  return vdupq_n_p8 (__a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vmovq_n_p16 (poly16_t __a)
+{
+  return vdupq_n_p16 (__a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vmovq_n_s8 (int8_t __a)
+{
+  return vdupq_n_s8 (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmovq_n_s16 (int16_t __a)
+{
+  return vdupq_n_s16 (__a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmovq_n_s32 (int32_t __a)
+{
+  return vdupq_n_s32 (__a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmovq_n_s64 (int64_t __a)
+{
+  return vdupq_n_s64 (__a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vmovq_n_u8 (uint8_t __a)
+{
+  return vdupq_n_u8 (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmovq_n_u16 (uint16_t __a)
+{
+  return vdupq_n_u16 (__a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmovq_n_u32 (uint32_t __a)
+{
+  return vdupq_n_u32 (__a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmovq_n_u64 (uint64_t __a)
+{
+  return vdupq_n_u64 (__a);
+}
+
  /* vmul_lane  */
  
  __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 3095cffa11d353862f30569d90501dfb5126850f..270c53cbc5bec236c6907c105524f54bd543f316 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2013-11-22  Alex Velenko  <Alex.Velenko@arm.com>
+
+       * gcc.target/aarch64/vmov_n_1.c: New testcase.
+
  2013-11-22  Richard Biener  <rguenther@suse.de>
  
         * gcc.dg/torture/20131122-0.c: New testcase.
diff --git a/gcc/testsuite/gcc.target/aarch64/vmov_n_1.c b/gcc/testsuite/gcc.target/aarch64/vmov_n_1.c

new file mode 100644 (file)

index 0000000..b9d094a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vmov_n_1.c
@@ -0,0 +1,349 @@
+/* Test vmov_n works correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
+
+#define CONCAT(a, b) a##b
+#define CONCAT1(a, b) CONCAT (a, b)
+#define REG_INFEX64 _
+#define REG_INFEX128 q_
+#define REG_INFEX(reg_len) REG_INFEX##reg_len
+#define POSTFIX_N(reg_len, data_len, data_type)        \
+  CONCAT1 (REG_INFEX (reg_len), n_##data_type##data_len)
+#define LANE_POSTFIX(reg_len, data_len, data_type) \
+  CONCAT1 (REG_INFEX (reg_len),lane_##data_type##data_len)
+
+/* Test values consist of bytes with following hex values.
+   For example:
+   TEST1 for int16_t will be 0xaaaa
+   TEST1 for int32_t will be 0xaaaaaaaa
+   etc.  */
+
+#define TEST1h aa
+#define TEST2h 55
+#define TEST3h ff
+#define TEST4h 00
+#define TEST5h cc
+#define TEST6h 33
+
+#define TESTh_8(x) TEST##x##h
+#define TESTh_16(x) CONCAT1 (TESTh_8 (x), TESTh_8 (x))
+#define TESTh_32(x) CONCAT1 (TESTh_16 (x), TESTh_16 (x))
+#define TESTh_64(x) CONCAT1 (TESTh_32 (x), TESTh_32 (x))
+
+#define TEST_8(x) CONCAT1 (0x, TESTh_8 (x))
+#define TEST_16(x) CONCAT1 (0x, TESTh_16 (x))
+#define TEST_32(x) CONCAT1 (0x, TESTh_32 (x))
+#define TEST_64(x) CONCAT1 (0x, TESTh_64 (x))
+
+#define TEST(test, data_len) \
+  CONCAT1 (TEST, _##data_len) (test)
+
+#define GET_ELEMENT(reg_len, data_len, data_type)              \
+  CONCAT1 (vget, LANE_POSTFIX (reg_len, data_len, data_type))
+
+#define VMOV_INST(reg_len, data_len, data_type)                        \
+  CONCAT1 (vmov, POSTFIX_N (reg_len, data_len, data_type))
+
+#define VMOV_OBSCURE_INST(reg_len, data_len, data_type)                \
+  CONCAT1 (VMOV_INST (reg_len, data_len, data_type), _obscure)
+
+#define RUN_TEST(reg_len, data_len, data_type,                         \
+                test, n, a, b, c)                                      \
+{                                                                      \
+  int i;                                                               \
+  INHIB_OPTIMIZATION;                                                  \
+  (a) = TEST (test, data_len);                                         \
+  INHIB_OPTIMIZATION;                                                  \
+  (b) = VMOV_OBSCURE_INST (reg_len, data_len, data_type) (&(a));       \
+  (c) = TEST (test, data_len);                                         \
+  for (i = 0; i < n; i++)                                              \
+    {                                                                  \
+      INHIB_OPTIMIZATION;                                              \
+      a = GET_ELEMENT (reg_len, data_len, data_type) (b, i);           \
+      if ((a) != (c))                                                  \
+       return 1;                                                       \
+    }                                                                  \
+}
+
+#define TYPE_f32 float32_t
+#define TYPE_64_f32 float32x2_t
+#define TYPE_128_f32 float32x4_t
+
+#define TYPE_f64 float64_t
+#define TYPE_64_f64 float64x1_t
+#define TYPE_128_f64 float64x2_t
+
+#define TYPE_s8 int8_t
+#define TYPE_64_s8 int8x8_t
+#define TYPE_128_s8 int8x16_t
+
+#define TYPE_s16 int16_t
+#define TYPE_64_s16 int16x4_t
+#define TYPE_128_s16 int16x8_t
+
+#define TYPE_s32 int32_t
+#define TYPE_64_s32 int32x2_t
+#define TYPE_128_s32 int32x4_t
+
+#define TYPE_s64 int64_t
+#define TYPE_64_s64 int64x1_t
+#define TYPE_128_s64 int64x2_t
+
+#define TYPE_u8 uint8_t
+#define TYPE_64_u8 uint8x8_t
+#define TYPE_128_u8 uint8x16_t
+
+#define TYPE_u16 uint16_t
+#define TYPE_64_u16 uint16x4_t
+#define TYPE_128_u16 uint16x8_t
+
+#define TYPE_u32 uint32_t
+#define TYPE_64_u32 uint32x2_t
+#define TYPE_128_u32 uint32x4_t
+
+#define TYPE_u64 uint64_t
+#define TYPE_64_u64 uint64x1_t
+#define TYPE_128_u64 uint64x2_t
+
+#define TYPE_p8 poly8_t
+#define TYPE_64_p8 poly8x8_t
+#define TYPE_128_p8 poly8x16_t
+
+#define TYPE_p16 poly16_t
+#define TYPE_64_p16 poly16x4_t
+#define TYPE_128_p16 poly16x8_t
+
+#define DIV64_8  8
+#define DIV64_16 4
+#define DIV64_32 2
+#define DIV64_64 1
+
+#define DIV128_8  16
+#define DIV128_16 8
+#define DIV128_32 4
+#define DIV128_64 2
+
+#define DIV(reg_len, data_len)                 \
+CONCAT1 (CONCAT1 (DIV, reg_len),               \
+        CONCAT1 (_, data_len))
+
+#define VECTOR_TYPE(reg_len, data_len, data_type)      \
+CONCAT1 (CONCAT1 (CONCAT1 (TYPE_,reg_len),             \
+                 CONCAT1 (_,data_type)),               \
+        data_len)
+
+#define SIMPLE_TYPE(data_len, data_type)       \
+CONCAT1 (TYPE_,                                        \
+        CONCAT1 (data_type,                    \
+                 data_len))
+
+#define OBSCURE_FUNC_NAME(reg_len, data_type, data_len)                \
+CONCAT1 (CONCAT1 (vmov,                                                \
+                 POSTFIX_N (reg_len, data_len, data_type)),    \
+        _obscure)
+
+#define OBSCURE_FUNC(reg_len, data_len, data_type)     \
+VECTOR_TYPE (reg_len, data_len, data_type)             \
+__attribute__ ((noinline))                             \
+OBSCURE_FUNC_NAME (reg_len, data_type, data_len)       \
+ (SIMPLE_TYPE (data_len, data_type) *ap)               \
+{                                                      \
+  SIMPLE_TYPE (data_len, data_type) register a;                \
+  INHIB_OPTIMIZATION;                                  \
+  a = *ap;                                             \
+  INHIB_OPTIMIZATION;                                  \
+  return VMOV_INST (reg_len, data_len, data_type) (a); \
+}
+
+#define TESTFUNC_NAME(reg_len, data_type, data_len)    \
+CONCAT1 (test_vmov,                                    \
+        POSTFIX_N (reg_len, data_len, data_type))
+
+#define TESTFUNC(reg_len, data_len, data_type) \
+int                                            \
+TESTFUNC_NAME (reg_len, data_type, data_len) ()        \
+{                                              \
+  SIMPLE_TYPE (data_len, data_type) a;         \
+  VECTOR_TYPE (reg_len, data_len, data_type) b;        \
+  SIMPLE_TYPE (data_len, data_type) c;         \
+                                               \
+  RUN_TEST (reg_len, data_len, data_type, 1,   \
+           DIV (reg_len, data_len), a, b, c);  \
+  RUN_TEST (reg_len, data_len, data_type, 2,   \
+           DIV (reg_len, data_len), a, b, c);  \
+  RUN_TEST (reg_len, data_len, data_type, 3,   \
+           DIV (reg_len, data_len), a, b, c);  \
+  RUN_TEST (reg_len, data_len, data_type, 4,   \
+           DIV (reg_len, data_len), a, b, c);  \
+  RUN_TEST (reg_len, data_len, data_type, 5,   \
+           DIV (reg_len, data_len), a, b, c);  \
+  RUN_TEST (reg_len, data_len, data_type, 6,   \
+           DIV (reg_len, data_len), a, b, c);  \
+  return 0;                                    \
+}
+
+OBSCURE_FUNC (64, 32, f)
+TESTFUNC (64, 32, f)
+/* "dup  Vd.2s, Rn" is less preferable then "dup  Vd.2s, Vn.s[lane]".  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[\[0-9\]+\\\]" 1 } } */
+
+OBSCURE_FUNC (64, 64, f)
+TESTFUNC (64, 64, f)
+/* "fmov  Dd, Rn" is generated instead of "dup  Dd, Rn".
+   No assembley scan included.  */
+
+OBSCURE_FUNC (64, 8, p)
+TESTFUNC (64, 8, p)
+/* Generates "dup  Vd.8b, Rn". Scan found near s8 version.  */
+
+OBSCURE_FUNC (64, 16, p)
+TESTFUNC (64, 16, p)
+/* Generates "dup  Vd.4h, Rn". Scan found near s16 version.  */
+
+OBSCURE_FUNC (64, 8, s)
+TESTFUNC (64, 8, s)
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */
+
+OBSCURE_FUNC (64, 16, s)
+TESTFUNC (64, 16, s)
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */
+
+OBSCURE_FUNC (64, 32, s)
+TESTFUNC (64, 32, s)
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */
+
+OBSCURE_FUNC (64, 64, s)
+TESTFUNC (64, 64, s)
+/* "fmov  Dd, Rn" is generated instead of "dup  Dd, Rn".
+   No assembley scan included.  */
+
+OBSCURE_FUNC (64, 8, u)
+TESTFUNC (64, 8, u)
+/* Generates "dup  Vd.8b, Rn". Scan found near s8 version.  */
+
+OBSCURE_FUNC (64, 16, u)
+TESTFUNC (64, 16, u)
+/* Generates "dup  Vd.4h, Rn". Scan found near s16 version.  */
+
+OBSCURE_FUNC (64, 32, u)
+TESTFUNC (64, 32, u)
+/* Generates "dup  Vd.2s, Rn". Scan found near s32 version.  */
+
+OBSCURE_FUNC (64, 64, u)
+TESTFUNC (64, 64, u)
+/* "fmov  Dd, Rn" is generated instead of "dup  Dd, Rn".
+   No assembley scan included.  */
+
+OBSCURE_FUNC (128, 32, f)
+TESTFUNC (128, 32, f)
+/* "dup  Vd.4s, Rn" is less preferable then "dup  Vd.4s, Vn.s[lane]".  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[\[0-9\]+\\\]" 1 } } */
+
+OBSCURE_FUNC (128, 64, f)
+TESTFUNC (128, 64, f)
+/* "dup  Vd.2d, Rn" is less preferable then "dup  Vd.2d, Vn.d[lane]".  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 1 } } */
+
+OBSCURE_FUNC (128, 8, p)
+TESTFUNC (128, 8, p)
+/* Generates "dup  Vd.16b, Rn". Scan found near s8 version.  */
+
+OBSCURE_FUNC (128, 16, p)
+TESTFUNC (128, 16, p)
+/* Generates "dup  Vd.8h, Rn". Scan found near s16 version.  */
+
+OBSCURE_FUNC (128, 8, s)
+TESTFUNC (128, 8, s)
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */
+
+OBSCURE_FUNC (128, 16, s)
+TESTFUNC (128, 16, s)
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */
+
+OBSCURE_FUNC (128, 32, s)
+TESTFUNC (128, 32, s)
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */
+
+OBSCURE_FUNC (128, 64, s)
+TESTFUNC (128, 64, s)
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */
+
+OBSCURE_FUNC (128, 8, u)
+TESTFUNC (128, 8, u)
+/* Generates "dup  Vd.16b, Rn". Scan found near s8 version.  */
+
+OBSCURE_FUNC (128, 16, u)
+TESTFUNC (128, 16, u)
+/* Generates "dup  Vd.8h, Rn". Scan found near s16 version.  */
+
+OBSCURE_FUNC (128, 32, u)
+TESTFUNC (128, 32, u)
+/* Generates "dup  Vd.4s, Rn". Scan found near s32 version.  */
+
+OBSCURE_FUNC (128, 64, u)
+TESTFUNC (128, 64, u)
+/* Generates "dup  Vd.2d, Rn". Scan found near s64 version.  */
+
+int
+main (int argc, char **argv)
+{
+  if (test_vmov_n_f32 ())
+    abort ();
+  if (test_vmov_n_f64 ())
+    abort ();
+  if (test_vmov_n_p8 ())
+    abort ();
+  if (test_vmov_n_p16 ())
+    abort ();
+  if (test_vmov_n_s8 ())
+    abort ();
+  if (test_vmov_n_s16 ())
+    abort ();
+  if (test_vmov_n_s32 ())
+    abort ();
+  if (test_vmov_n_s64 ())
+    abort ();
+  if (test_vmov_n_u8 ())
+    abort ();
+  if (test_vmov_n_u16 ())
+    abort ();
+  if (test_vmov_n_u32 ())
+    abort ();
+  if (test_vmov_n_u64 ())
+    abort ();
+
+  if (test_vmovq_n_f32 ())
+    abort ();
+  if (test_vmovq_n_f64 ())
+    abort ();
+  if (test_vmovq_n_p8 ())
+    abort ();
+  if (test_vmovq_n_p16 ())
+    abort ();
+  if (test_vmovq_n_s8 ())
+    abort ();
+  if (test_vmovq_n_s16 ())
+    abort ();
+  if (test_vmovq_n_s32 ())
+    abort ();
+  if (test_vmovq_n_s64 ())
+    abort ();
+  if (test_vmovq_n_u8 ())
+    abort ();
+  if (test_vmovq_n_u16 ())
+    abort ();
+  if (test_vmovq_n_u32 ())
+    abort ();
+  if (test_vmovq_n_u64 ())
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
author	Alex Velenko <Alex.Velenko@arm.com>
	Fri, 22 Nov 2013 15:39:45 +0000 (15:39 +0000)
committer	James Greenhalgh <jgreenhalgh@gcc.gnu.org>
	Fri, 22 Nov 2013 15:39:45 +0000 (15:39 +0000)
gcc/ChangeLog		patch \| blob \| blame \| history
gcc/config/aarch64/arm_neon.h		patch \| blob \| blame \| history
gcc/testsuite/ChangeLog		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/vmov_n_1.c	[new file with mode: 0644]	patch \| blob