[PATCH][AArch64] Vneg NEON intrinsics modified
- From: Alex Velenko <Alex.Velenko@arm.com>
- To: gcc-patches@gcc.gnu.org
- Cc: Marcus.Shawcroft@arm.com
- Date: Tue, 08 Oct 2013 17:10:10 +0100
- Subject: [PATCH][AArch64] Vneg NEON intrinsics modified
Hi,
This patch reimplements the following NEON intrinsics in C,
replacing the previous inline assembly (vneg_f64 and vneg_s64
are new):
vneg[q]_f[32,64]
vneg[q]_s[8,16,32,64]
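Each rewrite follows the same pattern; for example, vneg_s8 drops
its inline asm ("neg %0.8b,%1.8b") in favour of plain C negation,
which the compiler still emits as a single neg instruction (the new
tests below verify this with scan-assembler):

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vneg_s8 (int8x8_t __a)
{
  return -__a;
}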
Regression tests for the listed intrinsics are included.
A full regression run for aarch64-none-elf showed no regressions.
Ok?
Thanks,
Alex
gcc/testsuite/
2013-10-08 Alex Velenko <Alex.Velenko@arm.com>
* gcc.target/aarch64/vneg_f.c: New testcase.
* gcc.target/aarch64/vneg_s.c: New testcase.
gcc/
2013-10-08 Alex Velenko <Alex.Velenko@arm.com>
* config/aarch64/arm_neon.h (vneg_f32): Asm replaced with C.
(vneg_f64): New intrinsic.
(vneg_s8): Asm replaced with C.
(vneg_s16): Likewise.
(vneg_s32): Likewise.
(vneg_s64): New intrinsic.
(vnegq_f32): Asm replaced with C.
(vnegq_f64): Likewise.
(vnegq_s8): Likewise.
(vnegq_s16): Likewise.
(vnegq_s32): Likewise.
(vnegq_s64): Likewise.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index cb5860206a1812f347a77d4a6e06519f8c3a696f..1bd098d2a9c3a204c0fb57ee3ef31cbb5f328d8e 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -9785,115 +9785,6 @@ vmvnq_u32 (uint32x4_t a)
return result;
}
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vneg_f32 (float32x2_t a)
-{
- float32x2_t result;
- __asm__ ("fneg %0.2s,%1.2s"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vneg_s8 (int8x8_t a)
-{
- int8x8_t result;
- __asm__ ("neg %0.8b,%1.8b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vneg_s16 (int16x4_t a)
-{
- int16x4_t result;
- __asm__ ("neg %0.4h,%1.4h"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vneg_s32 (int32x2_t a)
-{
- int32x2_t result;
- __asm__ ("neg %0.2s,%1.2s"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vnegq_f32 (float32x4_t a)
-{
- float32x4_t result;
- __asm__ ("fneg %0.4s,%1.4s"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vnegq_f64 (float64x2_t a)
-{
- float64x2_t result;
- __asm__ ("fneg %0.2d,%1.2d"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vnegq_s8 (int8x16_t a)
-{
- int8x16_t result;
- __asm__ ("neg %0.16b,%1.16b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vnegq_s16 (int16x8_t a)
-{
- int16x8_t result;
- __asm__ ("neg %0.8h,%1.8h"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vnegq_s32 (int32x4_t a)
-{
- int32x4_t result;
- __asm__ ("neg %0.4s,%1.4s"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vnegq_s64 (int64x2_t a)
-{
- int64x2_t result;
- __asm__ ("neg %0.2d,%1.2d"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadal_s8 (int16x4_t a, int8x8_t b)
@@ -21241,6 +21132,80 @@ vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
}
+/* vneg */
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vneg_f32 (float32x2_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vneg_f64 (float64x1_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vneg_s8 (int8x8_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vneg_s16 (int16x4_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vneg_s32 (int32x2_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vneg_s64 (int64x1_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vnegq_f32 (float32x4_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vnegq_f64 (float64x2_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vnegq_s8 (int8x16_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vnegq_s16 (int16x8_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vnegq_s32 (int32x4_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vnegq_s64 (int64x2_t __a)
+{
+ return -__a;
+}
+
/* vqabs */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
diff --git a/gcc/testsuite/gcc.target/aarch64/vneg_f.c b/gcc/testsuite/gcc.target/aarch64/vneg_f.c
new file mode 100644
index 0000000000000000000000000000000000000000..1eaf21d34eb57b4e7e5388a4686fe6341197447a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vneg_f.c
@@ -0,0 +1,276 @@
+/* Test vneg works correctly. */
+/* { dg-do run } */
+/* { dg-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+#define FLT_EPSILON __FLT_EPSILON__
+#define DBL_EPSILON __DBL_EPSILON__
+#define FLT_MAX __FLT_MAX__
+#define FLT_MIN __FLT_MIN__
+#define DBL_MAX __DBL_MAX__
+#define DBL_MIN __DBL_MIN__
+
+#define TEST0 0
+/* 6 digits of pi. */
+#define TEST1 3.14159
+/* 6 digits of -e. */
+#define TEST2 -2.71828
+/* 2^25; float has 24 significand bits in the IEEE single-precision
+   format. */
+#define TEST3_FLT 33554432
+/* 2^54; double has 53 significand bits in the IEEE double-precision
+   format. */
+#define TEST3_DBL 18014398509481984
+
+extern void abort (void);
+
+#define FLT_INFINITY (__builtin_inff ())
+#define DBL_INFINITY (__builtin_inf ())
+
+#ifndef NAN
+#define NAN (0.0 / 0.0)
+#endif
+
+#define CONCAT(a, b) a##b
+#define CONCAT1(a, b) CONCAT (a, b)
+#define REG_INFEX64 _
+#define REG_INFEX128 q_
+#define REG_INFEX(reg_len) REG_INFEX##reg_len
+#define POSTFIX(reg_len, data_len) \
+ CONCAT1 (REG_INFEX (reg_len), f##data_len)
+
+#define DATA_TYPE_32 float
+#define DATA_TYPE_64 double
+#define DATA_TYPE(data_len) DATA_TYPE_##data_len
+
+#define INDEX64_32 [i]
+#define INDEX64_64
+#define INDEX128_32 [i]
+#define INDEX128_64 [i]
+#define INDEX(reg_len, data_len) \
+ CONCAT1 (INDEX, reg_len##_##data_len)
+
+#define LOAD_INST(reg_len, data_len) \
+ CONCAT1 (vld1, POSTFIX (reg_len, data_len))
+#define NEG_INST(reg_len, data_len) \
+ CONCAT1 (vneg, POSTFIX (reg_len, data_len))
+
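+/* Empty asm with a memory clobber: a barrier against optimization. */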
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
+
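+/* Negate test_set and check that each lane of a + (-a) is within EPSILON. */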
+#define RUN_TEST(test_set, reg_len, data_len, n, a, b) \
+ { \
+ int i; \
+ (a) = LOAD_INST (reg_len, data_len) (test_set); \
+ (b) = NEG_INST (reg_len, data_len) (a); \
+ for (i = 0; i < n; i++) \
+ { \
+ DATA_TYPE (data_len) diff; \
+ INHIB_OPTIMIZATION; \
+ diff \
+ = a INDEX (reg_len, data_len) \
+ + b INDEX (reg_len, data_len); \
+ if (diff > EPSILON) \
+ return 1; \
+ } \
+ }
+
+#define TEST3 TEST3_FLT
+#define EPSILON FLT_EPSILON
+#define VAR_MIN FLT_MIN
+#define VAR_MAX FLT_MAX
+#define INFINITY FLT_INFINITY
+
+int
+test_vneg_f32 ()
+{
+ float test_set0[2] = { TEST0, TEST1 };
+ float test_set1[2] = { TEST2, TEST3 };
+ float test_set2[2] = { VAR_MAX, VAR_MIN };
+ float test_set3[2] = { INFINITY, NAN };
+
+ float32x2_t a;
+ float32x2_t b;
+
+ RUN_TEST (test_set0, 64, 32, 2, a, b);
+ RUN_TEST (test_set1, 64, 32, 2, a, b);
+ RUN_TEST (test_set2, 64, 32, 2, a, b);
+ RUN_TEST (test_set3, 64, 32, 0, a, b);
+
+  /* Since the last test set cannot be checked in a uniform way by
+     adding the negation result to the original value, the number of
+     lanes checked by RUN_TEST is 0 (last argument).  Instead, the
+     result is checked manually below.  */
+
+ if (b[0] != -INFINITY)
+ return 1;
+
+ if (!__builtin_isnan (b[1]))
+ return 1;
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "fneg\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 4 } } */
+
+#undef TEST3
+#undef EPSILON
+#undef VAR_MIN
+#undef VAR_MAX
+#undef INFINITY
+
+#define TEST3 TEST3_DBL
+#define EPSILON DBL_EPSILON
+#define VAR_MIN DBL_MIN
+#define VAR_MAX DBL_MAX
+#define INFINITY DBL_INFINITY
+
+int
+test_vneg_f64 ()
+{
+ float64x1_t a;
+ float64x1_t b;
+
+ double test_set0[1] = { TEST0 };
+ double test_set1[1] = { TEST1 };
+ double test_set2[1] = { TEST2 };
+ double test_set3[1] = { TEST3 };
+ double test_set4[1] = { VAR_MAX };
+ double test_set5[1] = { VAR_MIN };
+ double test_set6[1] = { INFINITY };
+ double test_set7[1] = { NAN };
+
+ RUN_TEST (test_set0, 64, 64, 1, a, b);
+ RUN_TEST (test_set1, 64, 64, 1, a, b);
+ RUN_TEST (test_set2, 64, 64, 1, a, b);
+ RUN_TEST (test_set3, 64, 64, 1, a, b);
+ RUN_TEST (test_set4, 64, 64, 1, a, b);
+ RUN_TEST (test_set5, 64, 64, 1, a, b);
+ RUN_TEST (test_set6, 64, 64, 0, a, b);
+
+  /* Since this test set cannot be checked in a uniform way by adding
+     the negation result to the original value, the number of lanes
+     checked by RUN_TEST is 0 (last argument).  Instead, the result
+     is checked manually below.  */
+
+ if (b != -INFINITY)
+ return 1;
+
+ /* Same as above. */
+
+ RUN_TEST (test_set7, 64, 64, 0, a, b);
+
+ if (!__builtin_isnan (b))
+ return 1;
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "fneg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
+
+#undef TEST3
+#undef EPSILON
+#undef VAR_MIN
+#undef VAR_MAX
+#undef INFINITY
+
+#define TEST3 TEST3_FLT
+#define EPSILON FLT_EPSILON
+#define VAR_MIN FLT_MIN
+#define VAR_MAX FLT_MAX
+#define INFINITY FLT_INFINITY
+
+int
+test_vnegq_f32 ()
+{
+ float32x4_t a;
+ float32x4_t b;
+
+ float test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
+  float test_set1[4] = { VAR_MAX, VAR_MIN, INFINITY, NAN };
+
+ RUN_TEST (test_set0, 128, 32, 4, a, b);
+ RUN_TEST (test_set1, 128, 32, 2, a, b);
+
+  /* Since the last test set cannot be fully checked in a uniform way
+     by adding the negation result to the original value, only the
+     first 2 lanes are checked by RUN_TEST (last argument).  The
+     INFINITY and NAN lanes are checked manually below.  */
+
+ if (b[2] != -INFINITY)
+ return 1;
+
+ if (!__builtin_isnan (b[3]))
+ return 1;
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "fneg\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 2 } } */
+
+#undef TEST3
+#undef EPSILON
+#undef VAR_MIN
+#undef VAR_MAX
+#undef INFINITY
+
+#define TEST3 TEST3_DBL
+#define EPSILON DBL_EPSILON
+#define VAR_MIN DBL_MIN
+#define VAR_MAX DBL_MAX
+#define INFINITY DBL_INFINITY
+
+int
+test_vnegq_f64 ()
+{
+ float64x2_t a;
+ float64x2_t b;
+
+ double test_set0[2] = { TEST0, TEST1 };
+ double test_set1[2] = { TEST2, TEST3 };
+  double test_set2[2] = { VAR_MAX, VAR_MIN };
+ double test_set3[2] = { INFINITY, NAN };
+
+ RUN_TEST (test_set0, 128, 64, 2, a, b);
+ RUN_TEST (test_set1, 128, 64, 2, a, b);
+ RUN_TEST (test_set2, 128, 64, 2, a, b);
+ RUN_TEST (test_set3, 128, 64, 0, a, b);
+
+  /* Since the last test set cannot be checked in a uniform way by
+     adding the negation result to the original value, the number of
+     lanes checked by RUN_TEST is 0 (last argument).  Instead, the
+     result is checked manually below.  */
+
+ if (b[0] != -INFINITY)
+ return 1;
+
+ if (!__builtin_isnan (b[1]))
+ return 1;
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "fneg\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 4 } } */
+
+int
+main (int argc, char **argv)
+{
+ if (test_vneg_f32 ())
+ abort ();
+
+ if (test_vneg_f64 ())
+ abort ();
+
+ if (test_vnegq_f32 ())
+ abort ();
+
+ if (test_vnegq_f64 ())
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vneg_s.c b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
new file mode 100644
index 0000000000000000000000000000000000000000..accbf14074b9f9569f7e3662b6571075421f6a27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
@@ -0,0 +1,314 @@
+/* Test vneg works correctly. */
+/* { dg-do run } */
+/* { dg-options "-std=gnu99 -O3 -Wno-div-by-zero --save-temps" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+/* Used to force a variable to a SIMD register. */
+#define force_simd(V1) asm volatile ("mov %d0, %1.d[0]" \
+ : "=w"(V1) \
+ : "w"(V1) \
+ : /* No clobbers */);
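+/* Empty asm with a memory clobber: a barrier against optimization. */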
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
+
+#define TEST0 0
+#define TEST1 1
+#define TEST2 -1
+#define TEST3 10
+#define TEST4 -10
+#define TEST5 0
+
+#define ANSW0 0
+#define ANSW1 -1
+#define ANSW2 1
+#define ANSW3 -10
+#define ANSW4 10
+#define ANSW5 0
+
+extern void abort (void);
+
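+/* Token-pasting helpers that build intrinsic names such as vld1q_s32. */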
+#define CONCAT(a, b) a##b
+#define CONCAT1(a, b) CONCAT (a, b)
+#define REG_INFEX64 _
+#define REG_INFEX128 q_
+#define REG_INFEX(reg_len) REG_INFEX##reg_len
+#define POSTFIX(reg_len, data_len) \
+ CONCAT1 (REG_INFEX (reg_len), s##data_len)
+#define DATA_TYPE_32 float
+#define DATA_TYPE_64 double
+#define DATA_TYPE(data_len) DATA_TYPE_##data_len
+#define INDEX64_8 [i]
+#define INDEX64_16 [i]
+#define INDEX64_32 [i]
+#define INDEX64_64
+#define INDEX128_8 [i]
+#define INDEX128_16 [i]
+#define INDEX128_32 [i]
+#define INDEX128_64 [i]
+
+#define FORCE_SIMD_INST64_8(data)
+#define FORCE_SIMD_INST64_16(data)
+#define FORCE_SIMD_INST64_32(data)
+#define FORCE_SIMD_INST64_64(data) force_simd (data)
+#define FORCE_SIMD_INST128_8(data)
+#define FORCE_SIMD_INST128_16(data)
+#define FORCE_SIMD_INST128_32(data)
+#define FORCE_SIMD_INST128_64(data)
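+/* Only the 64x1 case needs forcing; otherwise the value could be
+   negated in a general register, hiding the expected "neg d, d".  */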
+
+#define INDEX(reg_len, data_len) \
+ CONCAT1 (INDEX, reg_len##_##data_len)
+#define FORCE_SIMD_INST(reg_len, data_len, data) \
+ CONCAT1 (FORCE_SIMD_INST, reg_len##_##data_len) (data)
+#define LOAD_INST(reg_len, data_len) \
+ CONCAT1 (vld1, POSTFIX (reg_len, data_len))
+#define NEG_INST(reg_len, data_len) \
+ CONCAT1 (vneg, POSTFIX (reg_len, data_len))
+
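+/* Negate test_set and compare each lane against answ_set. */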
+#define RUN_TEST(test_set, answ_set, reg_len, data_len, n, a, b) \
+ { \
+ int i; \
+ INHIB_OPTIMIZATION; \
+ (a) = LOAD_INST (reg_len, data_len) (test_set); \
+ (b) = LOAD_INST (reg_len, data_len) (answ_set); \
+ FORCE_SIMD_INST (reg_len, data_len, a) \
+ a = NEG_INST (reg_len, data_len) (a); \
+ FORCE_SIMD_INST (reg_len, data_len, a) \
+ for (i = 0; i < n; i++) \
+ { \
+ INHIB_OPTIMIZATION; \
+ if (a INDEX (reg_len, data_len) \
+ != b INDEX (reg_len, data_len)) \
+ return 1; \
+ } \
+ }
+
+int
+test_vneg_s8 ()
+{
+ int8x8_t a;
+ int8x8_t b;
+
+ int8_t test_set0[8] = {
+ TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, SCHAR_MAX, SCHAR_MIN
+ };
+ int8_t answ_set0[8] = {
+ ANSW0, ANSW1, ANSW2, ANSW3, ANSW4, ANSW5, SCHAR_MIN + 1, SCHAR_MIN
+ };
+
+ RUN_TEST (test_set0, answ_set0, 64, 8, 8, a, b);
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */
+
+int
+test_vneg_s16 ()
+{
+ int16x4_t a;
+ int16x4_t b;
+
+ int16_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
+ int16_t test_set1[4] = { TEST4, TEST5, SHRT_MAX, SHRT_MIN };
+
+ int16_t answ_set0[4] = { ANSW0, ANSW1, ANSW2, ANSW3 };
+ int16_t answ_set1[4] = { ANSW4, ANSW5, SHRT_MIN + 1, SHRT_MIN };
+
+ RUN_TEST (test_set0, answ_set0, 64, 16, 4, a, b);
+ RUN_TEST (test_set1, answ_set1, 64, 16, 4, a, b);
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 2 } } */
+
+int
+test_vneg_s32 ()
+{
+ int32x2_t a;
+ int32x2_t b;
+
+ int32_t test_set0[2] = { TEST0, TEST1 };
+ int32_t test_set1[2] = { TEST2, TEST3 };
+ int32_t test_set2[2] = { TEST4, TEST5 };
+ int32_t test_set3[2] = { INT_MAX, INT_MIN };
+
+ int32_t answ_set0[2] = { ANSW0, ANSW1 };
+ int32_t answ_set1[2] = { ANSW2, ANSW3 };
+ int32_t answ_set2[2] = { ANSW4, ANSW5 };
+ int32_t answ_set3[2] = { INT_MIN + 1, INT_MIN };
+
+ RUN_TEST (test_set0, answ_set0, 64, 32, 2, a, b);
+ RUN_TEST (test_set1, answ_set1, 64, 32, 2, a, b);
+ RUN_TEST (test_set2, answ_set2, 64, 32, 2, a, b);
+ RUN_TEST (test_set3, answ_set3, 64, 32, 2, a, b);
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 4 } } */
+
+int
+test_vneg_s64 ()
+{
+ int64x1_t a;
+ int64x1_t b;
+
+ int64_t test_set0[1] = { TEST0 };
+ int64_t test_set1[1] = { TEST1 };
+ int64_t test_set2[1] = { TEST2 };
+ int64_t test_set3[1] = { TEST3 };
+ int64_t test_set4[1] = { TEST4 };
+ int64_t test_set5[1] = { TEST5 };
+ int64_t test_set6[1] = { LLONG_MAX };
+ int64_t test_set7[1] = { LLONG_MIN };
+
+ int64_t answ_set0[1] = { ANSW0 };
+ int64_t answ_set1[1] = { ANSW1 };
+ int64_t answ_set2[1] = { ANSW2 };
+ int64_t answ_set3[1] = { ANSW3 };
+ int64_t answ_set4[1] = { ANSW4 };
+ int64_t answ_set5[1] = { ANSW5 };
+ int64_t answ_set6[1] = { LLONG_MIN + 1 };
+ int64_t answ_set7[1] = { LLONG_MIN };
+
+ RUN_TEST (test_set0, answ_set0, 64, 64, 1, a, b);
+ RUN_TEST (test_set1, answ_set1, 64, 64, 1, a, b);
+ RUN_TEST (test_set2, answ_set2, 64, 64, 1, a, b);
+ RUN_TEST (test_set3, answ_set3, 64, 64, 1, a, b);
+ RUN_TEST (test_set4, answ_set4, 64, 64, 1, a, b);
+ RUN_TEST (test_set5, answ_set5, 64, 64, 1, a, b);
+ RUN_TEST (test_set6, answ_set6, 64, 64, 1, a, b);
+ RUN_TEST (test_set7, answ_set7, 64, 64, 1, a, b);
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
+
+int
+test_vnegq_s8 ()
+{
+ int8x16_t a;
+ int8x16_t b;
+
+ int8_t test_set0[16] = {
+ TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, SCHAR_MAX, SCHAR_MIN,
+ 4, 8, 15, 16, 23, 42, -1, -2
+ };
+
+ int8_t answ_set0[16] = {
+ ANSW0, ANSW1, ANSW2, ANSW3, ANSW4, ANSW5, SCHAR_MIN + 1, SCHAR_MIN,
+ -4, -8, -15, -16, -23, -42, 1, 2
+ };
+
+  RUN_TEST (test_set0, answ_set0, 128, 8, 16, a, b);
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+
+int
+test_vnegq_s16 ()
+{
+ int16x8_t a;
+ int16x8_t b;
+
+ int16_t test_set0[8] = {
+ TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, SHRT_MAX, SHRT_MIN
+ };
+ int16_t answ_set0[8] = {
+ ANSW0, ANSW1, ANSW2, ANSW3, ANSW4, ANSW5, SHRT_MIN + 1, SHRT_MIN
+ };
+
+ RUN_TEST (test_set0, answ_set0, 128, 16, 8, a, b);
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+
+int
+test_vnegq_s32 ()
+{
+ int32x4_t a;
+ int32x4_t b;
+
+ int32_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
+ int32_t test_set1[4] = { TEST4, TEST5, INT_MAX, INT_MIN };
+
+ int32_t answ_set0[4] = { ANSW0, ANSW1, ANSW2, ANSW3 };
+ int32_t answ_set1[4] = { ANSW4, ANSW5, INT_MIN + 1, INT_MIN };
+
+ RUN_TEST (test_set0, answ_set0, 128, 32, 4, a, b);
+ RUN_TEST (test_set1, answ_set1, 128, 32, 4, a, b);
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 2 } } */
+
+int
+test_vnegq_s64 ()
+{
+ int64x2_t a;
+ int64x2_t b;
+
+ int64_t test_set0[2] = { TEST0, TEST1 };
+ int64_t test_set1[2] = { TEST2, TEST3 };
+ int64_t test_set2[2] = { TEST4, TEST5 };
+ int64_t test_set3[2] = { LLONG_MAX, LLONG_MIN };
+
+ int64_t answ_set0[2] = { ANSW0, ANSW1 };
+ int64_t answ_set1[2] = { ANSW2, ANSW3 };
+ int64_t answ_set2[2] = { ANSW4, ANSW5 };
+ int64_t answ_set3[2] = { LLONG_MIN + 1, LLONG_MIN };
+
+ RUN_TEST (test_set0, answ_set0, 128, 64, 2, a, b);
+ RUN_TEST (test_set1, answ_set1, 128, 64, 2, a, b);
+ RUN_TEST (test_set2, answ_set2, 128, 64, 2, a, b);
+ RUN_TEST (test_set3, answ_set3, 128, 64, 2, a, b);
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 4 } } */
+
+int
+main (int argc, char **argv)
+{
+ if (test_vneg_s8 ())
+ abort ();
+
+ if (test_vneg_s16 ())
+ abort ();
+
+ if (test_vneg_s32 ())
+ abort ();
+
+ if (test_vneg_s64 ())
+ abort ();
+
+ if (test_vnegq_s8 ())
+ abort ();
+
+ if (test_vnegq_s16 ())
+ abort ();
+
+ if (test_vnegq_s32 ())
+ abort ();
+
+ if (test_vnegq_s64 ())
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */