diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..a80e10146a6e45b44c3a09701da949a8e9aa7653
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
@@ -0,0 +1,409 @@
+/* Test vdup_lane intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
+
+#define force_simd(V1)   asm volatile ("orr %0.16b, %1.16b, %1.16b" \
+           : "=w"(V1) \
+           : "w"(V1) \
+           : /* No clobbers */);
+
+int
+__attribute__ ((noinline)) test_vdup_lane_f32 () /* 0 = pass, 1 = lane mismatch.  */
+{
+  float32x2_t a;
+  float32x2_t b;
+  int i = 0;
+  float32_t c[2] = { 0.0E0 , 3.14 };
+  float32_t d[2];
+  a = vld1_f32 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_f32 (a, 0);
+  vst1_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (c[0] != d[i])
+        return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_f32 (a, 1);
+  vst1_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 2; i++) /* Restart i: the first loop left it at 2.  */
+    {
+      if (c[1] != d[i])
+        return 1;
+    }
+
+  return 0;
+}
+
+/* Covers test_vdup_lane_f32 and test_vdup_lane_s32. 
*/
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
+
+int
+__attribute__ ((noinline)) test_vdupq_lane_f32 () /* 0 = pass, 1 = lane mismatch.  */
+{
+  float32x2_t a;
+  float32x4_t b;
+  int i = 0;
+  float32_t c[2] = { 0.0E0 , 3.14 };
+  float32_t d[4];
+  a = vld1_f32 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_f32 (a, 0);
+  vst1q_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (c[0] != d[i])
+        return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_f32 (a, 1);
+  vst1q_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 4; i++) /* Restart i: the first loop left it at 4.  */
+    {
+      if (c[1] != d[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_lane_f32 and test_vdupq_lane_s32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
+
+int
+__attribute__ ((noinline)) test_vdup_lane_s8 () /* 0 = pass, 1 = lane mismatch.  */
+{
+  int8x8_t a;
+  int8x8_t b;
+  int i = 0;
+  /* Only lanes 0 and 4 of the eight source lanes are exercised.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[8];
+  a = vld1_s8 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s8 (a, 0);
+  vst1_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (c[0] != d[i])
+        return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  force_simd (a)
+  b = vdup_lane_s8 (a, 4);
+  vst1_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 8; i++) /* Restart i: the first loop left it at 8.  */
+    {
+      if (c[4] != d[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdup_lane_s8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[4\\\]" 1 } } */
+
+int
+__attribute__ ((noinline)) test_vdupq_lane_s8 () /* 0 = pass, 1 = lane mismatch.  */
+{
+  int8x8_t a;
+  int8x16_t b;
+  int i = 0;
+  /* Only lanes 0 and 4 of the eight source lanes are exercised.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[16];
+  a = vld1_s8 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s8 (a, 0);
+  vst1q_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+    {
+      if (c[0] != d[i])
+        return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s8 (a, 4);
+  vst1q_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 16; i++) /* Restart i: the first loop left it at 16.  */
+    {
+      if (c[4] != d[i])
+        return 1;
+    }
+
+  return 0;
+}
+
+/* Covers test_vdupq_lane_s8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[4\\\]" 1 } } */
+
+int
+__attribute__ ((noinline)) test_vdup_lane_s16 () /* 0 = pass, 1 = lane mismatch.  */
+{
+  int16x4_t a;
+  int16x4_t b;
+  int i = 0;
+  /* Only lanes 0 and 2 of the four source lanes are exercised.  */
+  int16_t c[4] = { 0, 1, 2, 3 };
+  int16_t d[4];
+  a = vld1_s16 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s16 (a, 0);
+  vst1_s16 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (c[0] != d[i])
+        return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s16 (a, 2);
+  vst1_s16 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 4; i++) /* Restart i: the first loop left it at 4.  */
+    {
+      if (c[2] != d[i])
+        return 1;
+    }
+
+  return 0;
+}
+
+/* Covers test_vdup_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* Covers test_vdup_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[2\\\]" 1 } } */
+
+int
+__attribute__ ((noinline)) test_vdupq_lane_s16 () /* 0 = pass, 1 = lane mismatch.  */
+{
+  int16x4_t a;
+  int16x8_t b;
+  int i = 0;
+  /* Only lanes 0 and 2 of the four source lanes are exercised.  */
+  int16_t c[4] = { 0, 1, 2, 3 };
+  int16_t d[8];
+  a = vld1_s16 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s16 (a, 0);
+  vst1q_s16 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (c[0] != d[i])
+        return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s16 (a, 2);
+  vst1q_s16 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 8; i++) /* Restart i: the first loop left it at 8.  */
+    {
+      if (c[2] != d[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* Covers test_vdupq_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[2\\\]" 1 } } */
+
+int
+__attribute__ ((noinline)) test_vdup_lane_s32 () /* 0 = pass, 1 = lane mismatch.  */
+{
+  int32x2_t a;
+  int32x2_t b;
+  int i = 0;
+  int32_t c[2] = { 0, 1 };
+  int32_t d[2];
+  a = vld1_s32 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s32 (a, 0);
+  vst1_s32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (c[0] != d[i])
+        return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s32 (a, 1);
+  vst1_s32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 2; i++) /* Restart i: the first loop left it at 2.  */
+    {
+      if (c[1] != d[i])
+        return 1;
+    }
+
+  return 0;
+}
+
+int
+__attribute__ ((noinline)) test_vdupq_lane_s32 () /* 0 = pass, 1 = lane mismatch.  */
+{
+  int32x2_t a;
+  int32x4_t b;
+  int i = 0;
+  int32_t c[2] = { 0, 1 };
+  int32_t d[4];
+  a = vld1_s32 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s32 (a, 0);
+  vst1q_s32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (c[0] != d[i])
+        return 1;
+    }
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s32 (a, 1);
+  vst1q_s32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 4; i++) /* Restart i: the first loop left it at 4.  */
+    {
+      if (c[1] != d[i])
+        return 1;
+    }
+
+  return 0;
+}
+
+int
+__attribute__ ((noinline)) test_vdup_lane_s64 () /* 0 = pass, 1 = mismatch.  */
+{
+  int64x1_t a; /* Signed types, matching the _s64 intrinsics used below.  */
+  int64x1_t b;
+  int64_t c[1];
+  int64_t d[1];
+
+  c[0] = 0;
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s64 (a, 0);
+  vst1_s64 (d, b);
+  INHIB_OPTIMIZATION;
+  if (c[0] != d[0])
+    return 1;
+
+  c[0] = 1;
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s64 (a, 0);
+  vst1_s64 (d, b);
+  INHIB_OPTIMIZATION;
+  if (c[0] != d[0])
+    return 1;
+  return 0;
+}
+
+int
+__attribute__ ((noinline)) test_vdupq_lane_s64 () /* 0 = pass, 1 = lane mismatch.  */
+{
+  int64x1_t a;
+  int64x2_t b;
+  int i = 0;
+  int64_t c[1];
+  int64_t d[2];
+  c[0] = 0;
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s64 (a, 0);
+  vst1q_s64 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (c[0] != d[i])
+        return 1;
+    }
+  i = 0;
+  c[0] = 1;
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s64 (a, 0);
+  vst1q_s64 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (c[0] != d[i])
+        return 1;
+    }
+
+  return 0;
+}
+
+int
+main () /* Aborts on the first sub-test that reports a mismatch.  */
+{
+  if (test_vdup_lane_f32 ())
+    abort ();
+  if (test_vdup_lane_s8 ())
+    abort ();
+  if (test_vdup_lane_s16 ())
+    abort ();
+  if (test_vdup_lane_s32 ())
+    abort ();
+  if (test_vdup_lane_s64 ())
+    abort ();
+  if (test_vdupq_lane_f32 ())
+    abort ();
+  if (test_vdupq_lane_s8 ())
+    abort ();
+  if (test_vdupq_lane_s16 ())
+    abort ();
+  if (test_vdupq_lane_s32 ())
+    abort ();
+  if (test_vdupq_lane_s64 ())
+    abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..319ce4575c8f78c75ad5ece3acc12ab6e7b2ac24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
@@ -0,0 +1,259 @@
+/* Test vdup_lane intrinsics work correctly. 
*/
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
+
+#define force_simd(V1)   asm volatile ("orr %0.16b, %1.16b, %1.16b" \
+           : "=w"(V1) \
+           : "w"(V1) \
+           : /* No clobbers */);
+
+extern void abort (void);
+
+int __attribute__ ((noinline))
+test_vdups_lane_f32 () /* 0 = pass, 1 = extracted lane mismatch.  */
+{
+  float32x2_t a;
+  float32_t b;
+  float32_t c[2] = { 0.0, 1.0 };
+  a = vld1_f32 (c);
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_f32 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_f32 (a, 1);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+/* Covers vdups_lane_f32, vdups_lane_s32, vdups_lane_u32.  */
+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 3 } } */
+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */
+
+
+int __attribute__ ((noinline))
+test_vdupd_lane_f64 () /* 0 = pass, 1 = extracted lane mismatch.  */
+{
+  float64x1_t a;
+  float64_t b;
+  float64_t c[1] = { 0.0 };
+  a = vld1_f64 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupd_lane_f64 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+/* Covers vdupd_lane_f64, vdupd_lane_s64, vdupd_lane_u64.  */
+/* Attempts to make the compiler generate
+   "dup\\td\[0-9\]+, v\[0-9\]+\.d\\\[0\\\]"
+   are not practical.  */
+
+int __attribute__ ((noinline))
+test_vdupb_lane_s8 () /* 0 = pass, 1 = extracted lane mismatch.  */
+{
+  int8x8_t a;
+  int8_t b;
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  a = vld1_s8 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupb_lane_s8 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdupb_lane_s8 (a, 4);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[4] != b)
+    return 1;
+
+  return 0;
+}
+
+/* Covers vdupb_lane_s8, vdupb_lane_u8. 
*/
+/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[4\\\]" 2 } } */
+
+int __attribute__ ((noinline))
+test_vdupb_lane_u8 () /* 0 = pass, 1 = extracted lane mismatch.  */
+{
+  uint8x8_t a;
+  uint8_t b;
+  uint8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  a = vld1_u8 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupb_lane_u8 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdupb_lane_u8 (a, 4);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[4] != b)
+    return 1;
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vduph_lane_u16 () /* 0 = pass, 1 = extracted lane mismatch.  */
+{
+  uint16x4_t a;
+  uint16_t b;
+  uint16_t c[4] = { 0, 1, 2, 3 };
+  a = vld1_u16 (c);
+  INHIB_OPTIMIZATION;
+  b = vduph_lane_u16 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vduph_lane_u16 (a, 2);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[2] != b)
+    return 1;
+  return 0;
+}
+
+/* Covers vduph_lane_u16, vduph_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[2\\\]" 2 } } */
+int __attribute__ ((noinline))
+test_vduph_lane_s16 () /* 0 = pass, 1 = extracted lane mismatch.  */
+{
+  int16x4_t a;
+  int16_t b;
+  int16_t c[4] = { 0, 1, 2, 3 };
+  a = vld1_s16 (c);
+  INHIB_OPTIMIZATION;
+  b = vduph_lane_s16 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vduph_lane_s16 (a, 2);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[2] != b)
+    return 1;
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_s32 () /* 0 = pass, 1 = extracted lane mismatch.  */
+{
+  int32x2_t a;
+  int32_t b;
+  int32_t c[2] = { 0, 1 };
+  a = vld1_s32 (c);
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_s32 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_s32 (a, 1);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_u32 () /* 0 = pass, 1 = extracted lane mismatch.  */
+{
+  uint32x2_t a;
+  uint32_t b;
+  uint32_t c[2] = { 0, 1 };
+  a = vld1_u32 (c);
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_u32 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_u32 (a, 1);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+int __attribute__ ((noinline))
+test_vdupd_lane_u64 () /* 0 = pass, 1 = extracted lane mismatch.  */
+{
+  uint64x1_t a;
+  uint64_t b;
+  uint64_t c[1] = { 0 };
+  a = vld1_u64 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupd_lane_u64 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupd_lane_s64 () /* 0 = pass, 1 = extracted lane mismatch.  */
+{
+  int64x1_t a;
+  int64_t b;
+  int64_t c[1] = { 0 };
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupd_lane_s64 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int
+main () /* Aborts on the first sub-test that reports a mismatch.  */
+{
+  if (test_vdups_lane_f32 ())
+    abort ();
+  if (test_vdupb_lane_s8 ())
+    abort ();
+  if (test_vdupb_lane_u8 ())
+    abort ();
+  if (test_vduph_lane_s16 ())
+    abort ();
+  if (test_vduph_lane_u16 ())
+    abort ();
+  if (test_vdups_lane_s32 ())
+    abort ();
+  if (test_vdups_lane_u32 ())
+    abort ();
+  if (test_vdupd_lane_s64 ())
+    abort ();
+  if (test_vdupd_lane_u64 () || test_vdupd_lane_f64 ()) /* f64 test was never run.  */
+    abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..06bee4cdd2532dd4302e1c4437a74970e4bc2966
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
@@ -0,0 +1,659 @@
+/* Test vdup_n intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
+
+#define force_gp(V1)   asm volatile ("orr %0, %1, %1" \
+           : "=r"(V1) \
+           : "r"(V1) \
+           : /* No clobbers */);
+
+int __attribute__ ((noinline))
+test_vdup_n_f32 () /* Returns 1 if any lane of vdup_n_f32 (a) differs from a, else 0.  */
+{
+  float32_t a;
+  float32x2_t b;
+  float32_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = (1.0);
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_f32 (a);
+  INHIB_OPTIMIZATION;
+  vst1_f32 (c, b);
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* No asm check.  */
+/* Cannot force floating point value in general purpose register.  */
+
+int __attribute__ ((noinline))
+test_vdupq_n_f32 () /* Returns 1 if any lane of vdupq_n_f32 (a) differs from a, else 0.  */
+{
+  float32_t a;
+  float32x4_t b;
+  float32_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1.0;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_f32 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_f32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* No asm check.  */
+/* Cannot force floating point value in general purpose register. 
*/
+
+int __attribute__ ((noinline))
+test_vdup_n_f64 () /* Returns 1 if the single lane of vdup_n_f64 (a) differs from a, else 0.  */
+{
+  float64_t a;
+  float64x1_t b;
+  float64_t c[1];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1.0;
+  force_gp (a) /* Asm with "r" constraints: asks for a in a general register.  */
+  INHIB_OPTIMIZATION;
+  b = vdup_n_f64 (a);
+  INHIB_OPTIMIZATION;
+  vst1_f64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 1; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* No asm check.  */
+/* Cannot force floating point value in general purpose register.  */
+
+int __attribute__ ((noinline))
+test_vdupq_n_f64 () /* Returns 1 if any lane of vdupq_n_f64 (a) differs from a, else 0.  */
+{
+  float64_t a;
+  float64x2_t b;
+  float64_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1.0;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_f64 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_f64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* No asm check.  */
+/* Cannot force floating point value in general purpose register.  */
+
+int __attribute__ ((noinline))
+test_vdup_n_p8 () /* Returns 1 if any lane of vdup_n_p8 (a) differs from a, else 0.  */
+{
+  poly8_t a;
+  poly8x8_t b;
+  poly8_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_p8 (a);
+  INHIB_OPTIMIZATION;
+  vst1_p8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */
+
+int __attribute__ ((noinline))
+test_vdupq_n_p8 () /* Returns 1 if any lane of vdupq_n_p8 (a) differs from a, else 0.  */
+{
+  poly8_t a;
+  poly8x16_t b;
+  poly8_t c[16];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_p8 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_p8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8. 
*/
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */
+
+int __attribute__ ((noinline))
+test_vdup_n_s8 () /* Returns 1 if any lane of vdup_n_s8 (a) differs from a, else 0.  */
+{
+  int8_t a;
+  int8x8_t b;
+  int8_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a) /* Asm with "r" constraints: asks for a in a general register.  */
+  INHIB_OPTIMIZATION;
+  b = vdup_n_s8 (a);
+  INHIB_OPTIMIZATION;
+  vst1_s8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s8 () /* Returns 1 if any lane of vdupq_n_s8 (a) differs from a, else 0.  */
+{
+  int8_t a;
+  int8x16_t b;
+  int8_t c[16];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_s8 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_s8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u8 () /* Returns 1 if any lane of vdup_n_u8 (a) differs from a, else 0.  */
+{
+  uint8_t a;
+  uint8x8_t b;
+  uint8_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_u8 (a);
+  INHIB_OPTIMIZATION;
+  vst1_u8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u8 () /* Returns 1 if any lane of vdupq_n_u8 (a) differs from a, else 0.  */
+{
+  uint8_t a;
+  uint8x16_t b;
+  uint8_t c[16];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_u8 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_u8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_p16 () /* Returns 1 if any lane of vdup_n_p16 (a) differs from a, else 0.  */
+{
+  poly16_t a;
+  poly16x4_t b;
+  poly16_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_p16 (a);
+  INHIB_OPTIMIZATION;
+  vst1_p16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16. 
*/
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */
+
+int __attribute__ ((noinline))
+test_vdupq_n_p16 () /* Returns 1 if any lane of vdupq_n_p16 (a) differs from a, else 0.  */
+{
+  poly16_t a;
+  poly16x8_t b;
+  poly16_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a) /* Asm with "r" constraints: asks for a in a general register.  */
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_p16 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_p16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_n_p16, test_vdupq_n_s16, test_vdupq_n_u16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */
+
+int __attribute__ ((noinline))
+test_vdup_n_s16 () /* Returns 1 if any lane of vdup_n_s16 (a) differs from a, else 0.  */
+{
+  int16_t a;
+  int16x4_t b;
+  int16_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_s16 (a);
+  INHIB_OPTIMIZATION;
+  vst1_s16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s16 () /* Returns 1 if any lane of vdupq_n_s16 (a) differs from a, else 0.  */
+{
+  int16_t a;
+  int16x8_t b;
+  int16_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_s16 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_s16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u16 () /* Returns 1 if any lane of vdup_n_u16 (a) differs from a, else 0.  */
+{
+  uint16_t a;
+  uint16x4_t b;
+  uint16_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_u16 (a);
+  INHIB_OPTIMIZATION;
+  vst1_u16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u16 () /* Returns 1 if any lane of vdupq_n_u16 (a) differs from a, else 0.  */
+{
+  uint16_t a;
+  uint16x8_t b;
+  uint16_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_u16 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_u16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s32 () /* Returns 1 if any lane of vdup_n_s32 (a) differs from a, else 0.  */
+{
+  int32_t a;
+  int32x2_t b;
+  int32_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_s32 (a);
+  INHIB_OPTIMIZATION;
+  vst1_s32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdup_n_s32, test_vdup_n_u32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */
+
+int __attribute__ ((noinline))
+test_vdupq_n_s32 () /* Returns 1 if any lane of vdupq_n_s32 (a) differs from a, else 0.  */
+{
+  int32_t a;
+  int32x4_t b;
+  int32_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_s32 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_s32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_n_s32, test_vdupq_n_u32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */
+
+int __attribute__ ((noinline))
+test_vdup_n_u32 () /* Returns 1 if any lane of vdup_n_u32 (a) differs from a, else 0.  */
+{
+  uint32_t a;
+  uint32x2_t b;
+  uint32_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_u32 (a);
+  INHIB_OPTIMIZATION;
+  vst1_u32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+
+int __attribute__ ((noinline))
+test_vdupq_n_u32 () /* Returns 1 if any lane of vdupq_n_u32 (a) differs from a, else 0.  */
+{
+  uint32_t a;
+  uint32x4_t b;
+  uint32_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_u32 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_u32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s64 () /* Returns 1 if the single lane of vdup_n_s64 (a) differs from a, else 0.  */
+{
+  int64_t a;
+  int64x1_t b;
+  int64_t c[1];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_s64 (a);
+  INHIB_OPTIMIZATION;
+  vst1_s64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 1; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdup_n_s64, test_vdup_n_u64. 
*/
+/* Attempts to make the compiler generate "dup\\td\[0-9\]+, x\[0-9\]+"
+   are not practical.  */
+
+int __attribute__ ((noinline))
+test_vdupq_n_s64 () /* Returns 1 if any lane of vdupq_n_s64 (a) differs from a, else 0.  */
+{
+  int64_t a;
+  int64x2_t b;
+  int64_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a) /* Asm with "r" constraints: asks for a in a general register.  */
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_s64 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_s64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_n_s64, test_vdupq_n_u64.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */
+
+int __attribute__ ((noinline))
+test_vdup_n_u64 () /* Returns 1 if the single lane of vdup_n_u64 (a) differs from a, else 0.  */
+{
+  uint64_t a;
+  uint64x1_t b;
+  uint64_t c[1];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdup_n_u64 (a);
+  INHIB_OPTIMIZATION;
+  vst1_u64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 1; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u64 () /* Returns 1 if any lane of vdupq_n_u64 (a) differs from a, else 0.  */
+{
+  uint64_t a;
+  uint64x2_t b;
+  uint64_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_u64 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_u64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+int
+main () /* Aborts on the first sub-test that returns non-zero.  */
+{
+  if (test_vdup_n_f32 ())
+    abort ();
+  if (test_vdup_n_f64 ())
+    abort ();
+  if (test_vdup_n_p8 ())
+    abort ();
+  if (test_vdup_n_u8 ())
+    abort ();
+  if (test_vdup_n_s8 ())
+    abort ();
+  if (test_vdup_n_p16 ())
+    abort ();
+  if (test_vdup_n_s16 ())
+    abort ();
+  if (test_vdup_n_u16 ())
+    abort ();
+  if (test_vdup_n_s32 ())
+    abort ();
+  if (test_vdup_n_u32 ())
+    abort ();
+  if (test_vdup_n_s64 ())
+    abort ();
+  if (test_vdup_n_u64 ())
+    abort ();
+  if (test_vdupq_n_f32 ())
+    abort ();
+  if (test_vdupq_n_f64 ())
+    abort ();
+  if (test_vdupq_n_p8 ())
+    abort ();
+  if (test_vdupq_n_u8 ())
+    abort ();
+  if (test_vdupq_n_s8 ())
+    abort ();
+  if (test_vdupq_n_p16 ())
+    abort ();
+  if (test_vdupq_n_s16 ())
+    abort ();
+  if (test_vdupq_n_u16 ())
+    abort ();
+  if (test_vdupq_n_s32 ())
+    abort ();
+  if (test_vdupq_n_u32 ())
+    abort ();
+  if (test_vdupq_n_s64 ())
+    abort ();
+  if (test_vdupq_n_u64 ())
+    abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */