[gcc r10-8972] aarch64: Add vcopy(q)__lane(q)_bf16 intrinsics
Andrea Corallo
akrl@gcc.gnu.org
Wed Nov 4 10:18:05 GMT 2020
https://gcc.gnu.org/g:b768eef488ac4329986eb540da81ba41bee7024a
commit r10-8972-gb768eef488ac4329986eb540da81ba41bee7024a
Author: Andrea Corallo <andrea.corallo@arm.com>
Date: Thu Oct 8 12:29:00 2020 +0200
aarch64: Add vcopy(q)__lane(q)_bf16 intrinsics
gcc/ChangeLog
2020-10-20 Andrea Corallo <andrea.corallo@arm.com>
* config/aarch64/arm_neon.h (vcopy_lane_bf16, vcopyq_lane_bf16)
(vcopyq_laneq_bf16, vcopy_laneq_bf16): New intrinsics.
gcc/testsuite/ChangeLog
2020-10-20 Andrea Corallo <andrea.corallo@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/bf16_vect_copy_lane_1.c:
New test.
* gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_1.c:
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_2.c:
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_1.c:
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_2.c:
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_1.c:
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_2.c:
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_1.c:
Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_2.c:
Likewise.
Diff:
---
gcc/config/aarch64/arm_neon.h | 36 ++++++++++++++++++++++
.../advsimd-intrinsics/bf16_vect_copy_lane_1.c | 32 +++++++++++++++++++
.../advsimd-intrinsics/vcopy_lane_bf16_indices_1.c | 18 +++++++++++
.../advsimd-intrinsics/vcopy_lane_bf16_indices_2.c | 18 +++++++++++
.../vcopy_laneq_bf16_indices_1.c | 17 ++++++++++
.../vcopy_laneq_bf16_indices_2.c | 17 ++++++++++
.../vcopyq_lane_bf16_indices_1.c | 17 ++++++++++
.../vcopyq_lane_bf16_indices_2.c | 17 ++++++++++
.../vcopyq_laneq_bf16_indices_1.c | 17 ++++++++++
.../vcopyq_laneq_bf16_indices_2.c | 17 ++++++++++
10 files changed, 206 insertions(+)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 85c0d62ca12..55c2a6590aa 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -35716,6 +35716,42 @@ vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a)
return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a);
}
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcopy_lane_bf16 (bfloat16x4_t __a, const int __lane1,
+ bfloat16x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ extern __inline bfloat16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcopyq_lane_bf16 (bfloat16x8_t __a, const int __lane1,
+ bfloat16x4_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcopy_laneq_bf16 (bfloat16x4_t __a, const int __lane1,
+ bfloat16x8_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
+__extension__ extern __inline bfloat16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcopyq_laneq_bf16 (bfloat16x8_t __a, const int __lane1,
+ bfloat16x8_t __b, const int __lane2)
+{
+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+ __a, __lane1);
+}
+
#pragma GCC pop_options
/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vect_copy_lane_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vect_copy_lane_1.c
new file mode 100644
index 00000000000..d5aa215c21a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vect_copy_lane_1.c
@@ -0,0 +1,32 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-additional-options "-march=armv8.2-a+bf16 -O3 --save-temps -std=gnu90" } */
+
+#include "arm_neon.h"
+
+bfloat16x4_t __attribute__((noinline,noclone))
+test_vcopy_lane_bf16 (bfloat16x4_t a, bfloat16x4_t b)
+{
+ return vcopy_lane_bf16 (a, 1, b, 2);
+}
+
+bfloat16x8_t __attribute__((noinline,noclone))
+test_vcopyq_lane_bf16 (bfloat16x8_t a, bfloat16x4_t b)
+{
+ return vcopyq_lane_bf16 (a, 1, b, 2);
+}
+
+bfloat16x4_t __attribute__((noinline,noclone))
+test_vcopy_laneq_bf16 (bfloat16x4_t a, bfloat16x8_t b)
+{
+ return vcopy_laneq_bf16 (a, 1, b, 2);
+}
+
+bfloat16x8_t __attribute__((noinline,noclone))
+test_vcopyq_laneq_bf16 (bfloat16x8_t a, bfloat16x8_t b)
+{
+ return vcopyq_laneq_bf16 (a, 1, b, 2);
+}
+
+/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[1\\\], v1.h\\\[2\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[1\\\], v1.h\\\[0\\\]" 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_1.c
new file mode 100644
index 00000000000..70579800eba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+bfloat16x4_t
+test_vcopy_lane_bf16 (bfloat16x4_t a, bfloat16x4_t b)
+{
+ bfloat16x4_t res;
+ res = vcopy_lane_bf16 (a, 0, b, 4);
+ res = vcopy_lane_bf16 (a, 0, b, -1);
+ return res;
+}
+
+/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_2.c
new file mode 100644
index 00000000000..a8ef9303689
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+bfloat16x4_t
+test_vcopy_lane_bf16 (bfloat16x4_t a, bfloat16x4_t b)
+{
+ bfloat16x4_t res;
+ res = vcopy_lane_bf16 (a, -1, b, 2);
+ res = vcopy_lane_bf16 (a, 4, b, 2);
+ return res;
+}
+
+/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_1.c
new file mode 100644
index 00000000000..c156204bcfc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+bfloat16x4_t
+test_vcopy_laneq_bf16 (bfloat16x4_t a, bfloat16x8_t b)
+{
+ bfloat16x4_t res;
+ /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+ res = vcopy_laneq_bf16 (a, -1, b, 2);
+ /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
+ res = vcopy_laneq_bf16 (a, 4, b, 2);
+ return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_2.c
new file mode 100644
index 00000000000..036690b0be6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+bfloat16x4_t
+test_vcopy_laneq_bf16 (bfloat16x4_t a, bfloat16x8_t b)
+{
+ bfloat16x4_t res;
+ /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+ res = vcopy_laneq_bf16 (a, 1, b, -1);
+ /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
+ res = vcopy_laneq_bf16 (a, 1, b, 8);
+ return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_1.c
new file mode 100644
index 00000000000..15fce1b2045
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+bfloat16x8_t
+test_vcopyq_lane_bf16 (bfloat16x8_t a, bfloat16x4_t b)
+{
+ bfloat16x8_t res;
+ /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+ res = vcopyq_lane_bf16 (a, -1, b, 2);
+ /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
+ res = vcopyq_lane_bf16 (a, 8, b, 2);
+ return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_2.c
new file mode 100644
index 00000000000..6e8004a1287
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+bfloat16x8_t
+test_vcopyq_lane_bf16 (bfloat16x8_t a, bfloat16x4_t b)
+{
+ bfloat16x8_t res;
+ /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+ res = vcopyq_lane_bf16 (a, 2, b, -1);
+ /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
+ res = vcopyq_lane_bf16 (a, 2, b, 4);
+ return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_1.c
new file mode 100644
index 00000000000..2a26b42ad90
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+bfloat16x8_t
+test_vcopyq_laneq_bf16 (bfloat16x8_t a, bfloat16x8_t b)
+{
+ bfloat16x8_t res;
+ /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+ res = vcopyq_laneq_bf16 (a, -1, b, 2);
+ /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
+ res = vcopyq_laneq_bf16 (a, 8, b, 2);
+ return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_2.c
new file mode 100644
index 00000000000..421cb2a452e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+bfloat16x8_t
+test_vcopyq_laneq_bf16 (bfloat16x8_t a, bfloat16x8_t b)
+{
+ bfloat16x8_t res;
+ /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+ res = vcopyq_laneq_bf16 (a, 2, b, -1);
+ /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
+ res = vcopyq_laneq_bf16 (a, 2, b, 8);
+ return res;
+}
More information about the Gcc-cvs
mailing list