[AArch64][10/14] ARMv8.2-A FP16 lane scalar intrinsics
- From: Jiong Wang <jiong dot wang at foss dot arm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Thu, 7 Jul 2016 17:18:29 +0100
- Subject: [AArch64][10/14] ARMv8.2-A FP16 lane scalar intrinsics
This patch adds the ARMv8.2-A FP16 lane scalar intrinsics: fused multiply-add and multiply-subtract (vfmah/vfmsh), multiply (vmulh), and multiply-extend (vmulxh) of a half-precision scalar against a single lane of a float16x4_t or float16x8_t vector.
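For illustration, here is a minimal usage sketch; the function names are
made up for the example, and building it assumes a compiler and target
with ARMv8.2-A FP16 support (e.g. -march=armv8.2-a+fp16):

  #include <arm_neon.h>

  /* acc + x * v[2]: scalar fused multiply-add against lane 2.  */
  float16_t
  fma_by_lane (float16_t acc, float16_t x, float16x4_t v)
  {
    return vfmah_lane_f16 (acc, x, v, 2);
  }

  /* x * v[5]: scalar multiply against lane 5 of a 128-bit vector.  */
  float16_t
  mul_by_laneq (float16_t x, float16x8_t v)
  {
    return vmulh_laneq_f16 (x, v, 5);
  }

The lane index must be a compile-time constant in range for the vector
type (0-3 for float16x4_t, 0-7 for float16x8_t).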
gcc/
2016-07-07  Jiong Wang  <jiong.wang@arm.com>

        * config/aarch64/arm_neon.h (vfmah_lane_f16): New.
        (vfmah_laneq_f16): Likewise.
        (vfmsh_lane_f16): Likewise.
        (vfmsh_laneq_f16): Likewise.
        (vmulh_lane_f16): Likewise.
        (vmulh_laneq_f16): Likewise.
        (vmulxh_lane_f16): Likewise.
        (vmulxh_laneq_f16): Likewise.
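Each new intrinsic is a thin wrapper: it extracts the requested lane with
the header-internal __aarch64_vget_lane_any macro and forwards to the
corresponding scalar intrinsic from earlier patches in this series
(vfmah_f16, vfmsh_f16, vmulxh_f16), or to a plain C multiply for the
vmulh_* forms. A compilable sanity sketch of the equivalences follows;
the function name is made up, and user code would normally not call the
internal macro directly:

  #include <arm_neon.h>

  /* Returns non-zero iff each lane form matches its hand-expanded
     equivalent for one sample lane.  */
  int
  lane_forms_match (float16_t a, float16_t b, float16x4_t v)
  {
    return vfmah_lane_f16 (a, b, v, 1)            /* a + b * v[1]  */
             == vfmah_f16 (a, b, __aarch64_vget_lane_any (v, 1))
           && vfmsh_lane_f16 (a, b, v, 1)         /* a - b * v[1]  */
             == vfmsh_f16 (a, b, __aarch64_vget_lane_any (v, 1))
           && vmulh_lane_f16 (a, v, 1)            /* a * v[1]      */
             == (float16_t) (a * __aarch64_vget_lane_any (v, 1))
           && vmulxh_lane_f16 (a, v, 1)           /* FMULX semantics  */
             == vmulxh_f16 (a, __aarch64_vget_lane_any (v, 1));
  }

The _laneq_ variants are identical except that the vector operand is a
float16x8_t.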
From bcbe5035746c5684a3b9f0b62310f6aa276db364 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@arm.com>
Date: Thu, 9 Jun 2016 11:06:29 +0100
Subject: [PATCH 10/14] ARMv8.2-A FP16 lane scalar intrinsics
---
gcc/config/aarch64/arm_neon.h | 52 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 52 insertions(+)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index e727ff1..09095d1 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -26488,6 +26488,20 @@ vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
/* ARMv8.2-A FP16 lane vector intrinsics. */
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmah_lane_f16 (float16_t __a, float16_t __b,
+                float16x4_t __c, const int __lane)
+{
+  return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmah_laneq_f16 (float16_t __a, float16_t __b,
+                 float16x8_t __c, const int __lane)
+{
+  return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vfma_lane_f16 (float16x4_t __a, float16x4_t __b,
               float16x4_t __c, const int __lane)
@@ -26528,6 +26542,20 @@ vfmaq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c)
  return vfmaq_f16 (__a, __b, vdupq_n_f16 (__c));
}
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmsh_lane_f16 (float16_t __a, float16_t __b,
+                float16x4_t __c, const int __lane)
+{
+  return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmsh_laneq_f16 (float16_t __a, float16_t __b,
+                 float16x8_t __c, const int __lane)
+{
+  return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vfms_lane_f16 (float16x4_t __a, float16x4_t __b,
               float16x4_t __c, const int __lane)
@@ -26568,6 +26596,12 @@ vfmsq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c)
  return vfmsq_f16 (__a, __b, vdupq_n_f16 (__c));
}
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane)
+{
+  return __a * __aarch64_vget_lane_any (__b, __lane);
+}
+
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vmul_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane)
{
@@ -26580,6 +26614,12 @@ vmulq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane)
  return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane)));
}
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane)
+{
+  return __a * __aarch64_vget_lane_any (__b, __lane);
+}
+
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vmul_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane)
{
@@ -26604,6 +26644,12 @@ vmulq_n_f16 (float16x8_t __a, float16_t __b)
  return vmulq_laneq_f16 (__a, vdupq_n_f16 (__b), 0);
}
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulxh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane)
+{
+  return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane));
+}
+
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vmulx_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane)
{
@@ -26616,6 +26662,12 @@ vmulxq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane)
  return vmulxq_f16 (__a, __aarch64_vdupq_lane_f16 (__b, __lane));
}
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulxh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane)
+{
+  return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane));
+}
+
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vmulx_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane)
{
--
2.5.0
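Not part of the patch, but a quick runtime smoke test one could run on
FP16-capable hardware or a simulator; the values follow directly from the
definitions above (vfmah_lane_f16 (a, b, v, n) computes a + b * v[n]):

  #include <arm_neon.h>
  #include <stdio.h>

  int
  main (void)
  {
    float16x4_t v = { 1.0, 2.0, 3.0, 4.0 };
    /* 10 + 2 * v[2] == 10 + 2 * 3 == 16, exactly representable
       in half precision.  */
    float16_t r = vfmah_lane_f16 (10.0, 2.0, v, 2);
    printf ("%f\n", (double) r);   /* should print 16.000000  */
    return 0;
  }

Whether the compiler then emits the by-element forms of FMLA/FMUL/FMULX
or an explicit lane move is a code-generation question outside this
header-only patch.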