This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[AArch64][6/14] ARMv8.2-A FP16 reduction vector intrinsics
- From: Jiong Wang <jiong dot wang at foss dot arm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Thu, 7 Jul 2016 17:16:58 +0100
- Subject: [AArch64][6/14] ARMv8.2-A FP16 reduction vector intrinsics
- Authentication-results: sourceware.org; auth=none
- References: <67f7b93f-0a92-de8f-8c50-5b4b573fed3a@foss.arm.com> <99eb95e3-5e9c-c6c9-b85f-e67d15f4859a@foss.arm.com> <21c3c64f-95ad-c127-3f8a-4afd236aae33@foss.arm.com> <938d13c1-39be-5fe3-9997-e55942bbd163@foss.arm.com> <a12ecde7-2ac1-0539-334e-9a33395dd3eb@foss.arm.com> <a3eeda81-cb1c-6d9e-706d-c5c067a90d74@foss.arm.com>
This patch add ARMv8.2-A FP16 reduction vector intrinsics.
gcc/
2016-07-07 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/arm_neon.h (vmaxv_f16): New.
(vmaxvq_f16): Likewise.
(vminv_f16): Likewise.
(vminvq_f16): Likewise.
(vmaxnmv_f16): Likewise.
(vmaxnmvq_f16): Likewise.
(vminnmv_f16): Likewise.
(vminnmvq_f16): Likewise.
* config/aarch64/iterators.md (vp): Support HF modes.
>From 514e5d195867d2f53fac50804748976626748f81 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@arm.com>
Date: Wed, 8 Jun 2016 10:23:17 +0100
Subject: [PATCH 06/14] [6/14] ARMv8.2 FP16 reduction vector intrinsics
---
gcc/config/aarch64/aarch64-simd-builtins.def | 8 ++---
gcc/config/aarch64/aarch64-simd.md | 12 +++----
gcc/config/aarch64/arm_neon.h | 50 ++++++++++++++++++++++++++++
gcc/config/aarch64/iterators.md | 7 ++--
4 files changed, 65 insertions(+), 12 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 6ff5063..64c5f86 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -234,12 +234,12 @@
BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
/* Implemented by reduc_<maxmin_uns>_scal_<mode> (producing scalar). */
- BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
- BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
+ BUILTIN_VDQIF_F16 (UNOP, reduc_smax_scal_, 10)
+ BUILTIN_VDQIF_F16 (UNOP, reduc_smin_scal_, 10)
BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
- BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10)
- BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10)
+ BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10)
+ BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10)
/* Implemented by <maxmin><mode>3.
smax variants map to fmaxnm,
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c0600df..d5b25fa 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2073,8 +2073,8 @@
;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
- FMAXMINV)]
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
+ FMAXMINV)]
"TARGET_SIMD"
{
rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
@@ -2121,12 +2121,12 @@
)
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
- FMAXMINV))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
+ FMAXMINV))]
"TARGET_SIMD"
"<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
- [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)
;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index b09a3a7..f3e5d0e 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -26638,6 +26638,56 @@ vmulxq_n_f16 (float16x8_t __a, float16_t __b)
return vmulxq_f16 (__a, vdupq_n_f16 (__b));
}
+/* ARMv8.2-A FP16 reduction vector intrinsics. */
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmaxv_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_reduc_smax_nan_scal_v4hf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmaxvq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_reduc_smax_nan_scal_v8hf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vminv_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_reduc_smin_nan_scal_v4hf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vminvq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_reduc_smin_nan_scal_v8hf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmaxnmv_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_reduc_smax_scal_v4hf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmaxnmvq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_reduc_smax_scal_v8hf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vminnmv_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_reduc_smin_scal_v4hf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vminnmvq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_reduc_smin_scal_v8hf (__a);
+}
+
#pragma GCC pop_options
#undef __aarch64_vget_lane_any
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 8d4dc6c..011b937 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -159,6 +159,8 @@
;; Vector modes except double int.
(define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
+(define_mode_iterator VDQIF_F16 [V8QI V16QI V4HI V8HI V2SI V4SI
+ V4HF V8HF V2SF V4SF V2DF])
;; Vector modes for S type.
(define_mode_iterator VDQ_SI [V2SI V4SI])
@@ -760,8 +762,9 @@
(define_mode_attr vp [(V8QI "v") (V16QI "v")
(V4HI "v") (V8HI "v")
(V2SI "p") (V4SI "v")
- (V2DI "p") (V2DF "p")
- (V2SF "p") (V4SF "v")])
+ (V2DI "p") (V2DF "p")
+ (V2SF "p") (V4SF "v")
+ (V4HF "v") (V8HF "v")])
(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")])
(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")])
--
2.5.0