This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[AArch64][8/14] ARMv8.2-A FP16 two operands scalar intrinsics
- From: Jiong Wang <jiong dot wang at foss dot arm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Thu, 7 Jul 2016 17:17:55 +0100
- Subject: [AArch64][8/14] ARMv8.2-A FP16 two operands scalar intrinsics
- Authentication-results: sourceware.org; auth=none
- References: <67f7b93f-0a92-de8f-8c50-5b4b573fed3a@foss.arm.com> <99eb95e3-5e9c-c6c9-b85f-e67d15f4859a@foss.arm.com> <21c3c64f-95ad-c127-3f8a-4afd236aae33@foss.arm.com> <938d13c1-39be-5fe3-9997-e55942bbd163@foss.arm.com> <a12ecde7-2ac1-0539-334e-9a33395dd3eb@foss.arm.com> <a3eeda81-cb1c-6d9e-706d-c5c067a90d74@foss.arm.com> <cf21a824-01c3-0969-d12b-884c4e70e7f1@foss.arm.com> <c9ed296a-1105-6bda-1927-e72be567c590@foss.arm.com>
This patch adds ARMv8.2-A FP16 two-operand scalar intrinsics.
2016-07-07 Jiong Wang <jiong.wang@arm.com>
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64.md
(<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3): New.
(<FCVT_FIXED2F:fcvt_fixed_insn><mode>hf3): Likewise.
(add<mode>3): Extend to HF.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(<fmaxmin><mode>3): Extend to HF.
* config/aarch64/aarch64-simd.md (aarch64_rsqrts<mode>): Likewise.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_HSDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_HSDI:mode>3): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_fac<optab><mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
* config/aarch64/aarch64.md (<FCVT_F2FIXED:fcvt_fixed_insn>hf3): New.
(<FCVT_FIXED2F:fcvt_fixed_insn>hi3): Likewise.
* config/aarch64/iterators.md (VHSDF_SDF): Delete.
(VSDQ_HSDI): Support HI.
(fcvt_target, FCVT_TARGET): Likewise.
* config/aarch64/arm_fp16.h (vaddh_f16): New.
(vsubh_f16): Likewise.
(vabdh_f16): Likewise.
(vcageh_f16): Likewise.
(vcagth_f16): Likewise.
(vcaleh_f16): Likewise.
(vcalth_f16): Likewise.
(vceqh_f16): Likewise.
(vcgeh_f16): Likewise.
(vcgth_f16): Likewise.
(vcleh_f16): Likewise.
(vclth_f16): Likewise.
(vcvth_n_f16_s16): Likewise.
(vcvth_n_f16_s32): Likewise.
(vcvth_n_f16_s64): Likewise.
(vcvth_n_f16_u16): Likewise.
(vcvth_n_f16_u32): Likewise.
(vcvth_n_f16_u64): Likewise.
(vcvth_n_s16_f16): Likewise.
(vcvth_n_s32_f16): Likewise.
(vcvth_n_s64_f16): Likewise.
(vcvth_n_u16_f16): Likewise.
(vcvth_n_u32_f16): Likewise.
(vcvth_n_u64_f16): Likewise.
(vdivh_f16): Likewise.
(vmaxh_f16): Likewise.
(vmaxnmh_f16): Likewise.
(vminh_f16): Likewise.
(vminnmh_f16): Likewise.
(vmulh_f16): Likewise.
(vmulxh_f16): Likewise.
(vrecpsh_f16): Likewise.
(vrsqrtsh_f16): Likewise.
From 59446f3e1ce914b1102320e0d81654f211fad07d Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@arm.com>
Date: Thu, 9 Jun 2016 11:02:39 +0100
Subject: [PATCH 08/14] [8/14] ARMv8.2 FP16 two operands scalar intrinsics
---
gcc/config/aarch64/aarch64-simd-builtins.def | 31 +++--
gcc/config/aarch64/aarch64-simd.md | 40 +++---
gcc/config/aarch64/aarch64.md | 88 ++++++++----
gcc/config/aarch64/arm_fp16.h | 200 +++++++++++++++++++++++++++
gcc/config/aarch64/iterators.md | 11 +-
5 files changed, 309 insertions(+), 61 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 6a74daa..b32fdfe 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -41,7 +41,7 @@
BUILTIN_VDC (COMBINE, combine, 0)
BUILTIN_VB (BINOP, pmul, 0)
- BUILTIN_VHSDF_SDF (BINOP, fmulx, 0)
+ BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0)
BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
BUILTIN_VD_BHSI (BINOP, addp, 0)
VAR1 (UNOP, addp, 0, di)
@@ -393,13 +393,12 @@
/* Implemented by
aarch64_frecp<FRECP:frecp_suffix><mode>. */
BUILTIN_GPF_F16 (UNOP, frecpe, 0)
- BUILTIN_GPF (BINOP, frecps, 0)
BUILTIN_GPF_F16 (UNOP, frecpx, 0)
BUILTIN_VDQ_SI (UNOP, urecpe, 0)
BUILTIN_VHSDF (UNOP, frecpe, 0)
- BUILTIN_VHSDF (BINOP, frecps, 0)
+ BUILTIN_VHSDF_HSDF (BINOP, frecps, 0)
/* Implemented by a mixture of abs2 patterns. Note the DImode builtin is
only ever used for the int64x1_t intrinsic, there is no scalar version. */
@@ -496,17 +495,23 @@
/* Implemented by <FCVT_F2FIXED/FIXED2F:fcvt_fixed_insn><*><*>3. */
BUILTIN_VSDQ_HSDI (SHIFTIMM, scvtf, 3)
BUILTIN_VSDQ_HSDI (FCVTIMM_SUS, ucvtf, 3)
- BUILTIN_VHSDF_SDF (SHIFTIMM, fcvtzs, 3)
- BUILTIN_VHSDF_SDF (SHIFTIMM_USS, fcvtzu, 3)
+ BUILTIN_VHSDF_HSDF (SHIFTIMM, fcvtzs, 3)
+ BUILTIN_VHSDF_HSDF (SHIFTIMM_USS, fcvtzu, 3)
+ VAR1 (SHIFTIMM, scvtfsi, 3, hf)
+ VAR1 (SHIFTIMM, scvtfdi, 3, hf)
+ VAR1 (FCVTIMM_SUS, ucvtfsi, 3, hf)
+ VAR1 (FCVTIMM_SUS, ucvtfdi, 3, hf)
+ BUILTIN_GPI (SHIFTIMM, fcvtzshf, 3)
+ BUILTIN_GPI (SHIFTIMM_USS, fcvtzuhf, 3)
/* Implemented by aarch64_rsqrte<mode>. */
BUILTIN_VHSDF_HSDF (UNOP, rsqrte, 0)
/* Implemented by aarch64_rsqrts<mode>. */
- BUILTIN_VHSDF_SDF (BINOP, rsqrts, 0)
+ BUILTIN_VHSDF_HSDF (BINOP, rsqrts, 0)
/* Implemented by fabd<mode>3. */
- BUILTIN_VHSDF_SDF (BINOP, fabd, 3)
+ BUILTIN_VHSDF_HSDF (BINOP, fabd, 3)
/* Implemented by aarch64_faddp<mode>. */
BUILTIN_VHSDF (BINOP, faddp, 0)
@@ -522,10 +527,10 @@
BUILTIN_VHSDF_HSDF (UNOP, neg, 2)
/* Implemented by aarch64_fac<optab><mode>. */
- BUILTIN_VHSDF_SDF (BINOP_USS, faclt, 0)
- BUILTIN_VHSDF_SDF (BINOP_USS, facle, 0)
- BUILTIN_VHSDF_SDF (BINOP_USS, facgt, 0)
- BUILTIN_VHSDF_SDF (BINOP_USS, facge, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, faclt, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, facle, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, facgt, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, facge, 0)
/* Implemented by sqrt<mode>2. */
VAR1 (UNOP, sqrt, 2, hf)
@@ -543,3 +548,7 @@
BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
+
+ /* Implemented by <fmaxmin><mode>3. */
+ VAR1 (BINOP, fmax, 3, hf)
+ VAR1 (BINOP, fmin, 3, hf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 6e6c4ac..bc02833 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -391,9 +391,9 @@
[(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
(define_insn "aarch64_rsqrts<mode>"
- [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
- (unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")
- (match_operand:VHSDF_SDF 2 "register_operand" "w")]
+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
+ (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
+ (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
UNSPEC_RSQRTS))]
"TARGET_SIMD"
"frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
@@ -475,11 +475,11 @@
)
(define_insn "fabd<mode>3"
- [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
- (abs:VHSDF_SDF
- (minus:VHSDF_SDF
- (match_operand:VHSDF_SDF 1 "register_operand" "w")
- (match_operand:VHSDF_SDF 2 "register_operand" "w"))))]
+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
+ (abs:VHSDF_HSDF
+ (minus:VHSDF_HSDF
+ (match_operand:VHSDF_HSDF 1 "register_operand" "w")
+ (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
"TARGET_SIMD"
"fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_fp_abd_<stype><q>")]
@@ -3021,10 +3021,10 @@
;; fmulx.
(define_insn "aarch64_fmulx<mode>"
- [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
- (unspec:VHSDF_SDF
- [(match_operand:VHSDF_SDF 1 "register_operand" "w")
- (match_operand:VHSDF_SDF 2 "register_operand" "w")]
+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
+ (unspec:VHSDF_HSDF
+ [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
+ (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
UNSPEC_FMULX))]
"TARGET_SIMD"
"fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
@@ -4290,10 +4290,10 @@
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
(neg:<V_cmp_result>
(FAC_COMPARISONS:<V_cmp_result>
- (abs:VHSDF_SDF
- (match_operand:VHSDF_SDF 1 "register_operand" "w"))
- (abs:VHSDF_SDF
- (match_operand:VHSDF_SDF 2 "register_operand" "w"))
+ (abs:VHSDF_HSDF
+ (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
+ (abs:VHSDF_HSDF
+ (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
)))]
"TARGET_SIMD"
"fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
@@ -5400,10 +5400,10 @@
)
(define_insn "aarch64_frecps<mode>"
- [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
- (unspec:VHSDF_SDF
- [(match_operand:VHSDF_SDF 1 "register_operand" "w")
- (match_operand:VHSDF_SDF 2 "register_operand" "w")]
+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
+ (unspec:VHSDF_HSDF
+ [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
+ (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
UNSPEC_FRECPS))]
"TARGET_SIMD"
"frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 520026d..81a4f20 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4661,38 +4661,78 @@
(set_attr "simd" "*, yes")]
)
+(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (unspec:GPI [(match_operand:HF 1 "register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ FCVT_F2FIXED))]
+ "TARGET_FP_F16INST"
+ "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<GPI:w>0, %h1, #%2"
+ [(set_attr "type" "f_cvtf2i")]
+)
+
+(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><mode>hf3"
+ [(set (match_operand:HF 0 "register_operand" "=w")
+ (unspec:HF [(match_operand:GPI 1 "register_operand" "r")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ FCVT_FIXED2F))]
+ "TARGET_FP_F16INST"
+ "<FCVT_FIXED2F:fcvt_fixed_insn>\t%h0, %<GPI:w>1, #%2"
+ [(set_attr "type" "f_cvti2f")]
+)
+
+(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn>hf3"
+ [(set (match_operand:HI 0 "register_operand" "=w")
+ (unspec:HI [(match_operand:HF 1 "register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ FCVT_F2FIXED))]
+ "TARGET_SIMD"
+ "<FCVT_F2FIXED:fcvt_fixed_insn>\t%h0, %h1, #%2"
+ [(set_attr "type" "neon_fp_to_int_s")]
+)
+
+(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn>hi3"
+ [(set (match_operand:HF 0 "register_operand" "=w")
+ (unspec:HF [(match_operand:HI 1 "register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ FCVT_FIXED2F))]
+ "TARGET_SIMD"
+ "<FCVT_FIXED2F:fcvt_fixed_insn>\t%h0, %h1, #%2"
+ [(set_attr "type" "neon_int_to_fp_s")]
+)
+
;; -------------------------------------------------------------------
;; Floating-point arithmetic
;; -------------------------------------------------------------------
(define_insn "add<mode>3"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (plus:GPF
- (match_operand:GPF 1 "register_operand" "w")
- (match_operand:GPF 2 "register_operand" "w")))]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (plus:GPF_F16
+ (match_operand:GPF_F16 1 "register_operand" "w")
+ (match_operand:GPF_F16 2 "register_operand" "w")))]
"TARGET_FLOAT"
"fadd\\t%<s>0, %<s>1, %<s>2"
- [(set_attr "type" "fadd<s>")]
+ [(set_attr "type" "fadd<stype>")]
)
(define_insn "sub<mode>3"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (minus:GPF
- (match_operand:GPF 1 "register_operand" "w")
- (match_operand:GPF 2 "register_operand" "w")))]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (minus:GPF_F16
+ (match_operand:GPF_F16 1 "register_operand" "w")
+ (match_operand:GPF_F16 2 "register_operand" "w")))]
"TARGET_FLOAT"
"fsub\\t%<s>0, %<s>1, %<s>2"
- [(set_attr "type" "fadd<s>")]
+ [(set_attr "type" "fadd<stype>")]
)
(define_insn "mul<mode>3"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (mult:GPF
- (match_operand:GPF 1 "register_operand" "w")
- (match_operand:GPF 2 "register_operand" "w")))]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (mult:GPF_F16
+ (match_operand:GPF_F16 1 "register_operand" "w")
+ (match_operand:GPF_F16 2 "register_operand" "w")))]
"TARGET_FLOAT"
"fmul\\t%<s>0, %<s>1, %<s>2"
- [(set_attr "type" "fmul<s>")]
+ [(set_attr "type" "fmul<stype>")]
)
(define_insn "*fnmul<mode>3"
@@ -4716,13 +4756,13 @@
)
(define_insn "div<mode>3"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (div:GPF
- (match_operand:GPF 1 "register_operand" "w")
- (match_operand:GPF 2 "register_operand" "w")))]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (div:GPF_F16
+ (match_operand:GPF_F16 1 "register_operand" "w")
+ (match_operand:GPF_F16 2 "register_operand" "w")))]
"TARGET_FLOAT"
"fdiv\\t%<s>0, %<s>1, %<s>2"
- [(set_attr "type" "fdiv<s>")]
+ [(set_attr "type" "fdiv<stype>")]
)
(define_insn "neg<mode>2"
@@ -4773,13 +4813,13 @@
;; Scalar forms for the IEEE-754 fmax()/fmin() functions
(define_insn "<fmaxmin><mode>3"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")
- (match_operand:GPF 2 "register_operand" "w")]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")
+ (match_operand:GPF_F16 2 "register_operand" "w")]
FMAXMIN))]
"TARGET_FLOAT"
"<fmaxmin_op>\\t%<s>0, %<s>1, %<s>2"
- [(set_attr "type" "f_minmax<s>")]
+ [(set_attr "type" "f_minmax<stype>")]
)
;; For copysign (x, y), we want to generate:
diff --git a/gcc/config/aarch64/arm_fp16.h b/gcc/config/aarch64/arm_fp16.h
index 818aa61..21edc65 100644
--- a/gcc/config/aarch64/arm_fp16.h
+++ b/gcc/config/aarch64/arm_fp16.h
@@ -360,6 +360,206 @@ vsqrth_f16 (float16_t __a)
return __builtin_aarch64_sqrthf (__a);
}
+/* ARMv8.2-A FP16 two operands scalar intrinsics. */
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vaddh_f16 (float16_t __a, float16_t __b)
+{
+ return __a + __b;
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vabdh_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_fabdhf (__a, __b);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcageh_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_facgehf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcagth_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_facgthf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcaleh_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_faclehf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcalth_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_faclthf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vceqh_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_cmeqhf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcgeh_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_cmgehf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcgth_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_cmgthf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcleh_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_cmlehf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vclth_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_cmlthf_uss (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_n_f16_s16 (int16_t __a, const int __b)
+{
+ return __builtin_aarch64_scvtfhi (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_n_f16_s32 (int32_t __a, const int __b)
+{
+ return __builtin_aarch64_scvtfsihf (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_n_f16_s64 (int64_t __a, const int __b)
+{
+ return __builtin_aarch64_scvtfdihf (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_n_f16_u16 (uint16_t __a, const int __b)
+{
+ return __builtin_aarch64_ucvtfhi_sus (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_n_f16_u32 (uint32_t __a, const int __b)
+{
+ return __builtin_aarch64_ucvtfsihf_sus (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_n_f16_u64 (uint64_t __a, const int __b)
+{
+ return __builtin_aarch64_ucvtfdihf_sus (__a, __b);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvth_n_s16_f16 (float16_t __a, const int __b)
+{
+ return __builtin_aarch64_fcvtzshf (__a, __b);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvth_n_s32_f16 (float16_t __a, const int __b)
+{
+ return __builtin_aarch64_fcvtzshfsi (__a, __b);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvth_n_s64_f16 (float16_t __a, const int __b)
+{
+ return __builtin_aarch64_fcvtzshfdi (__a, __b);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvth_n_u16_f16 (float16_t __a, const int __b)
+{
+ return __builtin_aarch64_fcvtzuhf_uss (__a, __b);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvth_n_u32_f16 (float16_t __a, const int __b)
+{
+ return __builtin_aarch64_fcvtzuhfsi_uss (__a, __b);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvth_n_u64_f16 (float16_t __a, const int __b)
+{
+ return __builtin_aarch64_fcvtzuhfdi_uss (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vdivh_f16 (float16_t __a, float16_t __b)
+{
+ return __a / __b;
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmaxh_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_fmaxhf (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmaxnmh_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_fmaxhf (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vminh_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_fminhf (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vminnmh_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_fminhf (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulh_f16 (float16_t __a, float16_t __b)
+{
+ return __a * __b;
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulxh_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_fmulxhf (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrecpsh_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_frecpshf (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrsqrtsh_f16 (float16_t __a, float16_t __b)
+{
+ return __builtin_aarch64_rsqrtshf (__a, __b);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vsubh_f16 (float16_t __a, float16_t __b)
+{
+ return __a - __b;
+}
+
#pragma GCC pop_options
#endif
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 20d0f1b..91e2e64 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -105,9 +105,6 @@
(define_mode_iterator VHSDF_DF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF DF])
-(define_mode_iterator VHSDF_SDF [(V4HF "TARGET_SIMD_F16INST")
- (V8HF "TARGET_SIMD_F16INST")
- V2SF V4SF V2DF SF DF])
(define_mode_iterator VHSDF_HSDF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF
@@ -190,7 +187,9 @@
;; Scalar and Vector modes for S and D, Vector modes for H.
(define_mode_iterator VSDQ_HSDI [(V4HI "TARGET_SIMD_F16INST")
(V8HI "TARGET_SIMD_F16INST")
- V2SI V4SI V2DI SI DI])
+ V2SI V4SI V2DI
+ (HI "TARGET_SIMD_F16INST")
+ SI DI])
;; Vector modes for Q and H types.
(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI])
@@ -705,12 +704,12 @@
(V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf")
(SF "si") (DF "di") (SI "sf") (DI "df")
(V4HF "v4hi") (V8HF "v8hi") (V4HI "v4hf")
- (V8HI "v8hf")])
+ (V8HI "v8hf") (HF "hi") (HI "hf")])
(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")
(V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF")
(SF "SI") (DF "DI") (SI "SF") (DI "DF")
(V4HF "V4HI") (V8HF "V8HI") (V4HI "V4HF")
- (V8HI "V8HF")])
+ (V8HI "V8HF") (HF "HI") (HI "HF")])
;; for the inequal width integer to fp conversions
--
2.5.0