This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[AArch64][3/14] ARMv8.2-A FP16 two operands vector intrinsics
- From: Jiong Wang <jiong dot wang at foss dot arm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Thu, 7 Jul 2016 17:15:04 +0100
- Subject: [AArch64][3/14] ARMv8.2-A FP16 two operands vector intrinsics
- Authentication-results: sourceware.org; auth=none
- References: <67f7b93f-0a92-de8f-8c50-5b4b573fed3a@foss.arm.com> <99eb95e3-5e9c-c6c9-b85f-e67d15f4859a@foss.arm.com> <21c3c64f-95ad-c127-3f8a-4afd236aae33@foss.arm.com>
This patch adds the ARMv8.2-A FP16 two-operand vector intrinsics.
gcc/
2016-07-07 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode
iterators.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16): Likewise.
(vabd_f16): Likewise.
(vabdq_f16): Likewise.
(vcage_f16): Likewise.
(vcageq_f16): Likewise.
(vcagt_f16): Likewise.
(vcagtq_f16): Likewise.
(vcale_f16): Likewise.
(vcaleq_f16): Likewise.
(vcalt_f16): Likewise.
(vcaltq_f16): Likewise.
(vceq_f16): Likewise.
(vceqq_f16): Likewise.
(vcge_f16): Likewise.
(vcgeq_f16): Likewise.
(vcgt_f16): Likewise.
(vcgtq_f16): Likewise.
(vcle_f16): Likewise.
(vcleq_f16): Likewise.
(vclt_f16): Likewise.
(vcltq_f16): Likewise.
(vcvt_n_f16_s16): Likewise.
(vcvtq_n_f16_s16): Likewise.
(vcvt_n_f16_u16): Likewise.
(vcvtq_n_f16_u16): Likewise.
(vcvt_n_s16_f16): Likewise.
(vcvtq_n_s16_f16): Likewise.
(vcvt_n_u16_f16): Likewise.
(vcvtq_n_u16_f16): Likewise.
(vdiv_f16): Likewise.
(vdivq_f16): Likewise.
(vdup_lane_f16): Likewise.
(vdup_laneq_f16): Likewise.
(vdupq_lane_f16): Likewise.
(vdupq_laneq_f16): Likewise.
(vdups_lane_f16): Likewise.
(vdups_laneq_f16): Likewise.
(vmax_f16): Likewise.
(vmaxq_f16): Likewise.
(vmaxnm_f16): Likewise.
(vmaxnmq_f16): Likewise.
(vmin_f16): Likewise.
(vminq_f16): Likewise.
(vminnm_f16): Likewise.
(vminnmq_f16): Likewise.
(vmul_f16): Likewise.
(vmulq_f16): Likewise.
(vmulx_f16): Likewise.
(vmulxq_f16): Likewise.
(vpadd_f16): Likewise.
(vpaddq_f16): Likewise.
(vpmax_f16): Likewise.
(vpmaxq_f16): Likewise.
(vpmaxnm_f16): Likewise.
(vpmaxnmq_f16): Likewise.
(vpmin_f16): Likewise.
(vpminq_f16): Likewise.
(vpminnm_f16): Likewise.
(vpminnmq_f16): Likewise.
(vrecps_f16): Likewise.
(vrecpsq_f16): Likewise.
(vrsqrts_f16): Likewise.
(vrsqrtsq_f16): Likewise.
(vsub_f16): Likewise.
(vsubq_f16): Likewise.
commit 5ed72d355491365b3af5883cdc5a4fdaf5cb545b
Author: Jiong Wang <jiong.wang@arm.com>
Date: Wed Jun 8 10:10:28 2016 +0100
[3/14] ARMv8.2 FP16 two operands vector intrinsics
gcc/config/aarch64/aarch64-simd-builtins.def | 40 +--
gcc/config/aarch64/aarch64-simd.md | 152 +++++------
gcc/config/aarch64/arm_neon.h | 362 +++++++++++++++++++++++++++
gcc/config/aarch64/iterators.md | 10 +
4 files changed, 473 insertions(+), 91 deletions(-)
commit 5ed72d355491365b3af5883cdc5a4fdaf5cb545b
Author: Jiong Wang <jiong.wang@arm.com>
Date: Wed Jun 8 10:10:28 2016 +0100
[3/14] ARMv8.2 FP16 two operands vector intrinsics
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 3e48046..fe17298 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -41,7 +41,7 @@
BUILTIN_VDC (COMBINE, combine, 0)
BUILTIN_VB (BINOP, pmul, 0)
- BUILTIN_VALLF (BINOP, fmulx, 0)
+ BUILTIN_VHSDF_SDF (BINOP, fmulx, 0)
BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
BUILTIN_VD_BHSI (BINOP, addp, 0)
VAR1 (UNOP, addp, 0, di)
@@ -248,22 +248,22 @@
BUILTIN_VDQ_BHSI (BINOP, smin, 3)
BUILTIN_VDQ_BHSI (BINOP, umax, 3)
BUILTIN_VDQ_BHSI (BINOP, umin, 3)
- BUILTIN_VDQF (BINOP, smax_nan, 3)
- BUILTIN_VDQF (BINOP, smin_nan, 3)
+ BUILTIN_VHSDF (BINOP, smax_nan, 3)
+ BUILTIN_VHSDF (BINOP, smin_nan, 3)
/* Implemented by <fmaxmin><mode>3. */
- BUILTIN_VDQF (BINOP, fmax, 3)
- BUILTIN_VDQF (BINOP, fmin, 3)
+ BUILTIN_VHSDF (BINOP, fmax, 3)
+ BUILTIN_VHSDF (BINOP, fmin, 3)
/* Implemented by aarch64_<maxmin_uns>p<mode>. */
BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
BUILTIN_VDQ_BHSI (BINOP, sminp, 0)
BUILTIN_VDQ_BHSI (BINOP, umaxp, 0)
BUILTIN_VDQ_BHSI (BINOP, uminp, 0)
- BUILTIN_VDQF (BINOP, smaxp, 0)
- BUILTIN_VDQF (BINOP, sminp, 0)
- BUILTIN_VDQF (BINOP, smax_nanp, 0)
- BUILTIN_VDQF (BINOP, smin_nanp, 0)
+ BUILTIN_VHSDF (BINOP, smaxp, 0)
+ BUILTIN_VHSDF (BINOP, sminp, 0)
+ BUILTIN_VHSDF (BINOP, smax_nanp, 0)
+ BUILTIN_VHSDF (BINOP, smin_nanp, 0)
/* Implemented by <frint_pattern><mode>2. */
BUILTIN_VHSDF (UNOP, btrunc, 2)
@@ -383,7 +383,7 @@
BUILTIN_VDQ_SI (UNOP, urecpe, 0)
BUILTIN_VHSDF (UNOP, frecpe, 0)
- BUILTIN_VDQF (BINOP, frecps, 0)
+ BUILTIN_VHSDF (BINOP, frecps, 0)
/* Implemented by a mixture of abs2 patterns. Note the DImode builtin is
only ever used for the int64x1_t intrinsic, there is no scalar version. */
@@ -475,22 +475,22 @@
BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_laneq, 0)
/* Implemented by <FCVT_F2FIXED/FIXED2F:fcvt_fixed_insn><*><*>3. */
- BUILTIN_VSDQ_SDI (SHIFTIMM, scvtf, 3)
- BUILTIN_VSDQ_SDI (FCVTIMM_SUS, ucvtf, 3)
- BUILTIN_VALLF (SHIFTIMM, fcvtzs, 3)
- BUILTIN_VALLF (SHIFTIMM_USS, fcvtzu, 3)
+ BUILTIN_VSDQ_HSDI (SHIFTIMM, scvtf, 3)
+ BUILTIN_VSDQ_HSDI (FCVTIMM_SUS, ucvtf, 3)
+ BUILTIN_VHSDF_SDF (SHIFTIMM, fcvtzs, 3)
+ BUILTIN_VHSDF_SDF (SHIFTIMM_USS, fcvtzu, 3)
/* Implemented by aarch64_rsqrte<mode>. */
BUILTIN_VHSDF_SDF (UNOP, rsqrte, 0)
/* Implemented by aarch64_rsqrts<mode>. */
- BUILTIN_VALLF (BINOP, rsqrts, 0)
+ BUILTIN_VHSDF_SDF (BINOP, rsqrts, 0)
/* Implemented by fabd<mode>3. */
- BUILTIN_VALLF (BINOP, fabd, 3)
+ BUILTIN_VHSDF_SDF (BINOP, fabd, 3)
/* Implemented by aarch64_faddp<mode>. */
- BUILTIN_VDQF (BINOP, faddp, 0)
+ BUILTIN_VHSDF (BINOP, faddp, 0)
/* Implemented by aarch64_cm<optab><mode>. */
BUILTIN_VHSDF_SDF (BINOP_USS, cmeq, 0)
@@ -501,3 +501,9 @@
/* Implemented by neg<mode>2. */
BUILTIN_VHSDF (UNOP, neg, 2)
+
+ /* Implemented by aarch64_fac<optab><mode>. */
+ BUILTIN_VHSDF_SDF (BINOP_USS, faclt, 0)
+ BUILTIN_VHSDF_SDF (BINOP_USS, facle, 0)
+ BUILTIN_VHSDF_SDF (BINOP_USS, facgt, 0)
+ BUILTIN_VHSDF_SDF (BINOP_USS, facge, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index da6dd52..0a80adb 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -391,13 +391,13 @@
[(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
(define_insn "aarch64_rsqrts<mode>"
- [(set (match_operand:VALLF 0 "register_operand" "=w")
- (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
- (match_operand:VALLF 2 "register_operand" "w")]
- UNSPEC_RSQRTS))]
+ [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
+ (unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")
+ (match_operand:VHSDF_SDF 2 "register_operand" "w")]
+ UNSPEC_RSQRTS))]
"TARGET_SIMD"
"frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
- [(set_attr "type" "neon_fp_rsqrts_<Vetype><q>")])
+ [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
(define_expand "rsqrt<mode>2"
[(set (match_operand:VALLF 0 "register_operand" "=w")
@@ -475,14 +475,14 @@
)
(define_insn "fabd<mode>3"
- [(set (match_operand:VALLF 0 "register_operand" "=w")
- (abs:VALLF
- (minus:VALLF
- (match_operand:VALLF 1 "register_operand" "w")
- (match_operand:VALLF 2 "register_operand" "w"))))]
+ [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
+ (abs:VHSDF_SDF
+ (minus:VHSDF_SDF
+ (match_operand:VHSDF_SDF 1 "register_operand" "w")
+ (match_operand:VHSDF_SDF 2 "register_operand" "w"))))]
"TARGET_SIMD"
"fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
- [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_abd_<stype><q>")]
)
(define_insn "and<mode>3"
@@ -1062,10 +1062,10 @@
;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")]
- FMAXMINV))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")]
+ FMAXMINV))]
"TARGET_SIMD"
"<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_minmax<q>")]
@@ -1474,39 +1474,39 @@
;; FP arithmetic operations.
(define_insn "add<mode>3"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")))]
"TARGET_SIMD"
"fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
- [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)
(define_insn "sub<mode>3"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")))]
"TARGET_SIMD"
"fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
- [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)
(define_insn "mul<mode>3"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")))]
"TARGET_SIMD"
"fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
- [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_mul_<stype><q>")]
)
(define_insn "div<mode>3"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")))]
"TARGET_SIMD"
"fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
- [(set_attr "type" "neon_fp_div_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_div_<stype><q>")]
)
(define_insn "neg<mode>2"
@@ -1771,24 +1771,24 @@
;; Convert between fixed-point and floating-point (vector modes)
-(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VDQF:mode>3"
- [(set (match_operand:<VDQF:FCVT_TARGET> 0 "register_operand" "=w")
- (unspec:<VDQF:FCVT_TARGET> [(match_operand:VDQF 1 "register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")]
+(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
+ [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
+ (unspec:<VHSDF:FCVT_TARGET> [(match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
FCVT_F2FIXED))]
"TARGET_SIMD"
"<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
- [(set_attr "type" "neon_fp_to_int_<VDQF:Vetype><q>")]
+ [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)
-(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_SDI:mode>3"
- [(set (match_operand:<VDQ_SDI:FCVT_TARGET> 0 "register_operand" "=w")
- (unspec:<VDQ_SDI:FCVT_TARGET> [(match_operand:VDQ_SDI 1 "register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")]
+(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
+ [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
+ (unspec:<VDQ_HSDI:FCVT_TARGET> [(match_operand:VDQ_HSDI 1 "register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
FCVT_FIXED2F))]
"TARGET_SIMD"
"<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
- [(set_attr "type" "neon_int_to_fp_<VDQ_SDI:Vetype><q>")]
+ [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)
;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
@@ -1947,33 +1947,33 @@
;; NaNs.
(define_insn "<su><maxmin><mode>3"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")))]
"TARGET_SIMD"
"f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
- [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
(define_insn "<maxmin_uns><mode>3"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")]
- FMAXMIN_UNS))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")]
+ FMAXMIN_UNS))]
"TARGET_SIMD"
"<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
- [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
(define_insn "<fmaxmin><mode>3"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")]
- FMAXMIN))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")]
+ FMAXMIN))]
"TARGET_SIMD"
"<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
- [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
;; 'across lanes' add.
@@ -1993,13 +1993,13 @@
)
(define_insn "aarch64_faddp<mode>"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")]
- UNSPEC_FADDV))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")]
+ UNSPEC_FADDV))]
"TARGET_SIMD"
"faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
- [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)
(define_insn "aarch64_reduc_plus_internal<mode>"
@@ -2995,13 +2995,14 @@
;; fmulx.
(define_insn "aarch64_fmulx<mode>"
- [(set (match_operand:VALLF 0 "register_operand" "=w")
- (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
- (match_operand:VALLF 2 "register_operand" "w")]
- UNSPEC_FMULX))]
+ [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
+ (unspec:VHSDF_SDF
+ [(match_operand:VHSDF_SDF 1 "register_operand" "w")
+ (match_operand:VHSDF_SDF 2 "register_operand" "w")]
+ UNSPEC_FMULX))]
"TARGET_SIMD"
"fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
- [(set_attr "type" "neon_fp_mul_<Vetype>")]
+ [(set_attr "type" "neon_fp_mul_<stype>")]
)
;; vmulxq_lane_f32, and vmulx_laneq_f32
@@ -4261,16 +4262,18 @@
;; Note we can also handle what would be fac(le|lt) by
;; generating fac(ge|gt).
-(define_insn "*aarch64_fac<optab><mode>"
+(define_insn "aarch64_fac<optab><mode>"
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
(neg:<V_cmp_result>
(FAC_COMPARISONS:<V_cmp_result>
- (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
- (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
+ (abs:VHSDF_SDF
+ (match_operand:VHSDF_SDF 1 "register_operand" "w"))
+ (abs:VHSDF_SDF
+ (match_operand:VHSDF_SDF 2 "register_operand" "w"))
)))]
"TARGET_SIMD"
"fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
- [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
;; addp
@@ -5373,13 +5376,14 @@
)
(define_insn "aarch64_frecps<mode>"
- [(set (match_operand:VALLF 0 "register_operand" "=w")
- (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
- (match_operand:VALLF 2 "register_operand" "w")]
- UNSPEC_FRECPS))]
+ [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
+ (unspec:VHSDF_SDF
+ [(match_operand:VHSDF_SDF 1 "register_operand" "w")
+ (match_operand:VHSDF_SDF 2 "register_operand" "w")]
+ UNSPEC_FRECPS))]
"TARGET_SIMD"
"frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
- [(set_attr "type" "neon_fp_recps_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_recps_<stype><q>")]
)
(define_insn "aarch64_urecpe<mode>"
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 3018049..e78ff43 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -26096,6 +26096,368 @@ vsqrtq_f16 (float16x8_t a)
return __builtin_aarch64_sqrtv8hf (a);
}
+/* ARMv8.2-A FP16 two operands vector intrinsics. */
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vadd_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __a + __b;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vaddq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __a + __b;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vabd_f16 (float16x4_t a, float16x4_t b)
+{
+ return __builtin_aarch64_fabdv4hf (a, b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vabdq_f16 (float16x8_t a, float16x8_t b)
+{
+ return __builtin_aarch64_fabdv8hf (a, b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcage_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_facgev4hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcageq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_facgev8hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcagt_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_facgtv4hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcagtq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_facgtv8hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcale_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_faclev4hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcaleq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_faclev8hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcalt_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_facltv4hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcaltq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_facltv8hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vceq_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_cmeqv4hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vceqq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_cmeqv8hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcge_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_cmgev4hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgeq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_cmgev8hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcgt_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_cmgtv4hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgtq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_cmgtv8hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcle_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_cmlev4hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcleq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_cmlev8hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vclt_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_cmltv4hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcltq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_cmltv8hf_uss (__a, __b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vcvt_n_f16_s16 (int16x4_t __a, const int __b)
+{
+ return __builtin_aarch64_scvtfv4hi (__a, __b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vcvtq_n_f16_s16 (int16x8_t __a, const int __b)
+{
+ return __builtin_aarch64_scvtfv8hi (__a, __b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vcvt_n_f16_u16 (uint16x4_t __a, const int __b)
+{
+ return __builtin_aarch64_ucvtfv4hi_sus (__a, __b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vcvtq_n_f16_u16 (uint16x8_t __a, const int __b)
+{
+ return __builtin_aarch64_ucvtfv8hi_sus (__a, __b);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvt_n_s16_f16 (float16x4_t __a, const int __b)
+{
+ return __builtin_aarch64_fcvtzsv4hf (__a, __b);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtq_n_s16_f16 (float16x8_t __a, const int __b)
+{
+ return __builtin_aarch64_fcvtzsv8hf (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvt_n_u16_f16 (float16x4_t __a, const int __b)
+{
+ return __builtin_aarch64_fcvtzuv4hf_uss (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtq_n_u16_f16 (float16x8_t __a, const int __b)
+{
+ return __builtin_aarch64_fcvtzuv8hf_uss (__a, __b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vdiv_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __a / __b;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vdivq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __a / __b;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vmax_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_smax_nanv4hf (__a, __b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vmaxq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_smax_nanv8hf (__a, __b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vmaxnm_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_fmaxv4hf (__a, __b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vmaxnmq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_fmaxv8hf (__a, __b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vmin_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_smin_nanv4hf (__a, __b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vminq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_smin_nanv8hf (__a, __b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vminnm_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_fminv4hf (__a, __b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vminnmq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_fminv8hf (__a, __b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vmul_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __a * __b;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vmulq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __a * __b;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vmulx_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_fmulxv4hf (__a, __b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vmulxq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_fmulxv8hf (__a, __b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vpadd_f16 (float16x4_t a, float16x4_t b)
+{
+ return __builtin_aarch64_faddpv4hf (a, b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vpaddq_f16 (float16x8_t a, float16x8_t b)
+{
+ return __builtin_aarch64_faddpv8hf (a, b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vpmax_f16 (float16x4_t a, float16x4_t b)
+{
+ return __builtin_aarch64_smax_nanpv4hf (a, b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vpmaxq_f16 (float16x8_t a, float16x8_t b)
+{
+ return __builtin_aarch64_smax_nanpv8hf (a, b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vpmaxnm_f16 (float16x4_t a, float16x4_t b)
+{
+ return __builtin_aarch64_smaxpv4hf (a, b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vpmaxnmq_f16 (float16x8_t a, float16x8_t b)
+{
+ return __builtin_aarch64_smaxpv8hf (a, b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vpmin_f16 (float16x4_t a, float16x4_t b)
+{
+ return __builtin_aarch64_smin_nanpv4hf (a, b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vpminq_f16 (float16x8_t a, float16x8_t b)
+{
+ return __builtin_aarch64_smin_nanpv8hf (a, b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vpminnm_f16 (float16x4_t a, float16x4_t b)
+{
+ return __builtin_aarch64_sminpv4hf (a, b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vpminnmq_f16 (float16x8_t a, float16x8_t b)
+{
+ return __builtin_aarch64_sminpv8hf (a, b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrecps_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_frecpsv4hf (__a, __b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrecpsq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_frecpsv8hf (__a, __b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrsqrts_f16 (float16x4_t a, float16x4_t b)
+{
+ return __builtin_aarch64_rsqrtsv4hf (a, b);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrsqrtsq_f16 (float16x8_t a, float16x8_t b)
+{
+ return __builtin_aarch64_rsqrtsv8hf (a, b);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vsub_f16 (float16x4_t __a, float16x4_t __b)
+{
+ return __a - __b;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vsubq_f16 (float16x8_t __a, float16x8_t __b)
+{
+ return __a - __b;
+}
+
#pragma GCC pop_options
#undef __aarch64_vget_lane_any
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index af5eda9..35190b4 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -166,9 +166,19 @@
;; Vector modes for S and D
(define_mode_iterator VDQ_SDI [V2SI V4SI V2DI])
+;; Vector modes for H, S and D
+(define_mode_iterator VDQ_HSDI [(V4HI "TARGET_SIMD_F16INST")
+ (V8HI "TARGET_SIMD_F16INST")
+ V2SI V4SI V2DI])
+
;; Scalar and Vector modes for S and D
(define_mode_iterator VSDQ_SDI [V2SI V4SI V2DI SI DI])
+;; Scalar and Vector modes for S and D, Vector modes for H.
+(define_mode_iterator VSDQ_HSDI [(V4HI "TARGET_SIMD_F16INST")
+ (V8HI "TARGET_SIMD_F16INST")
+ V2SI V4SI V2DI SI DI])
+
;; Vector modes for Q and H types.
(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI])