This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [AArch64][2/14] ARMv8.2-A FP16 one operand vector intrinsics


On 07/07/16 17:14, Jiong Wang wrote:
This patch adds ARMv8.2-A FP16 one-operand vector intrinsics.

We introduced new mode iterators to cover HF modes; qualified patterns
which were using old mode iterators are switched to the new ones.

We can't simply extend an old iterator like VDQF to cover HF modes,
because not all patterns using VDQF come with new FP16 support; thus we
introduced new, temporary iterators, and only apply the new iterators to
those patterns which do have FP16 support.

I noticed the patchset at

  https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00308.html

has some modifications on the standard names "div" and "sqrt", thus there
are minor conflicts as this patch touches "sqrt" as well.

This patch resolves the conflict; the change is to let
aarch64_emit_approx_sqrt simply return false for V4HFmode and V8HFmode.

gcc/
2016-07-20  Jiong Wang  <jiong.wang@arm.com>

        * config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
        * config/aarch64/aarch64-simd-builtins.def: Register new builtins.
        * config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
        (neg<mode>2): Likewise.
        (abs<mode>2): Likewise.
        (<frint_pattern><mode>2): Likewise.
        (l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
        (<optab><VDQF:mode><fcvt_target>2): Likewise.
        (<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
        (ftrunc<VDQF:mode>2): Likewise.
        (<optab><fcvt_target><VDQF:mode>2): Likewise.
        (sqrt<mode>2): Likewise.
        (*sqrt<mode>2): Likewise.
        (aarch64_frecpe<mode>): Likewise.
        (aarch64_cm<optab><mode>): Likewise.
        * config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
        false for V4HF and V8HF.
        * config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
        (VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
        (stype): New.
        * config/aarch64/arm_neon.h (vdup_n_f16): New.
        (vdupq_n_f16): Likewise.
        (vld1_dup_f16): Use vdup_n_f16.
        (vld1q_dup_f16): Use vdupq_n_f16.
        (vabs_f16): New.
        (vabsq_f16): Likewise.
        (vceqz_f16): Likewise.
        (vceqzq_f16): Likewise.
        (vcgez_f16): Likewise.
        (vcgezq_f16): Likewise.
        (vcgtz_f16): Likewise.
        (vcgtzq_f16): Likewise.
        (vclez_f16): Likewise.
        (vclezq_f16): Likewise.
        (vcltz_f16): Likewise.
        (vcltzq_f16): Likewise.
        (vcvt_f16_s16): Likewise.
        (vcvtq_f16_s16): Likewise.
        (vcvt_f16_u16): Likewise.
        (vcvtq_f16_u16): Likewise.
        (vcvt_s16_f16): Likewise.
        (vcvtq_s16_f16): Likewise.
        (vcvt_u16_f16): Likewise.
        (vcvtq_u16_f16): Likewise.
        (vcvta_s16_f16): Likewise.
        (vcvtaq_s16_f16): Likewise.
        (vcvta_u16_f16): Likewise.
        (vcvtaq_u16_f16): Likewise.
        (vcvtm_s16_f16): Likewise.
        (vcvtmq_s16_f16): Likewise.
        (vcvtm_u16_f16): Likewise.
        (vcvtmq_u16_f16): Likewise.
        (vcvtn_s16_f16): Likewise.
        (vcvtnq_s16_f16): Likewise.
        (vcvtn_u16_f16): Likewise.
        (vcvtnq_u16_f16): Likewise.
        (vcvtp_s16_f16): Likewise.
        (vcvtpq_s16_f16): Likewise.
        (vcvtp_u16_f16): Likewise.
        (vcvtpq_u16_f16): Likewise.
        (vneg_f16): Likewise.
        (vnegq_f16): Likewise.
        (vrecpe_f16): Likewise.
        (vrecpeq_f16): Likewise.
        (vrnd_f16): Likewise.
        (vrndq_f16): Likewise.
        (vrnda_f16): Likewise.
        (vrndaq_f16): Likewise.
        (vrndi_f16): Likewise.
        (vrndiq_f16): Likewise.
        (vrndm_f16): Likewise.
        (vrndmq_f16): Likewise.
        (vrndn_f16): Likewise.
        (vrndnq_f16): Likewise.
        (vrndp_f16): Likewise.
        (vrndpq_f16): Likewise.
        (vrndx_f16): Likewise.
        (vrndxq_f16): Likewise.
        (vrsqrte_f16): Likewise.
        (vrsqrteq_f16): Likewise.
        (vsqrt_f16): Likewise.
        (vsqrtq_f16): Likewise.

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 6b90b2af5e9d2b5e7f48569ec1ebcb0ef16314ee..af5fac5b29cf5373561d9bf9a69c401d2bec5cec 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -139,6 +139,10 @@ aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_unsigned };
 #define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
 static enum aarch64_type_qualifiers
+aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_none, qualifier_none };
+#define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
+static enum aarch64_type_qualifiers
 aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_poly, qualifier_poly, qualifier_poly };
 #define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index f1ad325f464f89c981cbdee8a8f6afafa938639a..22c87be429ba1aac2bbe77f1119d16b6b8bd6e80 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -42,7 +42,7 @@
   BUILTIN_VDC (COMBINE, combine, 0)
   BUILTIN_VB (BINOP, pmul, 0)
   BUILTIN_VALLF (BINOP, fmulx, 0)
-  BUILTIN_VDQF_DF (UNOP, sqrt, 2)
+  BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
   BUILTIN_VD_BHSI (BINOP, addp, 0)
   VAR1 (UNOP, addp, 0, di)
   BUILTIN_VDQ_BHSI (UNOP, clrsb, 2)
@@ -266,23 +266,29 @@
   BUILTIN_VDQF (BINOP, smin_nanp, 0)
 
   /* Implemented by <frint_pattern><mode>2.  */
-  BUILTIN_VDQF (UNOP, btrunc, 2)
-  BUILTIN_VDQF (UNOP, ceil, 2)
-  BUILTIN_VDQF (UNOP, floor, 2)
-  BUILTIN_VDQF (UNOP, nearbyint, 2)
-  BUILTIN_VDQF (UNOP, rint, 2)
-  BUILTIN_VDQF (UNOP, round, 2)
-  BUILTIN_VDQF_DF (UNOP, frintn, 2)
+  BUILTIN_VHSDF (UNOP, btrunc, 2)
+  BUILTIN_VHSDF (UNOP, ceil, 2)
+  BUILTIN_VHSDF (UNOP, floor, 2)
+  BUILTIN_VHSDF (UNOP, nearbyint, 2)
+  BUILTIN_VHSDF (UNOP, rint, 2)
+  BUILTIN_VHSDF (UNOP, round, 2)
+  BUILTIN_VHSDF_DF (UNOP, frintn, 2)
 
   /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2.  */
+  VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
+  VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
   VAR1 (UNOP, lbtruncv2sf, 2, v2si)
   VAR1 (UNOP, lbtruncv4sf, 2, v4si)
   VAR1 (UNOP, lbtruncv2df, 2, v2di)
 
+  VAR1 (UNOPUS, lbtruncuv4hf, 2, v4hi)
+  VAR1 (UNOPUS, lbtruncuv8hf, 2, v8hi)
   VAR1 (UNOPUS, lbtruncuv2sf, 2, v2si)
   VAR1 (UNOPUS, lbtruncuv4sf, 2, v4si)
   VAR1 (UNOPUS, lbtruncuv2df, 2, v2di)
 
+  VAR1 (UNOP, lroundv4hf, 2, v4hi)
+  VAR1 (UNOP, lroundv8hf, 2, v8hi)
   VAR1 (UNOP, lroundv2sf, 2, v2si)
   VAR1 (UNOP, lroundv4sf, 2, v4si)
   VAR1 (UNOP, lroundv2df, 2, v2di)
@@ -290,38 +296,52 @@
   VAR1 (UNOP, lroundsf, 2, si)
   VAR1 (UNOP, lrounddf, 2, di)
 
+  VAR1 (UNOPUS, lrounduv4hf, 2, v4hi)
+  VAR1 (UNOPUS, lrounduv8hf, 2, v8hi)
   VAR1 (UNOPUS, lrounduv2sf, 2, v2si)
   VAR1 (UNOPUS, lrounduv4sf, 2, v4si)
   VAR1 (UNOPUS, lrounduv2df, 2, v2di)
   VAR1 (UNOPUS, lroundusf, 2, si)
   VAR1 (UNOPUS, lroundudf, 2, di)
 
+  VAR1 (UNOP, lceilv4hf, 2, v4hi)
+  VAR1 (UNOP, lceilv8hf, 2, v8hi)
   VAR1 (UNOP, lceilv2sf, 2, v2si)
   VAR1 (UNOP, lceilv4sf, 2, v4si)
   VAR1 (UNOP, lceilv2df, 2, v2di)
 
+  VAR1 (UNOPUS, lceiluv4hf, 2, v4hi)
+  VAR1 (UNOPUS, lceiluv8hf, 2, v8hi)
   VAR1 (UNOPUS, lceiluv2sf, 2, v2si)
   VAR1 (UNOPUS, lceiluv4sf, 2, v4si)
   VAR1 (UNOPUS, lceiluv2df, 2, v2di)
   VAR1 (UNOPUS, lceilusf, 2, si)
   VAR1 (UNOPUS, lceiludf, 2, di)
 
+  VAR1 (UNOP, lfloorv4hf, 2, v4hi)
+  VAR1 (UNOP, lfloorv8hf, 2, v8hi)
   VAR1 (UNOP, lfloorv2sf, 2, v2si)
   VAR1 (UNOP, lfloorv4sf, 2, v4si)
   VAR1 (UNOP, lfloorv2df, 2, v2di)
 
+  VAR1 (UNOPUS, lflooruv4hf, 2, v4hi)
+  VAR1 (UNOPUS, lflooruv8hf, 2, v8hi)
   VAR1 (UNOPUS, lflooruv2sf, 2, v2si)
   VAR1 (UNOPUS, lflooruv4sf, 2, v4si)
   VAR1 (UNOPUS, lflooruv2df, 2, v2di)
   VAR1 (UNOPUS, lfloorusf, 2, si)
   VAR1 (UNOPUS, lfloorudf, 2, di)
 
+  VAR1 (UNOP, lfrintnv4hf, 2, v4hi)
+  VAR1 (UNOP, lfrintnv8hf, 2, v8hi)
   VAR1 (UNOP, lfrintnv2sf, 2, v2si)
   VAR1 (UNOP, lfrintnv4sf, 2, v4si)
   VAR1 (UNOP, lfrintnv2df, 2, v2di)
   VAR1 (UNOP, lfrintnsf, 2, si)
   VAR1 (UNOP, lfrintndf, 2, di)
 
+  VAR1 (UNOPUS, lfrintnuv4hf, 2, v4hi)
+  VAR1 (UNOPUS, lfrintnuv8hf, 2, v8hi)
   VAR1 (UNOPUS, lfrintnuv2sf, 2, v2si)
   VAR1 (UNOPUS, lfrintnuv4sf, 2, v4si)
   VAR1 (UNOPUS, lfrintnuv2df, 2, v2di)
@@ -329,10 +349,14 @@
   VAR1 (UNOPUS, lfrintnudf, 2, di)
 
   /* Implemented by <optab><fcvt_target><VDQF:mode>2.  */
+  VAR1 (UNOP, floatv4hi, 2, v4hf)
+  VAR1 (UNOP, floatv8hi, 2, v8hf)
   VAR1 (UNOP, floatv2si, 2, v2sf)
   VAR1 (UNOP, floatv4si, 2, v4sf)
   VAR1 (UNOP, floatv2di, 2, v2df)
 
+  VAR1 (UNOP, floatunsv4hi, 2, v4hf)
+  VAR1 (UNOP, floatunsv8hi, 2, v8hf)
   VAR1 (UNOP, floatunsv2si, 2, v2sf)
   VAR1 (UNOP, floatunsv4si, 2, v4sf)
   VAR1 (UNOP, floatunsv2di, 2, v2df)
@@ -358,13 +382,13 @@
 
   BUILTIN_VDQ_SI (UNOP, urecpe, 0)
 
-  BUILTIN_VDQF (UNOP, frecpe, 0)
+  BUILTIN_VHSDF (UNOP, frecpe, 0)
   BUILTIN_VDQF (BINOP, frecps, 0)
 
   /* Implemented by a mixture of abs2 patterns.  Note the DImode builtin is
      only ever used for the int64x1_t intrinsic, there is no scalar version.  */
   BUILTIN_VSDQ_I_DI (UNOP, abs, 0)
-  BUILTIN_VDQF (UNOP, abs, 2)
+  BUILTIN_VHSDF (UNOP, abs, 2)
 
   BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10)
   VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
@@ -457,7 +481,7 @@
   BUILTIN_VALLF (SHIFTIMM_USS, fcvtzu, 3)
 
   /* Implemented by aarch64_rsqrte<mode>.  */
-  BUILTIN_VALLF (UNOP, rsqrte, 0)
+  BUILTIN_VHSDF_SDF (UNOP, rsqrte, 0)
 
   /* Implemented by aarch64_rsqrts<mode>.  */
   BUILTIN_VALLF (BINOP, rsqrts, 0)
@@ -467,3 +491,13 @@
 
   /* Implemented by aarch64_faddp<mode>.  */
   BUILTIN_VDQF (BINOP, faddp, 0)
+
+  /* Implemented by aarch64_cm<optab><mode>.  */
+  BUILTIN_VHSDF_SDF (BINOP_USS, cmeq, 0)
+  BUILTIN_VHSDF_SDF (BINOP_USS, cmge, 0)
+  BUILTIN_VHSDF_SDF (BINOP_USS, cmgt, 0)
+  BUILTIN_VHSDF_SDF (BINOP_USS, cmle, 0)
+  BUILTIN_VHSDF_SDF (BINOP_USS, cmlt, 0)
+
+  /* Implemented by neg<mode>2.  */
+  BUILTIN_VHSDF (UNOP, neg, 2)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 251ad972a4bed027f8c77946fb21ce8d94dc3035..8e922e697d2b1a5ab2e09974429a788731a4dcc5 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -383,12 +383,12 @@
 )
 
 (define_insn "aarch64_rsqrte<mode>"
-  [(set (match_operand:VALLF 0 "register_operand" "=w")
-	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
+  [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
+	(unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")]
 		     UNSPEC_RSQRTE))]
   "TARGET_SIMD"
   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
-  [(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")])
+  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 
 (define_insn "aarch64_rsqrts<mode>"
   [(set (match_operand:VALLF 0 "register_operand" "=w")
@@ -1565,19 +1565,19 @@
 )
 
 (define_insn "neg<mode>2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
-       (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+       (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fneg\\t%0.<Vtype>, %1.<Vtype>"
-  [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
+  [(set_attr "type" "neon_fp_neg_<stype><q>")]
 )
 
 (define_insn "abs<mode>2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
-       (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+       (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fabs\\t%0.<Vtype>, %1.<Vtype>"
-  [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
+  [(set_attr "type" "neon_fp_abs_<stype><q>")]
 )
 
 (define_insn "fma<mode>4"
@@ -1735,24 +1735,24 @@
 ;; Vector versions of the floating-point frint patterns.
 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
 (define_insn "<frint_pattern><mode>2"
-  [(set (match_operand:VDQF 0 "register_operand" "=w")
-	(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
-		      FRINT))]
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
+		       FRINT))]
   "TARGET_SIMD"
   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
-  [(set_attr "type" "neon_fp_round_<Vetype><q>")]
+  [(set_attr "type" "neon_fp_round_<stype><q>")]
 )
 
 ;; Vector versions of the fcvt standard patterns.
 ;; Expands to lbtrunc, lround, lceil, lfloor
-(define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
+(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
 	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
-			       [(match_operand:VDQF 1 "register_operand" "w")]
+			       [(match_operand:VHSDF 1 "register_operand" "w")]
 			       FCVT)))]
   "TARGET_SIMD"
   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
-  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
+  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
 )
 
 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
@@ -1775,36 +1775,36 @@
   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
 )
 
-(define_expand "<optab><VDQF:mode><fcvt_target>2"
+(define_expand "<optab><VHSDF:mode><fcvt_target>2"
   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
 	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
-			       [(match_operand:VDQF 1 "register_operand")]
-			       UNSPEC_FRINTZ)))]
+			       [(match_operand:VHSDF 1 "register_operand")]
+				UNSPEC_FRINTZ)))]
   "TARGET_SIMD"
   {})
 
-(define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
+(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
 	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
-			       [(match_operand:VDQF 1 "register_operand")]
-			       UNSPEC_FRINTZ)))]
+			       [(match_operand:VHSDF 1 "register_operand")]
+				UNSPEC_FRINTZ)))]
   "TARGET_SIMD"
   {})
 
-(define_expand "ftrunc<VDQF:mode>2"
-  [(set (match_operand:VDQF 0 "register_operand")
-	(unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
-		      UNSPEC_FRINTZ))]
+(define_expand "ftrunc<VHSDF:mode>2"
+  [(set (match_operand:VHSDF 0 "register_operand")
+	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
+		       UNSPEC_FRINTZ))]
   "TARGET_SIMD"
   {})
 
-(define_insn "<optab><fcvt_target><VDQF:mode>2"
-  [(set (match_operand:VDQF 0 "register_operand" "=w")
-	(FLOATUORS:VDQF
+(define_insn "<optab><fcvt_target><VHSDF:mode>2"
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+	(FLOATUORS:VHSDF
 	  (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
   "TARGET_SIMD"
   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
-  [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
+  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
 )
 
 ;; Conversions between vectors of floats and doubles.
@@ -4296,14 +4296,14 @@
   [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
 	(neg:<V_cmp_result>
 	  (COMPARISONS:<V_cmp_result>
-	    (match_operand:VALLF 1 "register_operand" "w,w")
-	    (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
+	    (match_operand:VHSDF_SDF 1 "register_operand" "w,w")
+	    (match_operand:VHSDF_SDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
 	  )))]
   "TARGET_SIMD"
   "@
   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
-  [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
+  [(set_attr "type" "neon_fp_compare_<stype><q>")]
 )
 
 ;; fac(ge|gt)
@@ -4348,8 +4348,8 @@
 ;; sqrt
 
 (define_expand "sqrt<mode>2"
-  [(set (match_operand:VDQF 0 "register_operand")
-	(sqrt:VDQF (match_operand:VDQF 1 "register_operand")))]
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
   "TARGET_SIMD"
 {
   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
@@ -4357,11 +4357,11 @@
 })
 
 (define_insn "*sqrt<mode>2"
-  [(set (match_operand:VDQF 0 "register_operand" "=w")
-        (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
   "TARGET_SIMD"
   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
-  [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
+  [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
 )
 
 ;; Patterns for vector struct loads and stores.
@@ -5413,12 +5413,12 @@
 )
 
 (define_insn "aarch64_frecpe<mode>"
-  [(set (match_operand:VDQF 0 "register_operand" "=w")
-	(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
-		    UNSPEC_FRECPE))]
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
+	 UNSPEC_FRECPE))]
   "TARGET_SIMD"
   "frecpe\\t%0.<Vtype>, %1.<Vtype>"
-  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]
+  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
 )
 
 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 58a9d695c0ef9e6e1d67030580428699aba05be4..5ed633542efe58763d68fd9bfbb478ae6ef569c3 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7598,6 +7598,10 @@ bool
 aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
 {
   machine_mode mode = GET_MODE (dst);
+
+  if (mode == V4HFmode || mode == V8HFmode)
+    return false;
+
   machine_mode mmsk = mode_for_vector
 		        (int_mode_for_mode (GET_MODE_INNER (mode)),
 			 GET_MODE_NUNITS (mode));
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index fd5f094de6a058065e2b1377f5ffc4c1aba01f97..b4310f27aac08ab6ff5e89d58512dafc389b2c37 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -26028,6 +26028,365 @@ __INTERLEAVE_LIST (zip)
 
 /* End of optimal implementations in approved order.  */
 
+#pragma GCC pop_options
+
+/* ARMv8.2-A FP16 intrinsics.  */
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+fp16")
+
+/* ARMv8.2-A FP16 one operand vector intrinsics.  */
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vabs_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_absv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vabsq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_absv8hf (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vceqz_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_cmeqv4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vceqzq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_cmeqv8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcgez_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_cmgev4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgezq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_cmgev8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcgtz_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_cmgtv4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgtzq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_cmgtv8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vclez_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_cmlev4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vclezq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_cmlev8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcltz_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_cmltv4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcltzq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_cmltv8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vcvt_f16_s16 (int16x4_t __a)
+{
+  return __builtin_aarch64_floatv4hiv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vcvtq_f16_s16 (int16x8_t __a)
+{
+  return __builtin_aarch64_floatv8hiv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vcvt_f16_u16 (uint16x4_t __a)
+{
+  return __builtin_aarch64_floatunsv4hiv4hf ((int16x4_t) __a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vcvtq_f16_u16 (uint16x8_t __a)
+{
+  return __builtin_aarch64_floatunsv8hiv8hf ((int16x8_t) __a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvt_s16_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_lbtruncv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtq_s16_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_lbtruncv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvt_u16_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_lbtruncuv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtq_u16_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_lbtruncuv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvta_s16_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_lroundv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtaq_s16_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_lroundv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvta_u16_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_lrounduv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtaq_u16_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_lrounduv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvtm_s16_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_lfloorv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtmq_s16_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_lfloorv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvtm_u16_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_lflooruv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtmq_u16_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_lflooruv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvtn_s16_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_lfrintnv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtnq_s16_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_lfrintnv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvtn_u16_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_lfrintnuv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtnq_u16_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_lfrintnuv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvtp_s16_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_lceilv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtpq_s16_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_lceilv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvtp_u16_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_lceiluv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtpq_u16_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_lceiluv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vneg_f16 (float16x4_t __a)
+{
+  return -__a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vnegq_f16 (float16x8_t __a)
+{
+  return -__a;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrecpe_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_frecpev4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrecpeq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_frecpev8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrnd_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_btruncv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_btruncv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrnda_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_roundv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndaq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_roundv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndi_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_nearbyintv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndiq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_nearbyintv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndm_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_floorv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndmq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_floorv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndn_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_frintnv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndnq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_frintnv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndp_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_ceilv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndpq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_ceilv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndx_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_rintv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndxq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_rintv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrsqrte_f16 (float16x4_t a)
+{
+  return __builtin_aarch64_rsqrtev4hf (a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrsqrteq_f16 (float16x8_t a)
+{
+  return __builtin_aarch64_rsqrtev8hf (a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vsqrt_f16 (float16x4_t a)
+{
+  return __builtin_aarch64_sqrtv4hf (a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vsqrtq_f16 (float16x8_t a)
+{
+  return __builtin_aarch64_sqrtv8hf (a);
+}
+
+#pragma GCC pop_options
+
 #undef __aarch64_vget_lane_any
 
 #undef __aarch64_vdup_lane_any
@@ -26084,6 +26443,4 @@ __INTERLEAVE_LIST (zip)
 #undef __aarch64_vdupq_laneq_u32
 #undef __aarch64_vdupq_laneq_u64
 
-#pragma GCC pop_options
-
 #endif
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index e8fbb1281dec2e8f37f58ef2ced792dd62e3b5aa..af5eda9b9f4a80e1309655dcd7798337e1d818eb 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -88,11 +88,20 @@
 ;; Vector Float modes suitable for moving, loading and storing.
 (define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF])
 
-;; Vector Float modes, barring HF modes.
+;; Vector Float modes.
 (define_mode_iterator VDQF [V2SF V4SF V2DF])
+(define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST")
+			     (V8HF "TARGET_SIMD_F16INST")
+			     V2SF V4SF V2DF])
 
 ;; Vector Float modes, and DF.
 (define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF])
+(define_mode_iterator VHSDF_DF [(V4HF "TARGET_SIMD_F16INST")
+				(V8HF "TARGET_SIMD_F16INST")
+				V2SF V4SF V2DF DF])
+(define_mode_iterator VHSDF_SDF [(V4HF "TARGET_SIMD_F16INST")
+				 (V8HF "TARGET_SIMD_F16INST")
+				 V2SF V4SF V2DF SF DF])
 
 ;; Vector single Float modes.
 (define_mode_iterator VDQSF [V2SF V4SF])
@@ -366,7 +375,8 @@
 		    (V4HI "") (V8HI "")
 		    (V2SI "") (V4SI  "")
 		    (V2DI "") (V2SF "")
-		    (V4SF "") (V2DF "")])
+		    (V4SF "") (V4HF "")
+		    (V8HF "") (V2DF "")])
 
 ;; For scalar usage of vector/FP registers, narrowing
 (define_mode_attr vn2 [(QI "") (HI "b") (SI "h") (DI "s")
@@ -447,6 +457,16 @@
 			  (QI "b")   (HI "h")
 			  (SI "s")   (DI "d")])
 
+;; Vetype is used everywhere in scheduling type and assembly output,
+;; sometimes they are not the same, for example HF modes on some
+;; instructions.  stype is defined to represent scheduling type
+;; more accurately.
+(define_mode_attr stype [(V8QI "b") (V16QI "b") (V4HI "s") (V8HI "s")
+			 (V2SI "s") (V4SI "s") (V2DI "d") (V4HF "s")
+			 (V8HF "s") (V2SF "s") (V4SF "s") (V2DF "d")
+			 (HF "s") (SF "s") (DF "d") (QI "b") (HI "s")
+			 (SI "s") (DI "d")])
+
 ;; Mode-to-bitwise operation type mapping.
 (define_mode_attr Vbtype [(V8QI "8b")  (V16QI "16b")
 			  (V4HI "8b") (V8HI  "16b")
@@ -656,10 +676,14 @@
 
 (define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")
 			       (V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf")
-			       (SF "si") (DF "di") (SI "sf") (DI "df")])
+			       (SF "si") (DF "di") (SI "sf") (DI "df")
+			       (V4HF "v4hi") (V8HF "v8hi") (V4HI "v4hf")
+			       (V8HI "v8hf")])
 (define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")
 			       (V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF")
-			       (SF "SI") (DF "DI") (SI "SF") (DI "DF")])
+			       (SF "SI") (DF "DI") (SI "SF") (DI "DF")
+			       (V4HF "V4HI") (V8HF "V8HI") (V4HI "V4HF")
+			       (V8HI "V8HF")])
 
 
 ;; for the inequal width integer to fp conversions
@@ -687,6 +711,7 @@
 ;; the 'x' constraint.  All other modes may use the 'w' constraint.
 (define_mode_attr h_con [(V2SI "w") (V4SI "w")
 			 (V4HI "x") (V8HI "x")
+			 (V4HF "w") (V8HF "w")
 			 (V2SF "w") (V4SF "w")
 			 (V2DF "w") (DF "w")])
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]