diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index af95ac1..165132d 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -19642,6 +19642,7 @@ arm_debugger_arg_offset (int value, rtx addr) typedef enum { T_V8QI, T_V4HI, + T_V4HF, T_V2SI, T_V2SF, T_DI, @@ -19659,14 +19660,15 @@ typedef enum { #define TYPE_MODE_BIT(X) (1 << (X)) #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \ - | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \ - | TYPE_MODE_BIT (T_DI)) + | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \ + | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI)) #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \ | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \ | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI)) #define v8qi_UP T_V8QI #define v4hi_UP T_V4HI +#define v4hf_UP T_V4HF #define v2si_UP T_V2SI #define v2sf_UP T_V2SF #define di_UP T_DI @@ -19702,6 +19704,8 @@ typedef enum { NEON_SCALARMULH, NEON_SCALARMAC, NEON_CONVERT, + NEON_FLOAT_WIDEN, + NEON_FLOAT_NARROW, NEON_FIXCONV, NEON_SELECT, NEON_RESULTPAIR, @@ -20095,6 +20099,7 @@ arm_init_neon_builtins (void) tree neon_intQI_type_node; tree neon_intHI_type_node; + tree neon_floatHF_type_node; tree neon_polyQI_type_node; tree neon_polyHI_type_node; tree neon_intSI_type_node; @@ -20121,6 +20126,7 @@ arm_init_neon_builtins (void) tree V8QI_type_node; tree V4HI_type_node; + tree V4HF_type_node; tree V2SI_type_node; tree V2SF_type_node; tree V16QI_type_node; @@ -20175,6 +20181,9 @@ arm_init_neon_builtins (void) neon_float_type_node = make_node (REAL_TYPE); TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE; layout_type (neon_float_type_node); + neon_floatHF_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode); + layout_type (neon_floatHF_type_node); /* Define typedefs which exactly correspond to the modes we are basing vector types on. If you change these names you'll need to change @@ -20183,6 +20192,8 @@ arm_init_neon_builtins (void) "__builtin_neon_qi"); (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node, "__builtin_neon_hi"); + (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node, + "__builtin_neon_hf"); (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node, "__builtin_neon_si"); (*lang_hooks.types.register_builtin_type) (neon_float_type_node, @@ -20224,6 +20235,8 @@ arm_init_neon_builtins (void) build_vector_type_for_mode (neon_intQI_type_node, V8QImode); V4HI_type_node = build_vector_type_for_mode (neon_intHI_type_node, V4HImode); + V4HF_type_node = + build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode); V2SI_type_node = build_vector_type_for_mode (neon_intSI_type_node, V2SImode); V2SF_type_node = @@ -20346,14 +20359,13 @@ arm_init_neon_builtins (void) neon_builtin_datum *d = &neon_builtin_data[i]; const char* const modenames[] = { - "v8qi", "v4hi", "v2si", "v2sf", "di", + "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di", "v16qi", "v8hi", "v4si", "v4sf", "v2di", "ti", "ei", "oi" }; char namebuf[60]; tree ftype = NULL; int is_load = 0, is_store = 0; - gcc_assert (ARRAY_SIZE (modenames) == T_MAX); d->fcode = fcode; @@ -20549,8 +20561,9 @@ arm_init_neon_builtins (void) case NEON_REINTERP: { /* We iterate over 5 doubleword types, then 5 quadword - types. */ - int rhs = d->mode % 5; + types. V4HF is not a type used in reinterpret, so we translate + d->mode to the correct index in reinterp_ftype_dreg. */ + int rhs = (d->mode - ((d->mode > T_V4HF) ? 1 : 0)) % 5; switch (insn_data[d->code].operand[0].mode) { case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break; @@ -20567,7 +20580,36 @@ arm_init_neon_builtins (void) } } break; - + case NEON_FLOAT_WIDEN: + { + tree eltype = NULL_TREE; + tree return_type = NULL_TREE; + switch (insn_data[d->code].operand[1].mode) + { + case V4HFmode: + eltype = V4HF_type_node; + return_type = V4SF_type_node; + break; + default: gcc_unreachable (); + } + ftype = build_function_type_list (return_type, eltype, NULL); + break; + } + case NEON_FLOAT_NARROW: + { + tree eltype = NULL_TREE; + tree return_type = NULL_TREE; + switch (insn_data[d->code].operand[1].mode) + { + case V4SFmode: + eltype = V4SF_type_node; + return_type = V4HF_type_node; + break; + default: gcc_unreachable (); + } + ftype = build_function_type_list (return_type, eltype, NULL); + break; + } default: gcc_unreachable (); } @@ -21564,6 +21606,8 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) case NEON_DUP: case NEON_RINT: case NEON_SPLIT: + case NEON_FLOAT_WIDEN: + case NEON_FLOAT_NARROW: case NEON_REINTERP: return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); @@ -25190,7 +25234,7 @@ arm_vector_mode_supported_p (enum machine_mode mode) { /* Neon also supports V2SImode, etc. listed in the clause below. */ if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode - || mode == V16QImode || mode == V4SFmode || mode == V2DImode)) + || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode)) return true; if ((TARGET_NEON || TARGET_IWMMXT) @@ -26034,6 +26078,7 @@ static arm_mangle_map_entry arm_mangle_map[] = { { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" }, { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" }, { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" }, + { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" }, { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" }, { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" }, { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" }, diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index cc1774b..c47fdf6 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1052,7 +1052,7 @@ extern int prefer_neon_for_64bits; /* Modes valid for Neon D registers. */ #define VALID_NEON_DREG_MODE(MODE) \ ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \ - || (MODE) == V2SFmode || (MODE) == DImode) + || (MODE) == V4HFmode || (MODE) == V2SFmode || (MODE) == DImode) /* Modes valid for Neon Q registers. */ #define VALID_NEON_QREG_MODE(MODE) \ diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 4d945ce..e23d03b 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -43,6 +43,7 @@ typedef __builtin_neon_hi int16x4_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_si int32x2_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_di int64x1_t; typedef __builtin_neon_sf float32x2_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_hf float16x4_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_poly16 poly16x4_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_uqi uint8x8_t __attribute__ ((__vector_size__ (8))); @@ -6016,6 +6017,22 @@ vcvtq_u32_f32 (float32x4_t __a) return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0); } +#if ((__ARM_FP & 0x2) != 0) +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vcvt_f16_f32 (float32x4_t __a) +{ + return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a); +} + +#endif +#if ((__ARM_FP & 0x2) != 0) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvt_f32_f16 (float16x4_t __a) +{ + return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a); +} + +#endif __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vcvt_n_s32_f32 (float32x2_t __a, const int __b) { diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 5bf6d31..92f1d7a 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -132,6 +132,8 @@ VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi), VAR2 (UNOP, vrev16, v8qi, v16qi), VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf), VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf), +VAR1 (FLOAT_WIDEN, vcvtv4sf, v4hf), +VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf), VAR10 (SELECT, vbsl, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), VAR2 (RINT, vrintn, v2sf, v4sf), diff --git a/gcc/config/arm/neon-gen.ml b/gcc/config/arm/neon-gen.ml index a811e15..42b36c2 100644 --- a/gcc/config/arm/neon-gen.ml +++ b/gcc/config/arm/neon-gen.ml @@ -121,6 +121,7 @@ let rec signed_ctype = function | T_uint16 | T_int16 -> T_intHI | T_uint32 | T_int32 -> T_intSI | T_uint64 | T_int64 -> T_intDI + | T_float16 -> T_floatHF | T_float32 -> T_floatSF | T_poly8 -> T_intQI | T_poly16 -> T_intHI @@ -272,12 +273,11 @@ let infoword_value elttype features = with multiple modes (). *) let rec mode_suffix elttype shape = try - let mode = mode_of_elt elttype shape in - string_of_mode mode + mode_of_elt_str elttype shape with MixedMode (dst, src) -> - let dstmode = mode_of_elt dst shape - and srcmode = mode_of_elt src shape in - string_of_mode dstmode ^ string_of_mode srcmode + let dstmode = mode_of_elt_str dst shape + and srcmode = mode_of_elt_str src shape in + dstmode ^ srcmode let get_shuffle features = try @@ -291,19 +291,24 @@ let print_feature_test_start features = match List.find (fun feature -> match feature with Requires_feature _ -> true | Requires_arch _ -> true + | Requires_FP_bit _ -> true | _ -> false) features with - Requires_feature feature -> + Requires_feature feature -> Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature | Requires_arch arch -> Format.printf "#if __ARM_ARCH >= %d@\n" arch + | Requires_FP_bit bit -> + Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n" + (1 lsl bit) | _ -> assert false with Not_found -> assert true let print_feature_test_end features = let feature = - List.exists (function Requires_feature x -> true - | Requires_arch x -> true + List.exists (function Requires_feature _ -> true + | Requires_arch _ -> true + | Requires_FP_bit _ -> true | _ -> false) features in if feature then Format.printf "#endif@\n" @@ -365,6 +370,7 @@ let deftypes () = "__builtin_neon_hi", "int", 16, 4; "__builtin_neon_si", "int", 32, 2; "__builtin_neon_di", "int", 64, 1; + "__builtin_neon_hf", "float", 16, 4; "__builtin_neon_sf", "float", 32, 2; "__builtin_neon_poly8", "poly", 8, 8; "__builtin_neon_poly16", "poly", 16, 4; diff --git a/gcc/config/arm/neon-testgen.ml b/gcc/config/arm/neon-testgen.ml index d855c29..543318b 100644 --- a/gcc/config/arm/neon-testgen.ml +++ b/gcc/config/arm/neon-testgen.ml @@ -163,10 +163,12 @@ let effective_target features = match List.find (fun feature -> match feature with Requires_feature _ -> true | Requires_arch _ -> true + | Requires_FP_bit 1 -> true | _ -> false) features with Requires_feature "FMA" -> "arm_neonv2" | Requires_arch 8 -> "arm_v8_neon" + | Requires_FP_bit 1 -> "arm_neon_fp16" | _ -> assert false with Not_found -> "arm_neon" diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index d7c2cb3..33a7868 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -3281,6 +3281,24 @@ (const_string "neon_fp_vadd_qqq_vabs_qq")))] ) +(define_insn "neon_vcvtv4sfv4hf" + [(set (match_operand:V4SF 0 "s_register_operand" "=w") + (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")] + UNSPEC_VCVT))] + "TARGET_NEON && TARGET_FP16" + "vcvt.f32.f16\t%q0, %P1" + [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")] +) + +(define_insn "neon_vcvtv4hfv4sf" + [(set (match_operand:V4HF 0 "s_register_operand" "=w") + (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")] + UNSPEC_VCVT))] + "TARGET_NEON && TARGET_FP16" + "vcvt.f16.f32\t%P0, %q1" + [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")] +) + (define_insn "neon_vcvt_n" [(set (match_operand: 0 "s_register_operand" "=w") (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml index 34090c9..6a0e1a7 100644 --- a/gcc/config/arm/neon.ml +++ b/gcc/config/arm/neon.ml @@ -21,7 +21,7 @@ . *) (* Shorthand types for vector elements. *) -type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16 +type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16 | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts | Cast of elts * elts | NoElts @@ -37,6 +37,7 @@ type vectype = T_int8x8 | T_int8x16 | T_uint16x4 | T_uint16x8 | T_uint32x2 | T_uint32x4 | T_uint64x1 | T_uint64x2 + | T_float16x4 | T_float32x2 | T_float32x4 | T_poly8x8 | T_poly8x16 | T_poly16x4 | T_poly16x8 @@ -46,11 +47,13 @@ type vectype = T_int8x8 | T_int8x16 | T_uint8 | T_uint16 | T_uint32 | T_uint64 | T_poly8 | T_poly16 - | T_float32 | T_arrayof of int * vectype + | T_float16 | T_float32 + | T_arrayof of int * vectype | T_ptrto of vectype | T_const of vectype | T_void | T_intQI | T_intHI | T_intSI - | T_intDI | T_floatSF + | T_intDI | T_floatHF + | T_floatSF (* The meanings of the following are: TImode : "Tetra", two registers (four words). @@ -93,7 +96,7 @@ type arity = Arity0 of vectype | Arity4 of vectype * vectype * vectype * vectype * vectype type vecmode = V8QI | V4HI | V2SI | V2SF | DI - | V16QI | V8HI | V4SI | V4SF | V2DI + | V16QI | V8HI | V4SI | V4SF | V4HF | V2DI | QI | HI | SI | SF type opcode = @@ -208,6 +211,12 @@ type opcode = (* Reinterpret casts. *) | Vreinterp +let string_of_mode = function + V8QI -> "v8qi" | V4HI -> "v4hi" | V4HF -> "v4hf" |V2SI -> "v2si" + | V2SF -> "v2sf" | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" + | V4SI -> "v4si" | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" + | HI -> "hi" | SI -> "si" | SF -> "sf" + let rev_elems revsize elsize nelts _ = let mask = (revsize / elsize) - 1 in let arr = Array.init nelts @@ -284,13 +293,17 @@ type features = | Fixed_core_reg (* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *) | Requires_feature of string + (* Mark that the intrinsic requires a particular architecture version. *) | Requires_arch of int + (* Mark that the intrinsic requires a particular bit in __ARM_FP to + be set. *) + | Requires_FP_bit of int exception MixedMode of elts * elts let rec elt_width = function S8 | U8 | P8 | I8 | B8 -> 8 - | S16 | U16 | P16 | I16 | B16 -> 16 + | S16 | U16 | P16 | I16 | B16 | F16 -> 16 | S32 | F32 | U32 | I32 | B32 -> 32 | S64 | U64 | I64 | B64 -> 64 | Conv (a, b) -> @@ -303,7 +316,7 @@ let rec elt_class = function S8 | S16 | S32 | S64 -> Signed | U8 | U16 | U32 | U64 -> Unsigned | P8 | P16 -> Poly - | F32 -> Float + | F16 | F32 -> Float | I8 | I16 | I32 | I64 -> Int | B8 | B16 | B32 | B64 -> Bits | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b) @@ -315,6 +328,7 @@ let elt_of_class_width c w = | Signed, 16 -> S16 | Signed, 32 -> S32 | Signed, 64 -> S64 + | Float, 16 -> F16 | Float, 32 -> F32 | Unsigned, 8 -> U8 | Unsigned, 16 -> U16 @@ -330,7 +344,7 @@ let elt_of_class_width c w = | Bits, 16 -> B16 | Bits, 32 -> B32 | Bits, 64 -> B64 - | _ -> failwith "Bad element type" + | _ -> failwith ("Bad element type" ^ (string_of_int w)) (* Return unsigned integer element the same width as argument. *) let unsigned_of_elt elt = @@ -407,6 +421,14 @@ let rec mode_of_elt elt shape = | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops))) | _ -> failwith "invalid shape" +let mode_of_elt_str elt shape = + match elt with + Cast (F16, F32) -> "v4hfv4sf" + | Cast (F32, F16) -> "v4sfv4hf" + | _ -> + let mode = mode_of_elt elt shape in + string_of_mode mode + (* Modify an element type dependent on the shape of the instruction and the operand number. *) @@ -454,10 +476,11 @@ let type_for_elt shape elt no = | U16 -> T_uint16x4 | U32 -> T_uint32x2 | U64 -> T_uint64x1 + | F16 -> T_float16x4 | F32 -> T_float32x2 | P8 -> T_poly8x8 | P16 -> T_poly16x4 - | _ -> failwith "Bad elt type" + | _ -> failwith "Bad elt type for Dreg" end | Qreg -> begin match elt with @@ -472,7 +495,7 @@ let type_for_elt shape elt no = | F32 -> T_float32x4 | P8 -> T_poly8x16 | P16 -> T_poly16x8 - | _ -> failwith "Bad elt type" + | _ -> failwith "Bad elt type for Qreg" end | Corereg -> begin match elt with @@ -487,7 +510,7 @@ let type_for_elt shape elt no = | P8 -> T_poly8 | P16 -> T_poly16 | F32 -> T_float32 - | _ -> failwith "Bad elt type" + | _ -> failwith "Bad elt type for Corereg" end | Immed -> T_immediate (0, 0) @@ -506,7 +529,7 @@ let type_for_elt shape elt no = let vectype_size = function T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1 | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1 - | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64 + | T_float32x2 | T_poly8x8 | T_poly16x4 | T_float16x4 -> 64 | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2 | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2 | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128 @@ -621,6 +644,11 @@ let shift_right_acc shape elt = const_shift (fun imm -> T_immediate (1, imm)) ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt +let vcvt_sh shape elt = + match elt with + Cast (el1, el2) -> let vt1,vt2 = type_for_elt shape el1, type_for_elt shape el2 in Arity1 (vt1 0, vt2 1), elt + | _ -> failwith "Error in vcvt_sh" + (* Use for immediate right-shifts when the operation doesn't care about signedness. *) @@ -1217,6 +1245,12 @@ let ops = [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1, [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; + Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1], + Use_operands [| Dreg; Qreg; |], "vcvt", vcvt_sh, + [Cast (F16, F32)]; + Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1], + Use_operands [| Qreg; Dreg; |], "vcvt", vcvt_sh, + [Cast (F32, F16)]; Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2, [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2, @@ -1782,7 +1816,7 @@ let rec string_of_elt = function | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64" | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64" | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64" - | F32 -> "f32" | P8 -> "p8" | P16 -> "p16" + | F32 -> "f32" | P8 -> "p8" | P16 -> "p16" | F16 -> "f16" | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b | NoElts -> failwith "No elts" @@ -1809,6 +1843,7 @@ let string_of_vectype vt = | T_uint32x4 -> affix "uint32x4" | T_uint64x1 -> affix "uint64x1" | T_uint64x2 -> affix "uint64x2" + | T_float16x4 -> affix "float16x4" | T_float32x2 -> affix "float32x2" | T_float32x4 -> affix "float32x4" | T_poly8x8 -> affix "poly8x8" @@ -1825,6 +1860,7 @@ let string_of_vectype vt = | T_uint64 -> affix "uint64" | T_poly8 -> affix "poly8" | T_poly16 -> affix "poly16" + | T_float16 -> affix "float16" | T_float32 -> affix "float32" | T_immediate _ -> "const int" | T_void -> "void" @@ -1832,6 +1868,7 @@ let string_of_vectype vt = | T_intHI -> "__builtin_neon_hi" | T_intSI -> "__builtin_neon_si" | T_intDI -> "__builtin_neon_di" + | T_floatHF -> "__builtin_neon_hf" | T_floatSF -> "__builtin_neon_sf" | T_arrayof (num, base) -> let basename = name (fun x -> x) base in @@ -1852,12 +1889,6 @@ let string_of_inttype = function | B_CImode -> "__builtin_neon_ci" | B_XImode -> "__builtin_neon_xi" -let string_of_mode = function - V8QI -> "v8qi" | V4HI -> "v4hi" | V2SI -> "v2si" | V2SF -> "v2sf" - | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si" - | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" | HI -> "hi" | SI -> "si" - | SF -> "sf" - (* Use uppercase chars for letters which form part of the intrinsic name, but should be omitted from the builtin name (the info is passed in an extra argument, instead). *)