[Patch AArch64 13/17] Enable _Float16 for AArch64
Richard Earnshaw (lists)
Richard.Earnshaw@arm.com
Thu Nov 24 14:40:00 GMT 2016
On 11/11/16 15:40, James Greenhalgh wrote:
>
> Hi,
>
> This patch adds the back-end wiring to get AArch64 support for
> the _Float16 type working.
>
> Bootstrapped on AArch64 with no issues.
>
> OK?
>
> Thanks,
> James
>
> ---
> 2016-11-09 James Greenhalgh <james.greenhalgh@arm.com>
>
> * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Update
> __FLT_EVAL_METHOD__ and __FLT_EVAL_METHOD_C99__ when we switch
> architecture levels.
> * config/aarch64/aarch64.c (aarch64_promoted_type): Only promote
> the aarch64_fp16_type_node, not all HFmode types.
> (aarch64_libgcc_floating_mode_supported_p): Support HFmode.
> (aarch64_scalar_mode_supported_p): Likewise.
> (aarch64_excess_precision): New.
> (TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P): Define.
> (TARGET_SCALAR_MODE_SUPPORTED_P): Likewise.
> (TARGET_C_EXCESS_PRECISION): Likewise.
>
> 2016-11-09 James Greenhalgh <james.greenhalgh@arm.com>
>
> * gcc.target/aarch64/_Float16_1.c: New.
> * gcc.target/aarch64/_Float16_2.c: Likewise.
> * gcc.target/aarch64/_Float16_3.c: Likewise.
>
>
> - if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
> +
> + if (TYPE_P (t) && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
Hmm, SCALAR_FLOAT_TYPE_P is a bit more efficient than TYPE_P in that it
avoids an indirection. It also avoids the second indirection for
TYPE_MAIN_VARIANT in all the cases where we don't have a real type.
So I think
if (SCALAR_FLOAT_TYPE_P (t)
&& TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
would be preferable here.
OK with that change.
R.
> 0013-Patch-AArch64-13-17-Enable-_Float16-for-AArch64.patch
>
>
> diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
> index 422e322..320b912 100644
> --- a/gcc/config/aarch64/aarch64-c.c
> +++ b/gcc/config/aarch64/aarch64-c.c
> @@ -133,6 +133,16 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
>
> aarch64_def_or_undef (TARGET_CRYPTO, "__ARM_FEATURE_CRYPTO", pfile);
> aarch64_def_or_undef (TARGET_SIMD_RDMA, "__ARM_FEATURE_QRDMX", pfile);
> +
> + /* Not for ACLE, but required to keep "float.h" correct if we switch
> + target between implementations that do or do not support ARMv8.2-A
> + 16-bit floating-point extensions. */
> + cpp_undef (pfile, "__FLT_EVAL_METHOD__");
> + builtin_define_with_int_value ("__FLT_EVAL_METHOD__",
> + c_flt_eval_method (true));
> + cpp_undef (pfile, "__FLT_EVAL_METHOD_C99__");
> + builtin_define_with_int_value ("__FLT_EVAL_METHOD_C99__",
> + c_flt_eval_method (false));
> }
>
> /* Implement TARGET_CPU_CPP_BUILTINS. */
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index ec17af4..824b27c 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -14143,12 +14143,20 @@ aarch64_vec_fpconst_pow_of_2 (rtx x)
> return firstval;
> }
>
> -/* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float. */
> +/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
> + to float.
> +
> + __fp16 always promotes through this hook.
> + _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
> + through the generic excess precision logic rather than here. */
> +
> static tree
> aarch64_promoted_type (const_tree t)
> {
> - if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
> +
> + if (TYPE_P (t) && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
> return float_type_node;
> +
> return NULL_TREE;
> }
>
> @@ -14168,6 +14176,17 @@ aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
> }
> }
>
> +/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
> + if MODE is HFmode, and punt to the generic implementation otherwise. */
> +
> +static bool
> +aarch64_libgcc_floating_mode_supported_p (machine_mode mode)
> +{
> + return (mode == HFmode
> + ? true
> + : default_libgcc_floating_mode_supported_p (mode));
> +}
> +
> /* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
> if MODE is HFmode, and punt to the generic implementation otherwise. */
>
> @@ -14179,6 +14198,47 @@ aarch64_scalar_mode_supported_p (machine_mode mode)
> : default_scalar_mode_supported_p (mode));
> }
>
> +/* Set the value of FLT_EVAL_METHOD.
> + ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
> +
> + 0: evaluate all operations and constants, whose semantic type has at
> + most the range and precision of type float, to the range and
> + precision of float; evaluate all other operations and constants to
> + the range and precision of the semantic type;
> +
> + N, where _FloatN is a supported interchange floating type
> + evaluate all operations and constants, whose semantic type has at
> + most the range and precision of _FloatN type, to the range and
> + precision of the _FloatN type; evaluate all other operations and
> + constants to the range and precision of the semantic type;
> +
> + If we have the ARMv8.2-A extensions then we support _Float16 in native
> + precision, so we should set this to 16. Otherwise, we support the type,
> + but want to evaluate expressions in float precision, so set this to
> + 0. */
> +
> +static enum flt_eval_method
> +aarch64_excess_precision (enum excess_precision_type type)
> +{
> + switch (type)
> + {
> + case EXCESS_PRECISION_TYPE_FAST:
> + case EXCESS_PRECISION_TYPE_STANDARD:
> + /* We can calculate either in 16-bit range and precision or
> + 32-bit range and precision. Make that decision based on whether
> + we have native support for the ARMv8.2-A 16-bit floating-point
> + instructions or not. */
> + return (TARGET_FP_F16INST
> + ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
> + : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
> + case EXCESS_PRECISION_TYPE_IMPLICIT:
> + return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
> + default:
> + gcc_unreachable ();
> + }
> + return FLT_EVAL_METHOD_UNPREDICTABLE;
> +}
> +
> #undef TARGET_ADDRESS_COST
> #define TARGET_ADDRESS_COST aarch64_address_cost
>
> @@ -14257,6 +14317,9 @@ aarch64_scalar_mode_supported_p (machine_mode mode)
> #undef TARGET_BUILTIN_RECIPROCAL
> #define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
>
> +#undef TARGET_C_EXCESS_PRECISION
> +#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
> +
> #undef TARGET_EXPAND_BUILTIN
> #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
>
> @@ -14313,6 +14376,10 @@ aarch64_scalar_mode_supported_p (machine_mode mode)
> #undef TARGET_LIBGCC_CMP_RETURN_MODE
> #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
>
> +#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
> +#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
> +aarch64_libgcc_floating_mode_supported_p
> +
> #undef TARGET_MANGLE_TYPE
> #define TARGET_MANGLE_TYPE aarch64_mangle_type
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/_Float16_1.c b/gcc/testsuite/gcc.target/aarch64/_Float16_1.c
> new file mode 100644
> index 0000000..320f154
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/_Float16_1.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=armv8.2-a+nofp16" } */
> +
> +#pragma GCC target ("arch=armv8.2-a+nofp16")
> +
> +_Float16
> +foo_v8 (_Float16 x, _Float16 y, unsigned int *eval)
> +{
> + *eval = __FLT_EVAL_METHOD__;
> + return x * x + y;
> +}
> +
> +__fp16
> +bar_v8 (__fp16 x, __fp16 y, unsigned int *eval)
> +{
> + *eval = __FLT_EVAL_METHOD__;
> + return x * x + y;
> +}
> +
> +#pragma GCC target ("arch=armv8.2-a+fp16")
> +
> +_Float16
> +foo_v82 (_Float16 x, _Float16 y, unsigned int *eval)
> +{
> + *eval = __FLT_EVAL_METHOD__;
> + return x * x + y;
> +}
> +
> +__fp16
> +bar_v82 (__fp16 x, __fp16 y, unsigned int *eval)
> +{
> + *eval = __FLT_EVAL_METHOD__;
> + return x * x + y;
> +}
> +
> +/* Test that we merge to FMA operations. This indicates that we are not
> + making extraneous conversions between modes. */
> +
> +/* Three FMA operations in 32-bit precision, from foo_v8, bar_v8, bar_v82. */
> +/* { dg-final { scan-assembler-times "fmadd\ts\[0-9\]\+" 3 } } */
> +
> +/* One FMA operation in 16-bit precision, from foo_v82. */
> +/* { dg-final { scan-assembler-times "fmadd\th\[0-9\]\+" 1 } } */
> +
> +/* Test that we are resetting the __FLT_EVAL_METHOD__. */
> +/* { dg-final { scan-assembler-times "mov\tw\[0-9\]\+, 16" 2 } } */
> +/* { dg-final { scan-assembler-times "str\twzr" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/_Float16_2.c b/gcc/testsuite/gcc.target/aarch64/_Float16_2.c
> new file mode 100644
> index 0000000..8b2aa1e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/_Float16_2.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=armv8.2-a+nofp16 -fpermitted-flt-eval-methods=c11" } */
> +
> +#pragma GCC target ("arch=armv8.2-a+nofp16")
> +
> +_Float16
> +foo_v8 (_Float16 x, _Float16 y, unsigned int *eval)
> +{
> + *eval = __FLT_EVAL_METHOD__;
> + return x * x + y;
> +}
> +
> +__fp16
> +bar_v8 (__fp16 x, __fp16 y, unsigned int *eval)
> +{
> + *eval = __FLT_EVAL_METHOD__;
> + return x * x + y;
> +}
> +
> +#pragma GCC target ("arch=armv8.2-a+fp16")
> +
> +_Float16
> +foo_v82 (_Float16 x, _Float16 y, unsigned int *eval)
> +{
> + *eval = __FLT_EVAL_METHOD__;
> + return x * x + y;
> +}
> +
> +__fp16
> +bar_v82 (__fp16 x, __fp16 y, unsigned int *eval)
> +{
> + *eval = __FLT_EVAL_METHOD__;
> + return x * x + y;
> +}
> +
> +/* Test that we merge to FMA operations. This indicates that we are not
> + making extraneous conversions between modes. */
> +
> +/* Three FMA operations in 32-bit precision, from foo_v8, bar_v8, bar_v82. */
> +/* { dg-final { scan-assembler-times "fmadd\ts\[0-9\]\+" 3 } } */
> +
> +/* One FMA operation in 16-bit precision, from foo_v82. */
> +/* { dg-final { scan-assembler-times "fmadd\th\[0-9\]\+" 1 } } */
> +
> +/* Test that in -fpermitted-flt-eval-methods=c11 we don't set the
> + __FLT_EVAL_METHOD__ to anything other than 0. */
> +/* { dg-final { scan-assembler-times "str\twzr" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/_Float16_3.c b/gcc/testsuite/gcc.target/aarch64/_Float16_3.c
> new file mode 100644
> index 0000000..2d20250
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/_Float16_3.c
> @@ -0,0 +1,46 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=armv8.2-a+nofp16 -std=c11 -ffp-contract=fast" } */
> +
> +#pragma GCC target ("arch=armv8.2-a+nofp16")
> +
> +_Float16
> +foo_v8 (_Float16 x, _Float16 y, unsigned int *eval)
> +{
> + *eval = __FLT_EVAL_METHOD__;
> + return x * x + y;
> +}
> +
> +__fp16
> +bar_v8 (__fp16 x, __fp16 y, unsigned int *eval)
> +{
> + *eval = __FLT_EVAL_METHOD__;
> + return x * x + y;
> +}
> +
> +#pragma GCC target ("arch=armv8.2-a+fp16")
> +
> +_Float16
> +foo_v82 (_Float16 x, _Float16 y, unsigned int *eval)
> +{
> + *eval = __FLT_EVAL_METHOD__;
> + return x * x + y;
> +}
> +
> +__fp16
> +bar_v82 (__fp16 x, __fp16 y, unsigned int *eval)
> +{
> + *eval = __FLT_EVAL_METHOD__;
> + return x * x + y;
> +}
> +
> +/* Test that we merge to FMA operations. This indicates that we are not
> + making extraneous conversions between modes. */
> +
> +/* Three FMA operations in 32-bit precision, from foo_v8, bar_v8, bar_v82. */
> +/* { dg-final { scan-assembler-times "fmadd\ts\[0-9\]\+" 3 } } */
> +
> +/* One FMA operation in 16-bit precision, from foo_v82. */
> +/* { dg-final { scan-assembler-times "fmadd\th\[0-9\]\+" 1 } } */
> +
> +/* Test that in C11 mode, we don't reset __FLT_EVAL_METHOD__. */
> +/* { dg-final { scan-assembler-times "str\twzr" 4 } } */
>
More information about the Gcc-patches
mailing list