[Patch AArch64 13/17] Enable _Float16 for AArch64

Thu Nov 24 14:40:00 GMT 2016

On 11/11/16 15:40, James Greenhalgh wrote:
> 
>  Hi,
> 
> This patch adds the back-end wiring to get AArch64 support for
> the _Float16 type working.
> 
> Bootstrapped on AArch64 with no issues.
> 
> OK?
> 
> Thanks,
> James
> 
> ---
> 2016-11-09  James Greenhalgh  <james.greenhalgh@arm.com>
> 
> 	* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Update
> 	__FLT_EVAL_METHOD__ and __FLT_EVAL_METHOD_C99__ when we switch
> 	architecture levels.
> 	* config/aarch64/aarch64.c (aarch64_promoted_type): Only promote
> 	the aarch64_fp16_type_node, not all HFmode types.
> 	(aarch64_libgcc_floating_mode_supported_p): Support HFmode.
> 	(aarch64_scalar_mode_supported_p): Likewise.
> 	(aarch64_excess_precision): New.
> 	(TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P): Define.
> 	(TARGET_SCALAR_MODE_SUPPORTED_P): Likewise.
> 	(TARGET_C_EXCESS_PRECISION): Likewise.
> 
> 2016-11-09  James Greenhalgh  <james.greenhalgh@arm.com>
> 
> 	* gcc.target/aarch64/_Float16_1.c: New.
> 	* gcc.target/aarch64/_Float16_2.c: Likewise.
> 	* gcc.target/aarch64/_Float16_3.c: Likewise.
> 
> 

> -  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
> +
> +  if (TYPE_P (t) && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)

Hmm, SCALAR_FLOAT_TYPE_P is a bit more efficient than TYPE_P in that it
avoids an indirection.  It also avoids the second indirection for
TYPE_MAIN_VARIANT in all the cases where we don't have a real type.

So I think
   if (SCALAR_FLOAT_TYPE_P (t)
       && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)

would be preferable here.

OK with that change.

R.

> 0013-Patch-AArch64-13-17-Enable-_Float16-for-AArch64.patch
> 
> 
> diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
> index 422e322..320b912 100644
> --- a/gcc/config/aarch64/aarch64-c.c
> +++ b/gcc/config/aarch64/aarch64-c.c
> @@ -133,6 +133,16 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
>  
>    aarch64_def_or_undef (TARGET_CRYPTO, "__ARM_FEATURE_CRYPTO", pfile);
>    aarch64_def_or_undef (TARGET_SIMD_RDMA, "__ARM_FEATURE_QRDMX", pfile);
> +
> +  /* Not for ACLE, but required to keep "float.h" correct if we switch
> +     target between implementations that do or do not support ARMv8.2-A
> +     16-bit floating-point extensions.  */
> +  cpp_undef (pfile, "__FLT_EVAL_METHOD__");
> +  builtin_define_with_int_value ("__FLT_EVAL_METHOD__",
> +				 c_flt_eval_method (true));
> +  cpp_undef (pfile, "__FLT_EVAL_METHOD_C99__");
> +  builtin_define_with_int_value ("__FLT_EVAL_METHOD_C99__",
> +				 c_flt_eval_method (false));
>  }
>  
>  /* Implement TARGET_CPU_CPP_BUILTINS.  */
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index ec17af4..824b27c 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -14143,12 +14143,20 @@ aarch64_vec_fpconst_pow_of_2 (rtx x)
>    return firstval;
>  }
>  
> -/* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float.  */
> +/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
> +   to float.
> +
> +   __fp16 always promotes through this hook.
> +   _Float16 may promote if TARGET_FLT_EVAL_METHOD is 0, but we do that
> +   through the generic excess precision logic rather than here.  */
> +
>  static tree
>  aarch64_promoted_type (const_tree t)
>  {
> -  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
> +
> +  if (TYPE_P (t) && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
>      return float_type_node;
> +
>    return NULL_TREE;
>  }
>  
> @@ -14168,6 +14176,17 @@ aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
>      }
>  }
>  
> +/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
> +   if MODE is HFmode, and punt to the generic implementation otherwise.  */
> +
> +static bool
> +aarch64_libgcc_floating_mode_supported_p (machine_mode mode)
> +{
> +  return (mode == HFmode
> +	  ? true
> +	  : default_libgcc_floating_mode_supported_p (mode));
> +}
> +
>  /* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
>     if MODE is HFmode, and punt to the generic implementation otherwise.  */
>  
> @@ -14179,6 +14198,47 @@ aarch64_scalar_mode_supported_p (machine_mode mode)
>  	  : default_scalar_mode_supported_p (mode));
>  }
>  
> +/* Set the value of FLT_EVAL_METHOD.
> +   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
> +
> +    0: evaluate all operations and constants, whose semantic type has at
> +       most the range and precision of type float, to the range and
> +       precision of float; evaluate all other operations and constants to
> +       the range and precision of the semantic type;
> +
> +    N, where _FloatN is a supported interchange floating type
> +       evaluate all operations and constants, whose semantic type has at
> +       most the range and precision of _FloatN type, to the range and
> +       precision of the _FloatN type; evaluate all other operations and
> +       constants to the range and precision of the semantic type;
> +
> +   If we have the ARMv8.2-A extensions then we support _Float16 in native
> +   precision, so we should set this to 16.  Otherwise, we support the type,
> +   but want to evaluate expressions in float precision, so set this to
> +   0.  */
> +
> +static enum flt_eval_method
> +aarch64_excess_precision (enum excess_precision_type type)
> +{
> +  switch (type)
> +    {
> +      case EXCESS_PRECISION_TYPE_FAST:
> +      case EXCESS_PRECISION_TYPE_STANDARD:
> +	/* We can calculate either in 16-bit range and precision or
> +	   32-bit range and precision.  Make that decision based on whether
> +	   we have native support for the ARMv8.2-A 16-bit floating-point
> +	   instructions or not.  */
> +	return (TARGET_FP_F16INST
> +		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
> +		: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
> +      case EXCESS_PRECISION_TYPE_IMPLICIT:
> +	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
> +      default:
> +	gcc_unreachable ();
> +    }
> +  return FLT_EVAL_METHOD_UNPREDICTABLE;
> +}
> +
>  #undef TARGET_ADDRESS_COST
>  #define TARGET_ADDRESS_COST aarch64_address_cost
>  
> @@ -14257,6 +14317,9 @@ aarch64_scalar_mode_supported_p (machine_mode mode)
>  #undef TARGET_BUILTIN_RECIPROCAL
>  #define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
>  
> +#undef TARGET_C_EXCESS_PRECISION
> +#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
> +
>  #undef  TARGET_EXPAND_BUILTIN
>  #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
>  
> @@ -14313,6 +14376,10 @@ aarch64_scalar_mode_supported_p (machine_mode mode)
>  #undef TARGET_LIBGCC_CMP_RETURN_MODE
>  #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
>  
> +#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
> +#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
> +aarch64_libgcc_floating_mode_supported_p
> +
>  #undef TARGET_MANGLE_TYPE
>  #define TARGET_MANGLE_TYPE aarch64_mangle_type
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/_Float16_1.c b/gcc/testsuite/gcc.target/aarch64/_Float16_1.c
> new file mode 100644
> index 0000000..320f154
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/_Float16_1.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=armv8.2-a+nofp16" } */
> +
> +#pragma GCC target ("arch=armv8.2-a+nofp16")
> +
> +_Float16
> +foo_v8 (_Float16 x, _Float16 y, unsigned int *eval)
> +{
> +  *eval = __FLT_EVAL_METHOD__;
> +  return x * x + y;
> +}
> +
> +__fp16
> +bar_v8 (__fp16 x, __fp16 y, unsigned int *eval)
> +{
> +  *eval = __FLT_EVAL_METHOD__;
> +  return x * x + y;
> +}
> +
> +#pragma GCC target ("arch=armv8.2-a+fp16")
> +
> +_Float16
> +foo_v82 (_Float16 x, _Float16 y, unsigned int *eval)
> +{
> +  *eval = __FLT_EVAL_METHOD__;
> +  return x * x + y;
> +}
> +
> +__fp16
> +bar_v82 (__fp16 x, __fp16 y, unsigned int *eval)
> +{
> +  *eval = __FLT_EVAL_METHOD__;
> +  return x * x + y;
> +}
> +
> +/* Test that we merge to FMA operations.  This indicates that we are not
> +   making extraneous conversions between modes.  */
> +
> +/* Three FMA operations in 32-bit precision, from foo_v8, bar_v8, bar_v82.  */
> +/* { dg-final { scan-assembler-times "fmadd\ts\[0-9\]\+" 3 } } */
> +
> +/* One FMA operation in 16-bit precision, from foo_v82.  */
> +/* { dg-final { scan-assembler-times "fmadd\th\[0-9\]\+" 1 } } */
> +
> +/* Test that we are resetting the __FLT_EVAL_METHOD__.  */
> +/* { dg-final { scan-assembler-times "mov\tw\[0-9\]\+, 16" 2 } } */
> +/* { dg-final { scan-assembler-times "str\twzr" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/_Float16_2.c b/gcc/testsuite/gcc.target/aarch64/_Float16_2.c
> new file mode 100644
> index 0000000..8b2aa1e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/_Float16_2.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=armv8.2-a+nofp16 -fpermitted-flt-eval-methods=c11" } */
> +
> +#pragma GCC target ("arch=armv8.2-a+nofp16")
> +
> +_Float16
> +foo_v8 (_Float16 x, _Float16 y, unsigned int *eval)
> +{
> +  *eval = __FLT_EVAL_METHOD__;
> +  return x * x + y;
> +}
> +
> +__fp16
> +bar_v8 (__fp16 x, __fp16 y, unsigned int *eval)
> +{
> +  *eval = __FLT_EVAL_METHOD__;
> +  return x * x + y;
> +}
> +
> +#pragma GCC target ("arch=armv8.2-a+fp16")
> +
> +_Float16
> +foo_v82 (_Float16 x, _Float16 y, unsigned int *eval)
> +{
> +  *eval = __FLT_EVAL_METHOD__;
> +  return x * x + y;
> +}
> +
> +__fp16
> +bar_v82 (__fp16 x, __fp16 y, unsigned int *eval)
> +{
> +  *eval = __FLT_EVAL_METHOD__;
> +  return x * x + y;
> +}
> +
> +/* Test that we merge to FMA operations.  This indicates that we are not
> +   making extraneous conversions between modes.  */
> +
> +/* Three FMA operations in 32-bit precision, from foo_v8, bar_v8, bar_v82.  */
> +/* { dg-final { scan-assembler-times "fmadd\ts\[0-9\]\+" 3 } } */
> +
> +/* One FMA operation in 16-bit precision, from foo_v82.  */
> +/* { dg-final { scan-assembler-times "fmadd\th\[0-9\]\+" 1 } } */
> +
> +/* Test that in -fpermitted-flt-eval-methods=c11 we don't set the
> +   __FLT_EVAL_METHOD__ to anything other than 0.  */
> +/* { dg-final { scan-assembler-times "str\twzr" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/_Float16_3.c b/gcc/testsuite/gcc.target/aarch64/_Float16_3.c
> new file mode 100644
> index 0000000..2d20250
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/_Float16_3.c
> @@ -0,0 +1,46 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=armv8.2-a+nofp16 -std=c11 -ffp-contract=fast" } */
> +
> +#pragma GCC target ("arch=armv8.2-a+nofp16")
> +
> +_Float16
> +foo_v8 (_Float16 x, _Float16 y, unsigned int *eval)
> +{
> +  *eval = __FLT_EVAL_METHOD__;
> +  return x * x + y;
> +}
> +
> +__fp16
> +bar_v8 (__fp16 x, __fp16 y, unsigned int *eval)
> +{
> +  *eval = __FLT_EVAL_METHOD__;
> +  return x * x + y;
> +}
> +
> +#pragma GCC target ("arch=armv8.2-a+fp16")
> +
> +_Float16
> +foo_v82 (_Float16 x, _Float16 y, unsigned int *eval)
> +{
> +  *eval = __FLT_EVAL_METHOD__;
> +  return x * x + y;
> +}
> +
> +__fp16
> +bar_v82 (__fp16 x, __fp16 y, unsigned int *eval)
> +{
> +  *eval = __FLT_EVAL_METHOD__;
> +  return x * x + y;
> +}
> +
> +/* Test that we merge to FMA operations.  This indicates that we are not
> +   making extraneous conversions between modes.  */
> +
> +/* Three FMA operations in 32-bit precision, from foo_v8, bar_v8, bar_v82.  */
> +/* { dg-final { scan-assembler-times "fmadd\ts\[0-9\]\+" 3 } } */
> +
> +/* One FMA operation in 16-bit precision, from foo_v82.  */
> +/* { dg-final { scan-assembler-times "fmadd\th\[0-9\]\+" 1 } } */
> +
> +/* Test that in C11 mode, we don't reset __FLT_EVAL_METHOD__.  */
> +/* { dg-final { scan-assembler-times "str\twzr" 4 } } */
>