This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 1/3][AArch64] Add more choices for the reciprocal square root approximation


On Fri, May 27, 2016 at 05:57:23PM -0500, Evandro Menezes wrote:
> From 86d7690632d03ec85fd69bfaef8e89c0542518ad Mon Sep 17 00:00:00 2001
> From: Evandro Menezes <e.menezes@samsung.com>
> Date: Thu, 3 Mar 2016 18:13:46 -0600
> Subject: [PATCH 1/3] [AArch64] Add more choices for the reciprocal square root
>  approximation
> 
> Allow a target to prefer such operation depending on the operation mode.
> 
> 2016-03-03  Evandro Menezes  <e.menezes@samsung.com>
> 
> gcc/
> 	* config/aarch64/aarch64-protos.h
> 	(AARCH64_APPROX_MODE): New macro.
> 	(AARCH64_APPROX_{NONE,ALL}): Likewise.
> 	(cpu_approx_modes): New structure.
> 	(tune_params): New member "approx_modes".
> 	* config/aarch64/aarch64-tuning-flags.def
> 	(AARCH64_EXTRA_TUNE_APPROX_RSQRT): Remove macro.
> 	* config/aarch64/aarch64.c
> 	({generic,exynosm1,xgene1}_approx_modes): New core
> 	"cpu_approx_modes" structures.
> 	(generic_tunings): New member "approx_modes".
> 	(cortexa35_tunings): Likewise.
> 	(cortexa53_tunings): Likewise.
> 	(cortexa57_tunings): Likewise.
> 	(cortexa72_tunings): Likewise.
> 	(exynosm1_tunings): Likewise.
> 	(thunderx_tunings): Likewise.
> 	(xgene1_tunings): Likewise.
> 	(use_rsqrt_p): New argument for the mode and use new member from
> 	"tune_params".
> 	(aarch64_builtin_reciprocal): Devise mode from builtin.
> 	(aarch64_optab_supported_p): New argument for the mode.
> 	* doc/invoke.texi (-mlow-precision-recip-sqrt): Reword description.

We're almost there, just a couple of style comments left on this one
I think. Thanks for sticking with it so far.

> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index f22a31c..6156281 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -178,6 +178,23 @@ struct cpu_branch_cost
>    const int unpredictable;  /* Unpredictable branch or optimizing for speed.  */
>  };
>  
> +/* Control approximate alternatives to certain FP operators.  */
> +#define AARCH64_APPROX_MODE(MODE) \
> +  ((MIN_MODE_FLOAT <= (MODE) && (MODE) <= MAX_MODE_FLOAT) \
> +   ? (1 << ((MODE) - MIN_MODE_FLOAT)) \
> +   : (MIN_MODE_VECTOR_FLOAT <= (MODE) && (MODE) <= MAX_MODE_VECTOR_FLOAT) \
> +     ? (1 << ((MODE) - MIN_MODE_VECTOR_FLOAT \
> +	      + MAX_MODE_FLOAT - MIN_MODE_FLOAT + 1)) \
> +     : (0))
> +#define AARCH64_APPROX_NONE (0)
> +#define AARCH64_APPROX_ALL (-1)
> +
> +/* Allowed modes for approximations.  */
> +struct cpu_approx_modes
> +{
> +  const unsigned int recip_sqrt; /* Reciprocal square root.  */
> +};
> +
>  struct tune_params
>  {
>    const struct cpu_cost_table *insn_extra_cost;
> @@ -218,6 +235,8 @@ struct tune_params
>    } autoprefetcher_model;
>  
>    unsigned int extra_tuning_flags;
> +
> +  const struct cpu_approx_modes *approx_modes;

Sorry to be irritating, but would you mind putting this up beside the
other "struct" members of tune_params. So directly under the branch_costs
member?

>  };
>  
>  #define AARCH64_FUSION_PAIR(x, name) \


> diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
> index 7e45a0c..048c2a3 100644
> --- a/gcc/config/aarch64/aarch64-tuning-flags.def
> +++ b/gcc/config/aarch64/aarch64-tuning-flags.def
> @@ -29,5 +29,3 @@
>       AARCH64_TUNE_ to give an enum name. */
>  
>  AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
> -AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrt", APPROX_RSQRT)
> -
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 9995494..e532cfc 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -38,6 +38,7 @@
>  #include "recog.h"
>  #include "diagnostic.h"
>  #include "insn-attr.h"
> +#include "insn-modes.h"

Do we need this include? My build test of just this patch suggests not (maybe
you need it later in the series?). This can probably be dropped.

>  #include "alias.h"
>  #include "fold-const.h"
>  #include "stor-layout.h"
> @@ -393,6 +394,24 @@ static const struct cpu_branch_cost cortexa57_branch_cost =
>    3   /* Unpredictable.  */
>  };
>  
> +/* Generic approximation modes.  */
> +static const cpu_approx_modes generic_approx_modes =
> +{
> +  AARCH64_APPROX_NONE	/* recip_sqrt  */
> +};
> +
> +/* Approximation modes for Exynos M1.  */
> +static const cpu_approx_modes exynosm1_approx_modes =
> +{
> +  AARCH64_APPROX_ALL	/* recip_sqrt  */
> +};
> +
> +/* Approximation modes for Xgene1.  */

This should be "X-Gene 1" looking at how Applied Micro style it in their
marketing materials.

> +static const cpu_approx_modes xgene1_approx_modes =
> +{
> +  AARCH64_APPROX_ALL	/* recip_sqrt  */
> +};
> +
>  static bool
> -use_rsqrt_p (void)
> +use_rsqrt_p (machine_mode mode)
>  {
>    return (!flag_trapping_math
>  	  && flag_unsafe_math_optimizations
> -	  && ((aarch64_tune_params.extra_tuning_flags
> -	       & AARCH64_EXTRA_TUNE_APPROX_RSQRT)
> +	  && ((aarch64_tune_params.approx_modes->recip_sqrt
> +	       & AARCH64_APPROX_MODE (mode))
>  	      || flag_mrecip_low_precision_sqrt));
>  }
>  
> @@ -7467,7 +7494,9 @@ use_rsqrt_p (void)
>  static tree
>  aarch64_builtin_reciprocal (tree fndecl)
>  {
> -  if (!use_rsqrt_p ())
> +  machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
> +
> +  if (!use_rsqrt_p (mode))
>      return NULL_TREE;
>    return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
>  }
> @@ -13889,13 +13918,13 @@ aarch64_promoted_type (const_tree t)
>  /* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */
>  
>  static bool
> -aarch64_optab_supported_p (int op, machine_mode, machine_mode,
> +aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
>  			   optimization_type opt_type)
>  {
>    switch (op)
>      {
>      case rsqrt_optab:
> -      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
> +      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
>  
>      default:
>        return true;
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index f1ac257..4340b08 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -12939,7 +12939,7 @@ corresponding flag to the linker.
>  When calculating the reciprocal square root approximation,
>  uses one less step than otherwise, thus reducing latency and precision.
>  This is only relevant if @option{-ffast-math} enables the reciprocal square root
> -approximation, which in turn depends on the target processor.
> +approximation.

This hunk no longer applies on trunk over Wilco's changes. Can you check
whether you're happy with the wording that is on trunk now, and drop or
update this hunk as appropriate.

Thanks,
James


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]