This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[AArch64] Add precision choices for the reciprocal square root approximation


        Add precision choices for the reciprocal square root approximation

        Allow a target to prefer this approximation depending on the FP
   precision.

        gcc/
            * config/aarch64/aarch64-protos.h
            (AARCH64_EXTRA_TUNE_APPROX_RSQRT): New macro.
            * config/aarch64/aarch64-tuning-flags.def
            (AARCH64_EXTRA_TUNE_APPROX_RSQRT_DF): New mask.
            (AARCH64_EXTRA_TUNE_APPROX_RSQRT_SF): Likewise.
            * config/aarch64/aarch64.c
            (use_rsqrt_p): New argument for the mode.
            (aarch64_builtin_reciprocal): Derive mode from builtin.
            (aarch64_optab_supported_p): New argument for the mode.

This patch allows a target to choose for which FP precision the reciprocal square root approximation is used.

For example, although this approximation noticeably improves performance for DF on A57, for SF it helps little, if at all.

Feedback appreciated.

Thank you,

--
Evandro Menezes

>From 95581aefcf324233c3603f4d8232ee18c5836f8a Mon Sep 17 00:00:00 2001
From: Evandro Menezes <e.menezes@samsung.com>
Date: Thu, 17 Mar 2016 17:00:03 -0500
Subject: [PATCH] Add precision choices for the reciprocal square root
 approximation

Allow a target to prefer this approximation depending on the FP precision.

gcc/
	* config/aarch64/aarch64-protos.h
	(AARCH64_EXTRA_TUNE_APPROX_RSQRT): New macro.
	* config/aarch64/aarch64-tuning-flags.def
	(AARCH64_EXTRA_TUNE_APPROX_RSQRT_DF): New mask.
	(AARCH64_EXTRA_TUNE_APPROX_RSQRT_SF): Likewise.
	* config/aarch64/aarch64.c
	(use_rsqrt_p): New argument for the mode.
	(aarch64_builtin_reciprocal): Derive mode from builtin.
	(aarch64_optab_supported_p): New argument for the mode.
---
 gcc/config/aarch64/aarch64-protos.h         |  3 +++
 gcc/config/aarch64/aarch64-tuning-flags.def |  3 ++-
 gcc/config/aarch64/aarch64.c                | 23 +++++++++++++++--------
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index dced209..58e5d73 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -263,6 +263,9 @@ enum aarch64_extra_tuning_flags
 };
 #undef AARCH64_EXTRA_TUNING_OPTION
 
+#define AARCH64_EXTRA_TUNE_APPROX_RSQRT \
+  (AARCH64_EXTRA_TUNE_APPROX_RSQRT_DF | AARCH64_EXTRA_TUNE_APPROX_RSQRT_SF)
+
 extern struct tune_params aarch64_tune_params;
 
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index 7e45a0c..57d9588 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -29,5 +29,6 @@
      AARCH64_TUNE_ to give an enum name. */
 
 AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
-AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrt", APPROX_RSQRT)
+AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrt", APPROX_RSQRT_DF)
+AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrtf", APPROX_RSQRT_SF)
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 12e498d..e651123 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7440,12 +7440,16 @@ aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
    to optimize 1.0/sqrt.  */
 
 static bool
-use_rsqrt_p (void)
+use_rsqrt_p (machine_mode mode)
 {
   return (!flag_trapping_math
 	  && flag_unsafe_math_optimizations
-	  && ((aarch64_tune_params.extra_tuning_flags
-	       & AARCH64_EXTRA_TUNE_APPROX_RSQRT)
+	  && ((GET_MODE_INNER (mode) == SFmode
+	       && (aarch64_tune_params.extra_tuning_flags
+		   & AARCH64_EXTRA_TUNE_APPROX_RSQRT_SF))
+	      || (GET_MODE_INNER (mode) == DFmode
+		  && (aarch64_tune_params.extra_tuning_flags
+		      & AARCH64_EXTRA_TUNE_APPROX_RSQRT_DF))
 	      || flag_mrecip_low_precision_sqrt));
 }
 
@@ -7455,9 +7459,12 @@ use_rsqrt_p (void)
 static tree
 aarch64_builtin_reciprocal (tree fndecl)
 {
-  if (!use_rsqrt_p ())
-    return NULL_TREE;
-  return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
+  machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
+
+  if (use_rsqrt_p (mode))
+    return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
+
+  return NULL_TREE;
 }
 
 typedef rtx (*rsqrte_type) (rtx, rtx);
@@ -13952,13 +13959,13 @@ aarch64_promoted_type (const_tree t)
 /* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */
 
 static bool
-aarch64_optab_supported_p (int op, machine_mode, machine_mode,
+aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
 			   optimization_type opt_type)
 {
   switch (op)
     {
     case rsqrt_optab:
-      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
+      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
 
     default:
       return true;
-- 
1.9.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]