This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][AArch64] Implement TARGET_SCHED_MACRO_FUSION_PAIR_P



On 13/11/14 07:24, Andrew Pinski wrote:
On Tue, Nov 11, 2014 at 3:55 AM, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:
Hi all,

This is the aarch64 implementation of the macro fusion hook, used to fuse
mov+movk instructions together.

A new field is declared in the tuning struct and as we add more fuseable ops
in the future we will fill in more bits in the fuseable_ops field.

Bootstrapped and tested on aarch64-none-linux-gnu.

Ok for trunk?
I have a few comments about this patch as I have a patch which depends
on this for ThunderX.

Each new function should have a comment before it saying what the function does.
The checks inside aarch_macro_fusion_pair_p for single_set and
any_condjump_p seems a little too restrictive to add more fusion
without major changes to the code.
In the fusion case I am adding is about fusing any single cycle
arithmetic instruction (on ThunderX) that produces flags and the
branch that consumes it.

Hi Andrew,

Thanks for the feedback, here's an updated patch that should be a bit more robust.

How does this look?

Kyrill

2014-11-13  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

    * config/aarch64/aarch64.c: Include tm-constrs.h
    (AARCH64_FUSE_ADRP_ADD): Define.
    (cortexa57_tunings): Add AARCH64_FUSE_ADRP_ADD to fuseable_ops.
    (cortexa53_tunings): Likewise.
    (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_ADRP_ADD.

Thanks,
Andrew

2014-11-11  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/aarch64/aarch64-protos.h (struct tune_params): Add
     fuseable_ops field.
     * config/aarch64/aarch64.c (generic_tunings): Specify fuseable_ops.
     (cortexa53_tunings): Likewise.
     (cortexa57_tunings): Likewise.
     (thunderx_tunings): Likewise.
     (aarch64_macro_fusion_p): New function.
     (aarch_macro_fusion_pair_p): Likewise.
     (TARGET_SCHED_MACRO_FUSION_P): Define.
     (TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise.
     (AARCH64_FUSE_MOV_MOVK): Likewise.
     (AARCH64_FUSE_NOTHING): Likewise.
commit 94f8c07830b50e612ea1378fab0ed8363a418826
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Tue Oct 21 10:36:48 2014 +0100

    [AArch64] Implement TARGET_MACRO_FUSION

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 470b9eb..9e0ff8c 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -170,6 +170,7 @@ struct tune_params
   const struct cpu_vector_cost *const vec_costs;
   const int memmov_cost;
   const int issue_rate;
+  const unsigned int fuseable_ops;
 };
 
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0429d96..7121a05 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -302,6 +302,9 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 };
 
+#define AARCH64_FUSE_NOTHING	(0)
+#define AARCH64_FUSE_MOV_MOVK	(1 << 0)
+
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
 #endif
@@ -312,7 +315,8 @@ static const struct tune_params generic_tunings =
   &generic_regmove_cost,
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 2)
+  NAMED_PARAM (issue_rate, 2),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
 };
 
 static const struct tune_params cortexa53_tunings =
@@ -322,7 +326,8 @@ static const struct tune_params cortexa53_tunings =
   &cortexa53_regmove_cost,
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 2)
+  NAMED_PARAM (issue_rate, 2),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
 };
 
 static const struct tune_params cortexa57_tunings =
@@ -332,7 +337,8 @@ static const struct tune_params cortexa57_tunings =
   &cortexa57_regmove_cost,
   &cortexa57_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 3)
+  NAMED_PARAM (issue_rate, 3),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
 };
 
 static const struct tune_params thunderx_tunings =
@@ -342,7 +348,8 @@ static const struct tune_params thunderx_tunings =
   &thunderx_regmove_cost,
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 6),
-  NAMED_PARAM (issue_rate, 2)
+  NAMED_PARAM (issue_rate, 2),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
 };
 
 /* A processor implementing AArch64.  */
@@ -9968,6 +9975,58 @@ aarch64_use_by_pieces_infrastructure_p (unsigned int size,
   return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
 }
 
+/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
+   instruction fusion of some sort.  */
+
+static bool
+aarch64_macro_fusion_p (void)
+{
+  return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
+}
+
+
+/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
+   should be kept together during scheduling.  */
+
+static bool
+aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
+{
+  rtx set_dest;
+  rtx prev_set = single_set (prev);
+  rtx curr_set = single_set (curr);
+  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
+  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
+
+  if (!aarch64_macro_fusion_p ())
+    return false;
+
+  if (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK)
+    {
+      /* We are trying to match:
+         prev (mov)  == (set (reg r0) (const_int imm16))
+         curr (movk) == (set (zero_extract (reg r0)
+                                           (const_int 16)
+                                           (const_int 16))
+                             (const_int imm16_1))  */
+
+      set_dest = SET_DEST (curr_set);
+
+      if (simple_sets_p && GET_CODE (set_dest) == ZERO_EXTRACT
+          && CONST_INT_P (SET_SRC (curr_set))
+          && CONST_INT_P (SET_SRC (prev_set))
+          && CONST_INT_P (XEXP (set_dest, 2))
+          && INTVAL (XEXP (set_dest, 2)) == 16
+          && REG_P (XEXP (set_dest, 0))
+          && REG_P (SET_DEST (prev_set))
+          && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
+        {
+          return true;
+        }
+    }
+
+  return false;
+}
+
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST aarch64_address_cost
 
@@ -10224,6 +10283,12 @@ aarch64_use_by_pieces_infrastructure_p (unsigned int size,
 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
   aarch64_use_by_pieces_infrastructure_p
 
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
+
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-aarch64.h"

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]