This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][AArch64][2/5] Implement adrp+add fusion


Hi all,

This patch is just rebased on top of the changes from the previous patch in the series. Otherwise it's the same as https://gcc.gnu.org/ml/gcc-patches/2014-11/msg01263.html with some style cleanup

There can be cases where we miss fusion of adrd+add because although they are generated together (in aarch64_load_symref_appropriately), combine can sometimes combine the losym part with the instruction after it and we end up with an instruction stream where the is an insn between the two, preventing the fusion in sched1. We still catch enough cases to make this approach worthwhile and the above-mentioned exceptions can be mitigated in the future (for example, by somehow delaying the generation of the adrp,add RTL after combine but before sched1)

Tested and bootstrapped on aarch64-none-linux-gnu.
Ok for trunk?

2014-11-18  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

    * config/aarch64/aarch64.c: Include tm-constrs.h
    (AARCH64_FUSE_ADRP_ADD): Define.
    (cortexa57_tunings): Add AARCH64_FUSE_ADRP_ADD to fuseable_ops.
    (cortexa53_tunings): Likewise.
    (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_ADRP_ADD.
commit 248ec70cfac6cb552a427b4336a3340bb25a5e53
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Thu Nov 6 12:05:26 2014 +0000

    [AArch64] Fuse ADRP+ADD

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 502ba6d..03ae7c4 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -77,6 +77,7 @@
 #include "dumpfile.h"
 #include "builtins.h"
 #include "rtl-iter.h"
+#include "tm-constrs.h"
 
 /* Defined for convenience.  */
 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
@@ -304,6 +305,7 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
 
 #define AARCH64_FUSE_NOTHING	(0)
 #define AARCH64_FUSE_MOV_MOVK	(1 << 0)
+#define AARCH64_FUSE_ADRP_ADD	(1 << 1)
 
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
@@ -327,7 +329,7 @@ static const struct tune_params cortexa53_tunings =
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
   NAMED_PARAM (issue_rate, 2),
-  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
+  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD))
 };
 
 static const struct tune_params cortexa57_tunings =
@@ -338,7 +340,7 @@ static const struct tune_params cortexa57_tunings =
   &cortexa57_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
   NAMED_PARAM (issue_rate, 3),
-  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
+  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD))
 };
 
 static const struct tune_params thunderx_tunings =
@@ -10037,6 +10039,32 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
         }
     }
 
+  if (simple_sets_p
+      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
+    {
+
+      /*  We're trying to match:
+          prev (adrp) == (set (reg r1)
+                              (high (symbol_ref ("SYM"))))
+          curr (add) == (set (reg r0)
+                             (lo_sum (reg r1)
+                                     (symbol_ref ("SYM"))))
+          Note that r0 need not necessarily be the same as r1, especially
+          during pre-regalloc scheduling.  */
+
+      if (satisfies_constraint_Ush (SET_SRC (prev_set))
+          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
+        {
+          if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
+              && REG_P (XEXP (SET_SRC (curr_set), 0))
+              && REGNO (XEXP (SET_SRC (curr_set), 0))
+                 == REGNO (SET_DEST (prev_set))
+              && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
+                              XEXP (SET_SRC (curr_set), 1)))
+            return true;
+        }
+    }
+
   return false;
 }
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]