This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][AArch64][2/5] Implement adrp+add fusion
- From: Kyrill Tkachov <kyrylo dot tkachov at arm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Marcus Shawcroft <marcus dot shawcroft at arm dot com>, Richard Earnshaw <Richard dot Earnshaw at arm dot com>
- Date: Tue, 18 Nov 2014 10:33:44 +0000
- Subject: [PATCH][AArch64][2/5] Implement adrp+add fusion
- Authentication-results: sourceware.org; auth=none
Hi all,
This patch is just rebased on top of the changes from the previous patch
in the series.
Otherwise it's the same as
https://gcc.gnu.org/ml/gcc-patches/2014-11/msg01263.html with some style
cleanup.
There can be cases where we miss fusion of adrp+add because although
they are generated together (in aarch64_load_symref_appropriately),
combine can sometimes combine the losym part with the instruction after
it and we end up with an instruction stream where there is an insn between
the two, preventing the fusion in sched1.
We still catch enough cases to make this approach worthwhile and the
above-mentioned exceptions can be mitigated in the future (for example,
by somehow delaying the generation of the adrp,add RTL until after combine
but before sched1).
Tested and bootstrapped on aarch64-none-linux-gnu.
Ok for trunk?
2014-11-18 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64.c: Include tm-constrs.h.
(AARCH64_FUSE_ADRP_ADD): Define.
(cortexa57_tunings): Add AARCH64_FUSE_ADRP_ADD to fuseable_ops.
(cortexa53_tunings): Likewise.
(aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_ADRP_ADD.
commit 248ec70cfac6cb552a427b4336a3340bb25a5e53
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Thu Nov 6 12:05:26 2014 +0000
[AArch64] Fuse ADRP+ADD
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 502ba6d..03ae7c4 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -77,6 +77,7 @@
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
+#include "tm-constrs.h"
/* Defined for convenience. */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
@@ -304,6 +305,7 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
#define AARCH64_FUSE_NOTHING (0)
#define AARCH64_FUSE_MOV_MOVK (1 << 0)
+#define AARCH64_FUSE_ADRP_ADD (1 << 1)
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
@@ -327,7 +329,7 @@ static const struct tune_params cortexa53_tunings =
&generic_vector_cost,
NAMED_PARAM (memmov_cost, 4),
NAMED_PARAM (issue_rate, 2),
- NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
+ NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD))
};
static const struct tune_params cortexa57_tunings =
@@ -338,7 +340,7 @@ static const struct tune_params cortexa57_tunings =
&cortexa57_vector_cost,
NAMED_PARAM (memmov_cost, 4),
NAMED_PARAM (issue_rate, 3),
- NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
+ NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD))
};
static const struct tune_params thunderx_tunings =
@@ -10037,6 +10039,32 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
}
}
+ if (simple_sets_p
+ && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
+ {
+
+ /* We're trying to match:
+ prev (adrp) == (set (reg r1)
+ (high (symbol_ref ("SYM"))))
+ curr (add) == (set (reg r0)
+ (lo_sum (reg r1)
+ (symbol_ref ("SYM"))))
+ Note that r0 need not necessarily be the same as r1, especially
+ during pre-regalloc scheduling. */
+
+ if (satisfies_constraint_Ush (SET_SRC (prev_set))
+ && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
+ {
+ if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
+ && REG_P (XEXP (SET_SRC (curr_set), 0))
+ && REGNO (XEXP (SET_SRC (curr_set), 0))
+ == REGNO (SET_DEST (prev_set))
+ && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
+ XEXP (SET_SRC (curr_set), 1)))
+ return true;
+ }
+ }
+
return false;
}