This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][AArch64] Add branch-cost to cpu tuning information.
- From: Matthew Wahab <matthew dot wahab at arm dot com>
- To: gcc-patches <gcc-patches at gcc dot gnu dot org>
- Date: Tue, 21 Apr 2015 15:00:05 +0100
- Subject: [PATCH][AArch64] Add branch-cost to cpu tuning information.
- Authentication-results: sourceware.org; auth=none
The AArch64 backend sets BRANCH_COST to be the constant value 2 for all cpus,
meaning that the compiler thinks that branches cost the same across all cpus.
This patch reworks the handling of branch costs to allow per-cpu values to be
set. The actual values of the branch costs are unchanged, as the correct values
will need to be decided for each core.
Tested aarch64-none-linux-gnu with gcc-check.
Ok for trunk?
Matthew
2015-04-21 Matthew Wahab <matthew.wahab@arm.com>
* gcc/config/aarch64/aarch64-protos.h (struct cpu_branch_cost): New.
(tune_params): Add field branch_costs.
(aarch64_branch_cost): Declare.
* gcc/config/aarch64/aarch64.c (generic_branch_cost): New.
(generic_tunings): Set field branch_costs to generic_branch_cost.
(cortexa53_tunings): Likewise.
(cortexa57_tunings): Likewise.
(thunderx_tunings): Likewise.
(xgene1_tunings): Likewise.
(aarch64_branch_cost): Define.
* gcc/config/aarch64/aarch64.h (BRANCH_COST): Redefine.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 8676c5c..77b01fa 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -162,12 +162,20 @@ struct cpu_vector_cost
const int cond_not_taken_branch_cost; /* Cost of not taken branch. */
};
+/* Branch costs, as returned by aarch64_branch_cost for BRANCH_COST. */
+struct cpu_branch_cost
+{
+ const int predictable; /* Predictable branch or optimizing for size. */
+ const int unpredictable; /* Unpredictable branch when optimizing for speed. */
+};
+
struct tune_params
{
const struct cpu_cost_table *const insn_extra_cost;
const struct cpu_addrcost_table *const addr_cost;
const struct cpu_regmove_cost *const regmove_cost;
const struct cpu_vector_cost *const vec_costs;
+ const struct cpu_branch_cost *const branch_costs;
const int memmov_cost;
const int issue_rate;
const unsigned int fuseable_ops;
@@ -259,6 +267,8 @@ void aarch64_print_operand (FILE *, rtx, char);
void aarch64_print_operand_address (FILE *, rtx);
void aarch64_emit_call_insn (rtx);
+int aarch64_branch_cost (bool, bool);
+
/* Initialize builtins for SIMD intrinsics. */
void init_aarch64_simd_builtins (void);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 77a641e..a020316 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -339,12 +339,20 @@ static const struct cpu_vector_cost xgene1_vector_cost =
#define AARCH64_FUSE_ADRP_LDR (1 << 3)
#define AARCH64_FUSE_CMP_BRANCH (1 << 4)
+/* Generic costs for branch instructions; both equal the constant 2 that BRANCH_COST used previously. */
+static const struct cpu_branch_cost generic_branch_cost =
+{
+ 2, /* Predictable. */
+ 2 /* Unpredictable. */
+};
+
static const struct tune_params generic_tunings =
{
&cortexa57_extra_costs,
&generic_addrcost_table,
&generic_regmove_cost,
&generic_vector_cost,
+ &generic_branch_cost,
4, /* memmov_cost */
2, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fuseable_ops */
@@ -362,6 +370,7 @@ static const struct tune_params cortexa53_tunings =
&generic_addrcost_table,
&cortexa53_regmove_cost,
&generic_vector_cost,
+ &generic_branch_cost,
4, /* memmov_cost */
2, /* issue_rate */
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -380,6 +389,7 @@ static const struct tune_params cortexa57_tunings =
&cortexa57_addrcost_table,
&cortexa57_regmove_cost,
&cortexa57_vector_cost,
+ &generic_branch_cost,
4, /* memmov_cost */
3, /* issue_rate */
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -398,6 +408,7 @@ static const struct tune_params thunderx_tunings =
&generic_addrcost_table,
&thunderx_regmove_cost,
&generic_vector_cost,
+ &generic_branch_cost,
6, /* memmov_cost */
2, /* issue_rate */
AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops */
@@ -415,6 +426,7 @@ static const struct tune_params xgene1_tunings =
&xgene1_addrcost_table,
&xgene1_regmove_cost,
&xgene1_vector_cost,
+ &generic_branch_cost,
6, /* memmov_cost */
4, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fuseable_ops */
@@ -5361,6 +5373,19 @@ aarch64_address_cost (rtx x,
return cost;
}
+int
+aarch64_branch_cost (bool speed_p, bool predictable_p)
+{
+ /* Use the unpredictable cost only for an unpredictable branch when optimizing for speed. */
+ const struct cpu_branch_cost *branch_costs =
+ aarch64_tune_params->branch_costs;
+
+ if (!speed_p || predictable_p)
+ return branch_costs->predictable;
+ else
+ return branch_costs->unpredictable;
+}
+
/* Return true if the RTX X in mode MODE is a zero or sign extract
usable in an ADD or SUB (extended register) instruction. */
static bool
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index bf59e40..93a32f5 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -823,7 +823,8 @@ do { \
#define TRAMPOLINE_SECTION text_section
/* To start with. */
-#define BRANCH_COST(SPEED_P, PREDICTABLE_P) 2
+#define BRANCH_COST(SPEED_P, PREDICTABLE_P) \
+ (aarch64_branch_cost (SPEED_P, PREDICTABLE_P))
/* Assembly output. */