This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][ARM] Improve max_cond_insns setting for Cortex cores
- From: Wilco Dijkstra <Wilco dot Dijkstra at arm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Kyrylo Tkachov <Kyrylo dot Tkachov at arm dot com>, Richard Earnshaw <Richard dot Earnshaw at arm dot com>, Richard Sandiford <Richard dot Sandiford at arm dot com>
- Date: Wed, 6 Nov 2019 14:00:48 +0000
- Subject: [PATCH][ARM] Improve max_cond_insns setting for Cortex cores
- Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=arm.com; dmarc=pass action=none header.from=arm.com; dkim=pass header.d=arm.com; arc=none
- Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=q1/ZkzOilqdikMhm0cajmi8nCVp3wjRHSTXncr6E9/o=; b=NL/yZfiPvGT29pyVZN1cML7WNe1hP5HxCWGHVWs8MWaqkhUnwixugoSwKxFPqxOpP0MnpAh36gbdN6p6r7AAwPom4IqEjDak1XDRB3WnhRf6llH7ZUp4PY8OVFEWDEWUuRYQr0b70+O2R2kIM1R+vtdCiPnw2rhUCY+f785Bn2np/+W1xD9MsX+Pp0D9WsNuMry/WW1eLNlAd3Gmsq85wzHO7DdutfLdrCNNA/Mdvp1AZpNzlcp93DMrgRNWM3OzhYGvRtRZlvviBJZ13cdaADVpkzBgKCAMgiO8W2gvt/W2FN8XCwI+QxKa10FanfXSorwF3oLtl8U70YUHM+NdKQ==
- Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=JxFHjtXdAF6dqGMNYQF5McspBg+tCj8JbRfpepiiP+2SxWcA5+/33EVu98I1LxDG9/hCR7wgKKnYYl/WfgSrJhdWCuOh0g2Sy3sB/ZdpUFuw8UZvimJGtlnajexB3+rwNRwG493xjeu1QcjfCyir7xGjCOCV1E3T3Yfzmo9Xmn58iJTCEm04VmdHiaF/y76pSjnMIhNp0/L1q+6GYJjaw6XPIsvLvwlxk/lN6yvCOHy4+J79wGy9HbG5ufrpzNMZKKMd9yr9iSCVvgyt2M2Dg8pi7eKSa+8iz/K4t/XVbEaU41/JYjg/KttT55hZlVztDYxKonBSTFqftRX3LtGUCw==
- Original-authentication-results: spf=none (sender IP is ) smtp.mailfrom=Wilco dot Dijkstra at arm dot com;
Various CPUs have max_cond_insns set to 5 due to historical reasons.
Benchmarking shows that max_cond_insns=2 is fastest on modern Cortex-A
cores, so change it to 2 for all Cortex-A cores. Set max_cond_insns
to 4 on Thumb-2 architectures given it's already limited to that by
MAX_INSN_PER_IT_BLOCK. Also, when a CPU/tune is explicitly selected and
-mrestrict-it is not explicitly set, use the CPU tuning setting.
On Cortex-A57 this gives a 1.1% performance gain on SPECINT2006 as well
as a 0.4% code size reduction.
Bootstrapped on armhf. OK for commit?
ChangeLog:
2019-08-19 Wilco Dijkstra <wdijkstr@arm.com>
* gcc/config/arm/arm.c (arm_option_override_internal):
Use max_cond_insns from CPU tuning unless -mrestrict-it is used.
(arm_v6t2_tune): Set max_cond_insns to 4.
(arm_cortex_tune): Set max_cond_insns to 2.
(arm_cortex_a8_tune): Likewise.
(arm_cortex_a7_tune): Likewise.
(arm_cortex_a35_tune): Likewise.
(arm_cortex_a53_tune): Likewise.
(arm_cortex_a5_tune): Likewise.
(arm_cortex_a9_tune): Likewise.
(arm_v6m_tune): Set max_cond_insns to 4.
---
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 628cf02f23fb29392a63d87f561c3ee2fb73a515..38ac16ad1def91ca78ccfa98fd1679b2b5114851 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1943,7 +1943,7 @@ const struct tune_params arm_v6t2_tune =
arm_default_branch_cost,
&arm_default_vec_cost,
1, /* Constant limit. */
- 5, /* Max cond insns. */
+ 4, /* Max cond insns. */
8, /* Memset max inline. */
1, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1968,7 +1968,7 @@ const struct tune_params arm_cortex_tune =
arm_default_branch_cost,
&arm_default_vec_cost,
1, /* Constant limit. */
- 5, /* Max cond insns. */
+ 2, /* Max cond insns. */
8, /* Memset max inline. */
2, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1991,7 +1991,7 @@ const struct tune_params arm_cortex_a8_tune =
arm_default_branch_cost,
&arm_default_vec_cost,
1, /* Constant limit. */
- 5, /* Max cond insns. */
+ 2, /* Max cond insns. */
8, /* Memset max inline. */
2, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2014,7 +2014,7 @@ const struct tune_params arm_cortex_a7_tune =
arm_default_branch_cost,
&arm_default_vec_cost,
1, /* Constant limit. */
- 5, /* Max cond insns. */
+ 2, /* Max cond insns. */
8, /* Memset max inline. */
2, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2060,7 +2060,7 @@ const struct tune_params arm_cortex_a35_tune =
arm_default_branch_cost,
&arm_default_vec_cost,
1, /* Constant limit. */
- 5, /* Max cond insns. */
+ 2, /* Max cond insns. */
8, /* Memset max inline. */
1, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2083,7 +2083,7 @@ const struct tune_params arm_cortex_a53_tune =
arm_default_branch_cost,
&arm_default_vec_cost,
1, /* Constant limit. */
- 5, /* Max cond insns. */
+ 2, /* Max cond insns. */
8, /* Memset max inline. */
2, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2167,9 +2167,6 @@ const struct tune_params arm_xgene1_tune =
tune_params::SCHED_AUTOPREF_OFF
};
-/* Branches can be dual-issued on Cortex-A5, so conditional execution is
- less appealing. Set max_insns_skipped to a low value. */
-
const struct tune_params arm_cortex_a5_tune =
{
&cortexa5_extra_costs,
@@ -2178,7 +2175,7 @@ const struct tune_params arm_cortex_a5_tune =
arm_cortex_a5_branch_cost,
&arm_default_vec_cost,
1, /* Constant limit. */
- 1, /* Max cond insns. */
+ 2, /* Max cond insns. */
8, /* Memset max inline. */
2, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -2201,7 +2198,7 @@ const struct tune_params arm_cortex_a9_tune =
arm_default_branch_cost,
&arm_default_vec_cost,
1, /* Constant limit. */
- 5, /* Max cond insns. */
+ 2, /* Max cond insns. */
8, /* Memset max inline. */
2, /* Issue rate. */
ARM_PREFETCH_BENEFICIAL(4,32,32),
@@ -2328,7 +2325,7 @@ const struct tune_params arm_v6m_tune =
arm_default_branch_cost,
&arm_default_vec_cost, /* Vectorizer costs. */
1, /* Constant limit. */
- 5, /* Max cond insns. */
+ 4, /* Max cond insns. */
8, /* Memset max inline. */
1, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -3050,6 +3047,11 @@ arm_option_override_internal (struct gcc_options *opts,
if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
opts->x_arm_restrict_it = 0;
+ /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
+ if (!opts_set->x_arm_restrict_it
+ && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
+ opts->x_arm_restrict_it = 0;
+
/* Enable -munaligned-access by default for
- all ARMv6 architecture-based processors when compiling for a 32-bit ISA
i.e. Thumb2 and ARM state only.