This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][AArch64] Improve Cortex-A53 shift bypass
- From: Wilco Dijkstra <Wilco dot Dijkstra at arm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: nd <nd at arm dot com>, James Greenhalgh <James dot Greenhalgh at arm dot com>, "Richard Earnshaw" <Richard dot Earnshaw at arm dot com>
- Date: Thu, 27 Apr 2017 17:38:26 +0000
- Subject: [PATCH][AArch64] Improve Cortex-A53 shift bypass
- Authentication-results: sourceware.org; auth=none
- Authentication-results: arm.com; dkim=none (message not signed) header.d=none;arm.com; dmarc=none action=none header.from=arm.com;
- Nodisclaimer: True
- Spamdiagnosticmetadata: NSPM
- Spamdiagnosticoutput: 1:99
The aarch_forward_to_shift_is_not_shifted_reg bypass always returns true
on AArch64 shifted instructions. This causes the bypass to activate in
too many cases, resulting in slower execution on Cortex-A53 like reported
in PR79665.
This patch uses the arm_no_early_alu_shift_dep condition instead which
improves the example in PR79665 by ~7%. Given it is no longer used,
remove aarch_forward_to_shift_is_not_shifted_reg.
Passes AArch64 bootstrap and regress. OK for commit?
ChangeLog:
2017-04-27 Wilco Dijkstra <wdijkstr@arm.com>
PR target/79665
* config/arm/aarch-common.c (arm_no_early_alu_shift_dep):
Remove redundant if.
(aarch_forward_to_shift_is_not_shifted_reg): Remove.
* config/arm/aarch-common-protos.h
(aarch_forward_to_shift_is_not_shifted_re): Remove.
* config/arm/cortex-a53.md: Use arm_no_early_alu_shift_dep in bypass.
--
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 7c2bb4c2ed93728efcbd9e2811c09dddd04b37fe..4350d975abbbbd2cda55ac31e0d47971b40fcde5 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -25,7 +25,6 @@
extern int aarch_accumulator_forwarding (rtx_insn *, rtx_insn *);
extern int aarch_crypto_can_dual_issue (rtx_insn *, rtx_insn *);
-extern int aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *, rtx_insn *);
extern bool aarch_rev16_p (rtx);
extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode);
extern bool aarch_rev16_shright_mask_imm_p (rtx, machine_mode);
diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index 742d2ff4c7b779ae07b92f8a800e4667e32c44fb..9da2e382b2a1ecabd56acccc57997dbf626da513 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -254,12 +254,7 @@ arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
return 0;
if ((early_op = arm_find_shift_sub_rtx (op)))
- {
- if (REG_P (early_op))
- early_op = op;
-
- return !reg_overlap_mentioned_p (value, early_op);
- }
+ return !reg_overlap_mentioned_p (value, early_op);
return 0;
}
@@ -472,38 +467,6 @@ aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer)
return (REGNO (dest) == REGNO (accumulator));
}
-/* Return nonzero if the CONSUMER instruction is some sort of
- arithmetic or logic + shift operation, and the register we are
- writing in PRODUCER is not used in a register shift by register
- operation. */
-
-int
-aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *producer,
- rtx_insn *consumer)
-{
- rtx value, op;
- rtx early_op;
-
- if (!arm_get_set_operands (producer, consumer, &value, &op))
- return 0;
-
- if ((early_op = arm_find_shift_sub_rtx (op)))
- {
- if (REG_P (early_op))
- early_op = op;
-
- /* Any other canonicalisation of a shift is a shift-by-constant
- so we don't care. */
- if (GET_CODE (early_op) == ASHIFT)
- return (!REG_P (XEXP (early_op, 0))
- || !REG_P (XEXP (early_op, 1)));
- else
- return 1;
- }
-
- return 0;
-}
-
/* Return non-zero if the consumer (a multiply-accumulate instruction)
has an accumulator dependency on the result of the producer (a
multiplication instruction) and no other dependency on that result. */
diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md
index 7cf5fc5a0cd1d59efd0be3310b78303018138547..5bd0e62a108241ca56b01315908426e3f095fa81 100644
--- a/gcc/config/arm/cortex-a53.md
+++ b/gcc/config/arm/cortex-a53.md
@@ -211,7 +211,7 @@ (define_bypass 1 "cortex_a53_alu*"
(define_bypass 1 "cortex_a53_alu*"
"cortex_a53_alu_shift*"
- "aarch_forward_to_shift_is_not_shifted_reg")
+ "arm_no_early_alu_shift_dep")
(define_bypass 2 "cortex_a53_alu*"
"cortex_a53_alu_*,cortex_a53_shift*")