This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
[COMMITTED][AArch64] Tweak the pipeline model for Exynos M1
- From: Evandro Menezes <e dot menezes at samsung dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Tue, 23 Feb 2016 15:42:19 -0600
- Subject: [COMMITTED][AArch64] Tweak the pipeline model for Exynos M1
- Authentication-results: sourceware.org; auth=none
Minor tweaks to the cost and scheduling models for Exynos M1.
Committed as r233646 (patch 1/2) and r233647 (patch 2/2).
--
Evandro Menezes
From ab6127823e706361315f1c8b87fb4c32bc299b65 Mon Sep 17 00:00:00 2001
From: evandro <evandro@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Tue, 23 Feb 2016 20:21:23 +0000
Subject: [PATCH 1/2] * gcc/config/aarch64/aarch64.c
(exynosm1_tunings): Enable the Newton series for reciprocal square
root in Exynos M1.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@233646 138bc75d-0d04-0410-961f-82ee72b054a4
---
gcc/ChangeLog | 5 +++++
gcc/config/aarch64/aarch64.c | 2 +-
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3c629ef..22dd022 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-02-23 Evandro Menezes <e.menezes@samsung.com>
+
+ * config/aarch64/aarch64.c (exynosm1_tunings): Enable the Newton
+ series for reciprocal square root in Exynos M1.
+
2016-02-23 Martin Sebor <msebor@redhat.com>
PR c/69759
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 923a4b3..dc3dfea 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -538,7 +538,7 @@ static const struct tune_params exynosm1_tunings =
48, /* max_case_values. */
64, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags. */
};
static const struct tune_params thunderx_tunings =
--
1.9.1
From 01cadc5b883a2613f847aa7a88b86aed454d9413 Mon Sep 17 00:00:00 2001
From: evandro <evandro@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Tue, 23 Feb 2016 21:31:00 +0000
Subject: [PATCH 2/2] Tweak the pipeline model for Exynos M1
gcc/
* config/aarch64/aarch64.c (exynosm1_tunings): Enable fusion of AES{D,E}
and AESMC pairs.
* config/arm/exynos-m1.md: Change cost of STP, fix bypass for stores
and add bypass for AES{D,E} and AESMC pairs.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@233647 138bc75d-0d04-0410-961f-82ee72b054a4
---
gcc/ChangeLog | 7 +++++++
gcc/config/aarch64/aarch64.c | 2 +-
gcc/config/arm/exynos-m1.md | 26 +++++++++++++++++---------
3 files changed, 25 insertions(+), 10 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 22dd022..07b50b5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,12 @@
2016-02-23 Evandro Menezes <e.menezes@samsung.com>
+ * config/arm/exynos-m1.md: Change cost of STP, fix bypass for stores
+ and add bypass for AES{D,E} and AESMC pairs.
+ * config/aarch64/aarch64.c (exynosm1_tunings): Enable fusion of AES{D,E}
+ and AESMC pairs.
+
+2016-02-23 Evandro Menezes <e.menezes@samsung.com>
+
* config/aarch64/aarch64.c (exynosm1_tunings): Enable the Newton
series for reciprocal square root in Exynos M1.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index dc3dfea..6dc8330 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -526,7 +526,7 @@ static const struct tune_params exynosm1_tunings =
&generic_branch_cost,
4, /* memmov_cost */
3, /* issue_rate */
- (AARCH64_FUSE_NOTHING), /* fusible_ops */
+ (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
4, /* function_align. */
4, /* jump_align. */
4, /* loop_align. */
diff --git a/gcc/config/arm/exynos-m1.md b/gcc/config/arm/exynos-m1.md
index 2f52b22..318b151 100644
--- a/gcc/config/arm/exynos-m1.md
+++ b/gcc/config/arm/exynos-m1.md
@@ -248,10 +248,6 @@
(eq_attr "type" "neon_load4_all_lanes, neon_load4_all_lanes_q")
(const_string "neon_load4_all")
- (eq_attr "type" "f_stores, f_stored,\
- neon_stp, neon_stp_q")
- (const_string "neon_store")
-
(eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q")
(const_string "neon_store1_1")
@@ -730,8 +726,14 @@
(define_insn_reservation
"exynos_m1_neon_store" 1
(and (eq_attr "tune" "exynosm1")
- (eq_attr "exynos_m1_neon_type" "neon_store"))
- "(em1_fst, em1_st)")
+ (eq_attr "type" "f_stores, f_stored, neon_stp"))
+ "em1_sfst")
+
+(define_insn_reservation
+ "exynos_m1_neon_store_q" 3
+ (and (eq_attr "tune" "exynosm1")
+ (eq_attr "type" "neon_stp_q"))
+ "(em1_sfst * 2)")
(define_insn_reservation
"exynos_m1_neon_store1_1" 1
@@ -761,7 +763,7 @@
"exynos_m1_neon_store1_one" 7
(and (eq_attr "tune" "exynosm1")
(eq_attr "exynos_m1_neon_type" "neon_store1_one"))
- "(em1_fst, em1_st)")
+ "em1_sfst")
(define_insn_reservation
"exynos_m1_neon_store2" 7
@@ -892,7 +894,9 @@
;; Pre-decrement and post-increment addressing modes update the register quickly.
;; TODO: figure out how to tell the addressing mode register from the loaded one.
-(define_bypass 1 "exynos_m1_store*" "exynos_m1_store*")
+(define_bypass 1 "exynos_m1_store*, exynos_m1_neon_store*"
+ "exynos_m1_store*, exynos_m1_neon_store*,
+ exynos_m1_load*, exynos_m1_neon_load*")
;; MLAs can feed other MLAs quickly.
(define_bypass 1 "exynos_m1_mla*" "exynos_m1_mla*")
@@ -908,7 +912,6 @@
(define_bypass 5 "exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step"
"exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
-
(define_bypass 3 "exynos_m1_fp_add"
"exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
(define_bypass 3 "exynos_m1_neon_fp_add"
@@ -947,6 +950,11 @@
"exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
exynos_m1_crypto_poly*")
+;; AES{D,E}/AESMC pairs can feed each other instantly.
+(define_bypass 0 "exynos_m1_crypto_simple"
+ "exynos_m1_crypto_simple"
+ "aarch_crypto_can_dual_issue")
+
;; Predicted branches take no time, but mispredicted ones take forever anyway.
(define_bypass 1 "exynos_m1_*"
"exynos_m1_call, exynos_m1_branch")
--
1.9.1