This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[Patch AArch64 2/2] Fix memory sizes to load/store patterns
- From: James Greenhalgh <james dot greenhalgh at arm dot com>
- To: <gcc-patches at gcc dot gnu dot org>
- Cc: <nd at arm dot com>, <richard dot earnshaw at arm dot com>, <marcus dot shawcroft at arm dot com>
- Date: Tue, 12 Sep 2017 15:59:46 +0100
- Subject: [Patch AArch64 2/2] Fix memory sizes to load/store patterns
- Authentication-results: sourceware.org; auth=none
- Authentication-results: spf=pass (sender IP is 217.140.96.140) smtp.mailfrom=arm.com; gcc.gnu.org; dkim=none (message not signed) header.d=none;gcc.gnu.org; dmarc=bestguesspass action=none header.from=arm.com;
- Nodisclaimer: True
- References: <20170727180954.GB8643@arm.com>
- Spamdiagnosticmetadata: NSPM
- Spamdiagnosticoutput: 1:99
On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote:
>
> Hi,
>
> There seems to be a partial misconception in the AArch64 backend that
> load1/load2 refer to the number of registers to load, rather than the
> number of words to load. This patch fixes that using the new "number of
> bytes" types added in the previous patch.
>
> That means using the load_16 and store_16 types that were defined in the
> previous patch for the first time in the AArch64 backend. To ensure
> continuity for scheduling models, I've just split this out from load_8.
> Please update your models if this is very wrong!
I've updated this patch on trunk, rechecked it, and committed it
as r252026.
Thanks,
James
---
2017-09-12 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
types correctly.
(movti_aarch64): Likewise.
(movdf_aarch64): Likewise.
(movtf_aarch64): Likewise.
(load_pairdi): Likewise.
(store_pairdi): Likewise.
(load_pairdf): Likewise.
(store_pairdf): Likewise.
(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
(storewb_pair<GPI:mode>_<P:mode>): Likewise.
(ldr_got_small_<mode>): Likewise.
(ldr_got_small_28k_<mode>): Likewise.
(ldr_got_tiny): Likewise.
* config/aarch64/iterators.md (ldst_sz): New.
(ldpstp_sz): Likewise.
* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
to store_16.
(thunderx_load): Split load_8 to load_16.
* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
load_8 to load_16.
(thunderx2t99_storepair_basic): Split store_8 to store_16.
* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
(xgene1_store_pair): Split store_8 to store_16.
* config/aarch64/falkor.md (falkor_ld_3_ld): Split load_8 to load_16.
(falkor_st_0_st_sd): Split store_8 to store_16.
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 7cbb458..e85376c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -994,8 +994,8 @@
aarch64_expand_mov_immediate (operands[0], operands[1]);
DONE;
}"
- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,\
- load_4,store_4,store_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_8,\
+ load_8,store_8,store_8,adr,adr,f_mcr,f_mrc,fmov,neon_move")
(set_attr "fp" "*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
(set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
)
@@ -1039,7 +1039,8 @@
ldr\\t%q0, %1
str\\t%q1, %0"
[(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
- load_8,store_8,store_8,f_loadd,f_stored")
+ load_16,store_16,store_16,\
+ load_16,store_16")
(set_attr "length" "8,8,8,4,4,4,4,4,4")
(set_attr "simd" "*,*,*,yes,*,*,*,*,*")
(set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
@@ -1142,7 +1143,7 @@
mov\\t%x0, %x1
mov\\t%x0, %1"
[(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
- f_loadd,f_stored,load_4,store_4,mov_reg,\
+ f_loadd,f_stored,load_8,store_8,mov_reg,\
fconstd")
(set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
)
@@ -1187,7 +1188,7 @@
stp\\t%1, %H1, %0
stp\\txzr, xzr, %0"
[(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
- f_loadd,f_stored,load_8,store_8,store_8")
+ f_loadd,f_stored,load_16,store_16,store_16")
(set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
(set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
)
@@ -1251,7 +1252,7 @@
"@
ldp\\t%x0, %x2, %1
ldp\\t%d0, %d2, %1"
- [(set_attr "type" "load_8,neon_load1_2reg")
+ [(set_attr "type" "load_16,neon_load1_2reg")
(set_attr "fp" "*,yes")]
)
@@ -1286,7 +1287,7 @@
"@
stp\\t%x1, %x3, %0
stp\\t%d1, %d3, %0"
- [(set_attr "type" "store_8,neon_store1_2reg")
+ [(set_attr "type" "store_16,neon_store1_2reg")
(set_attr "fp" "*,yes")]
)
@@ -1320,7 +1321,7 @@
"@
ldp\\t%d0, %d2, %1
ldp\\t%x0, %x2, %1"
- [(set_attr "type" "neon_load1_2reg,load_8")
+ [(set_attr "type" "neon_load1_2reg,load_16")
(set_attr "fp" "yes,*")]
)
@@ -1354,7 +1355,7 @@
"@
stp\\t%d1, %d3, %0
stp\\t%x1, %x3, %0"
- [(set_attr "type" "neon_store1_2reg,store_8")
+ [(set_attr "type" "neon_store1_2reg,store_16")
(set_attr "fp" "yes,*")]
)
@@ -1372,7 +1373,7 @@
(match_operand:P 5 "const_int_operand" "n"))))])]
"INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
"ldp\\t%<w>2, %<w>3, [%1], %4"
- [(set_attr "type" "load_8")]
+ [(set_attr "type" "load_<ldpstp_sz>")]
)
(define_insn "loadwb_pair<GPF:mode>_<P:mode>"
@@ -1405,7 +1406,7 @@
(match_operand:GPI 3 "register_operand" "r"))])]
"INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
"stp\\t%<w>2, %<w>3, [%0, %4]!"
- [(set_attr "type" "store_8")]
+ [(set_attr "type" "store_<ldpstp_sz>")]
)
(define_insn "storewb_pair<GPF:mode>_<P:mode>"
@@ -5355,7 +5356,7 @@
UNSPEC_GOTSMALLPIC))]
""
"ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
- [(set_attr "type" "load_4")]
+ [(set_attr "type" "load_<ldst_sz>")]
)
(define_insn "ldr_got_small_sidi"
@@ -5378,7 +5379,7 @@
UNSPEC_GOTSMALLPIC28K))]
""
"ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
- [(set_attr "type" "load_4")]
+ [(set_attr "type" "load_<ldst_sz>")]
)
(define_insn "ldr_got_small_28k_sidi"
@@ -5399,7 +5400,7 @@
UNSPEC_GOTTINYPIC))]
""
"ldr\\t%0, %L1"
- [(set_attr "type" "load_4")]
+ [(set_attr "type" "load_8")]
)
(define_insn "aarch64_load_tp_hard"
diff --git a/gcc/config/aarch64/falkor.md b/gcc/config/aarch64/falkor.md
index 66efc8c..83971ce 100644
--- a/gcc/config/aarch64/falkor.md
+++ b/gcc/config/aarch64/falkor.md
@@ -581,7 +581,7 @@
(define_insn_reservation "falkor_ld_3_ld" 3
(and (eq_attr "tune" "falkor")
- (eq_attr "type" "load_4,load_8"))
+ (eq_attr "type" "load_4,load_8,load_16"))
"falkor_ld")
;; Miscellaneous Data-Processing Instructions
@@ -663,7 +663,7 @@
(define_insn_reservation "falkor_st_0_st_sd" 0
(and (eq_attr "tune" "falkor")
- (eq_attr "type" "store_4,store_8"))
+ (eq_attr "type" "store_4,store_8,store_16"))
"falkor_st+falkor_sd")
;; Muliply bypasses.
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 3e38767..477dc35 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -387,6 +387,11 @@
;; 32-bit version and "%x0" in the 64-bit version.
(define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
+;; The size of access, in bytes.
+(define_mode_attr ldst_sz [(SI "4") (DI "8")])
+;; Likewise for load/store pair.
+(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
+
;; For inequal width int to float conversion
(define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
(define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
index c18da2f..84ac6cd 100644
--- a/gcc/config/aarch64/thunderx.md
+++ b/gcc/config/aarch64/thunderx.md
@@ -100,7 +100,7 @@
;; Store pair are single issued
(define_insn_reservation "thunderx_storepair" 1
(and (eq_attr "tune" "thunderx")
- (eq_attr "type" "store_8"))
+ (eq_attr "type" "store_8,store_16"))
"thunderx_pipe0 + thunderx_pipe1")
;; Prefetch are single issued
@@ -112,7 +112,7 @@
;; loads (and load pairs) from L1 take 3 cycles in pipe 0
(define_insn_reservation "thunderx_load" 3
(and (eq_attr "tune" "thunderx")
- (eq_attr "type" "load_4, load_8"))
+ (eq_attr "type" "load_4, load_8, load_16"))
"thunderx_pipe0")
(define_insn_reservation "thunderx_brj" 1
diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
index 41a45ca..5bcf4ff 100644
--- a/gcc/config/aarch64/thunderx2t99.md
+++ b/gcc/config/aarch64/thunderx2t99.md
@@ -128,7 +128,7 @@
(define_insn_reservation "thunderx2t99_loadpair" 5
(and (eq_attr "tune" "thunderx2t99")
- (eq_attr "type" "load_8"))
+ (eq_attr "type" "load_8,load_16"))
"thunderx2t99_i012,thunderx2t99_ls01")
(define_insn_reservation "thunderx2t99_store_basic" 1
@@ -138,7 +138,7 @@
(define_insn_reservation "thunderx2t99_storepair_basic" 1
(and (eq_attr "tune" "thunderx2t99")
- (eq_attr "type" "store_8"))
+ (eq_attr "type" "store_8,store_16"))
"thunderx2t99_ls01,thunderx2t99_sd")
;; FP data processing instructions.
diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
index d0b17ab..c4b3773 100644
--- a/gcc/config/arm/xgene1.md
+++ b/gcc/config/arm/xgene1.md
@@ -92,12 +92,12 @@
(define_insn_reservation "xgene1_load_pair" 6
(and (eq_attr "tune" "xgene1")
- (eq_attr "type" "load_8"))
+ (eq_attr "type" "load_8, load_16"))
"xgene1_decodeIsolated")
(define_insn_reservation "xgene1_store_pair" 2
(and (eq_attr "tune" "xgene1")
- (eq_attr "type" "store_8"))
+ (eq_attr "type" "store_8, store_16"))
"xgene1_decodeIsolated")
(define_insn_reservation "xgene1_fp_load1" 10