This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch AArch64 2/2] Fix memory sizes to load/store patterns


Hi,

There seems to be a partial misconception in the AArch64 backend that
load1/load2 referred to the number of registers to load, rather than the
number of words to load. This patch fixes that using the new "number of
bytes" types added in the previous patch.

That means using the load_16 and store_16 types, which were defined in the
previous patch, for the first time in the AArch64 backend. To ensure
continuity for scheduling models, I've just split these out from load_8
and store_8. Please update your models if this is very wrong!

Bootstrapped on aarch64-none-linux-gnu with no issues.

OK?

Thanks,
James

---
2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>

	* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
	types correctly.
	(movti_aarch64): Likewise.
	(movdf_aarch64): Likewise.
	(movtf_aarch64): Likewise.
	(load_pairdi): Likewise.
	(store_pairdi): Likewise.
	(load_pairdf): Likewise.
	(store_pairdf): Likewise.
	(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
	(storewb_pair<GPI:mode>_<P:mode>): Likewise.
	(ldr_got_small_<mode>): Likewise.
	(ldr_got_small_28k_<mode>): Likewise.
	(ldr_got_tiny): Likewise.
	* config/aarch64/iterators.md (ldst_sz): New.
	(ldpstp_sz): Likewise.
	* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
	to store_16.
	(thunderx_load): Split load_8 to load_16.
	* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
	load_8 to load_16.
	(thunderx2t99_storepair_basic): Split store_8 to store_16.
	* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
	(xgene1_store_pair): Split store_8 to store_16.

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 11295a6..a1385e3 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -981,7 +981,7 @@
        DONE;
     }"
   [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
-                     load_4,load_4,store_4,store_4,\
+                     load_8,load_8,store_8,store_8,\
                      adr,adr,f_mcr,f_mrc,fmov,neon_move")
    (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
    (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
@@ -1026,7 +1026,8 @@
    ldr\\t%q0, %1
    str\\t%q1, %0"
   [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
-		             load_8,store_8,store_8,f_loadd,f_stored")
+		             load_16,store_16,store_16,\
+                             load_16,store_16")
    (set_attr "length" "8,8,8,4,4,4,4,4,4")
    (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
    (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
@@ -1121,7 +1122,7 @@
    str\\t%x1, %0
    mov\\t%x0, %x1"
   [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
-                     f_loadd,f_stored,load_4,store_4,mov_reg")
+                     f_loadd,f_stored,load_8,store_8,mov_reg")
    (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
 )
 
@@ -1145,7 +1146,7 @@
    stp\\t%1, %H1, %0
    stp\\txzr, xzr, %0"
   [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
-                     f_loadd,f_stored,load_8,store_8,store_8")
+                     f_loadd,f_stored,load_16,store_16,store_16")
    (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
    (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
 )
@@ -1209,7 +1210,7 @@
   "@
    ldp\\t%x0, %x2, %1
    ldp\\t%d0, %d2, %1"
-  [(set_attr "type" "load_8,neon_load1_2reg")
+  [(set_attr "type" "load_16,neon_load1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
@@ -1244,7 +1245,7 @@
   "@
    stp\\t%x1, %x3, %0
    stp\\t%d1, %d3, %0"
-  [(set_attr "type" "store_8,neon_store1_2reg")
+  [(set_attr "type" "store_16,neon_store1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
@@ -1278,7 +1279,7 @@
   "@
    ldp\\t%d0, %d2, %1
    ldp\\t%x0, %x2, %1"
-  [(set_attr "type" "neon_load1_2reg,load_8")
+  [(set_attr "type" "neon_load1_2reg,load_16")
    (set_attr "fp" "yes,*")]
 )
 
@@ -1312,7 +1313,7 @@
   "@
    stp\\t%d1, %d3, %0
    stp\\t%x1, %x3, %0"
-  [(set_attr "type" "neon_store1_2reg,store_8")
+  [(set_attr "type" "neon_store1_2reg,store_16")
    (set_attr "fp" "yes,*")]
 )
 
@@ -1330,7 +1331,7 @@
                    (match_operand:P 5 "const_int_operand" "n"))))])]
   "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
   "ldp\\t%<w>2, %<w>3, [%1], %4"
-  [(set_attr "type" "load_8")]
+  [(set_attr "type" "load_<ldpstp_sz>")]
 )
 
 (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
@@ -1363,7 +1364,7 @@
           (match_operand:GPI 3 "register_operand" "r"))])]
   "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
   "stp\\t%<w>2, %<w>3, [%0, %4]!"
-  [(set_attr "type" "store_8")]
+  [(set_attr "type" "store_<ldpstp_sz>")]
 )
 
 (define_insn "storewb_pair<GPF:mode>_<P:mode>"
@@ -5139,7 +5140,7 @@
 		    UNSPEC_GOTSMALLPIC))]
   ""
   "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_<ldst_sz>")]
 )
 
 (define_insn "ldr_got_small_sidi"
@@ -5162,7 +5163,7 @@
 		    UNSPEC_GOTSMALLPIC28K))]
   ""
   "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_<ldst_sz>")]
 )
 
 (define_insn "ldr_got_small_28k_sidi"
@@ -5183,7 +5184,7 @@
 		   UNSPEC_GOTTINYPIC))]
   ""
   "ldr\\t%0, %L1"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_8")]
 )
 
 (define_insn "aarch64_load_tp_hard"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 43be7fd..a65c3aa 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -384,6 +384,11 @@
 ;; 32-bit version and "%x0" in the 64-bit version.
 (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
 
+;; The size of access, in bytes.
+(define_mode_attr ldst_sz [(SI "4") (DI "8")])
+;; Likewise for load/store pair.
+(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
+
 ;; For inequal width int to float conversion
 (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
 (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
index c18da2f..84ac6cd 100644
--- a/gcc/config/aarch64/thunderx.md
+++ b/gcc/config/aarch64/thunderx.md
@@ -100,7 +100,7 @@
 ;; Store pair are single issued
 (define_insn_reservation "thunderx_storepair" 1
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8,store_16"))
   "thunderx_pipe0 + thunderx_pipe1")
 
 ;; Prefetch are single issued
@@ -112,7 +112,7 @@
 ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
 (define_insn_reservation "thunderx_load" 3
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "load_4, load_8"))
+       (eq_attr "type" "load_4, load_8, load_16"))
   "thunderx_pipe0")
 
 (define_insn_reservation "thunderx_brj" 1
diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
index 632396f..4e39610 100644
--- a/gcc/config/aarch64/thunderx2t99.md
+++ b/gcc/config/aarch64/thunderx2t99.md
@@ -128,7 +128,7 @@
 
 (define_insn_reservation "thunderx2t99_loadpair" 5
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "load_8"))
+       (eq_attr "type" "load_8,load_16"))
   "thunderx2t99_i012,thunderx2t99_ls01")
 
 (define_insn_reservation "thunderx2t99_store_basic" 1
@@ -138,7 +138,7 @@
 
 (define_insn_reservation "thunderx2t99_storepair_basic" 1
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8,store_16"))
   "thunderx2t99_ls01,thunderx2t99_sd")
 
 ;; FP data processing instructions.
diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
index 7e70408..0b457ee 100644
--- a/gcc/config/arm/xgene1.md
+++ b/gcc/config/arm/xgene1.md
@@ -92,12 +92,12 @@
 
 (define_insn_reservation "xgene1_load_pair" 6
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "load_8"))
+       (eq_attr "type" "load_8, load_16"))
   "xgene1_decodeIsolated")
 
 (define_insn_reservation "xgene1_store_pair" 2
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8, store_16"))
   "xgene1_decodeIsolated")
 
 (define_insn_reservation "xgene1_fp_load1" 10

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]