[Patch AArch64 2/2] Fix memory sizes to load/store patterns

Thu Jul 27 18:10:00 GMT 2017

On Mon, Jul 03, 2017 at 11:46:58AM +0100, James Greenhalgh wrote:
> On Wed, Jun 21, 2017 at 11:50:08AM +0100, James Greenhalgh wrote:
> > *ping*
> 
> Ping*2

Ping*3

Thanks,
James

> 
> Thanks,
> James
> 
> > On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote:
> > > 
> > > Hi,
> > > 
> > > There seems to be a partial misconception in the AArch64 backend that
> > > load1/load2 refer to the number of registers to load, rather than the
> > > number of words to load. This patch fixes that using the new "number of
> > > bytes" types added in the previous patch.
> > > 
> > > That means using the load_16 and store_16 types that were defined in the
> > > previous patch for the first time in the AArch64 backend. To ensure
> > > continuity for scheduling models, I've just split these out from load_8
> > > and store_8: each reservation that matched the 8-byte type now also
> > > matches the 16-byte type. Please update your models if this is very wrong!
> > > 
> > > Bootstrapped on aarch64-none-linux-gnu with no issue.
> > > 
> > > OK?
> > > 
> > > Thanks,
> > > James
> > > 
> > > ---
> > > 2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>
> > > 
> > > 	* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
> > > 	types correctly.
> > > 	(movti_aarch64): Likewise.
> > > 	(movdf_aarch64): Likewise.
> > > 	(movtf_aarch64): Likewise.
> > > 	(load_pairdi): Likewise.
> > > 	(store_pairdi): Likewise.
> > > 	(load_pairdf): Likewise.
> > > 	(store_pairdf): Likewise.
> > > 	(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
> > > 	(storewb_pair<GPI:mode>_<P:mode>): Likewise.
> > > 	(ldr_got_small_<mode>): Likewise.
> > > 	(ldr_got_small_28k_<mode>): Likewise.
> > > 	(ldr_got_tiny): Likewise.
> > > 	* config/aarch64/iterators.md (ldst_sz): New.
> > > 	(ldpstp_sz): Likewise.
> > > 	* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
> > > 	to store_16.
> > > 	(thunderx_load): Split load_8 to load_16.
> > > 	* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
> > > 	load_8 to load_16.
> > > 	(thunderx2t99_storepair_basic): Split store_8 to store_16.
> > > 	* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
> > > 	(xgene1_store_pair): Split store_8 to store_16.
> > > 
> > 
> > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> > > index 11295a6..a1385e3 100644
> > > --- a/gcc/config/aarch64/aarch64.md
> > > +++ b/gcc/config/aarch64/aarch64.md
> > > @@ -981,7 +981,7 @@
> > >         DONE;
> > >      }"
> > >    [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
> > > -                     load_4,load_4,store_4,store_4,\
> > > +                     load_8,load_8,store_8,store_8,\
> > >                       adr,adr,f_mcr,f_mrc,fmov,neon_move")
> > >     (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
> > >     (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
> > > @@ -1026,7 +1026,8 @@
> > >     ldr\\t%q0, %1
> > >     str\\t%q1, %0"
> > >    [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
> > > -		             load_8,store_8,store_8,f_loadd,f_stored")
> > > +		             load_16,store_16,store_16,\
> > > +                             load_16,store_16")
> > >     (set_attr "length" "8,8,8,4,4,4,4,4,4")
> > >     (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
> > >     (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
> > > @@ -1121,7 +1122,7 @@
> > >     str\\t%x1, %0
> > >     mov\\t%x0, %x1"
> > >    [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
> > > -                     f_loadd,f_stored,load_4,store_4,mov_reg")
> > > +                     f_loadd,f_stored,load_8,store_8,mov_reg")
> > >     (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
> > >  )
> > >  
> > > @@ -1145,7 +1146,7 @@
> > >     stp\\t%1, %H1, %0
> > >     stp\\txzr, xzr, %0"
> > >    [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
> > > -                     f_loadd,f_stored,load_8,store_8,store_8")
> > > +                     f_loadd,f_stored,load_16,store_16,store_16")
> > >     (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
> > >     (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
> > >  )
> > > @@ -1209,7 +1210,7 @@
> > >    "@
> > >     ldp\\t%x0, %x2, %1
> > >     ldp\\t%d0, %d2, %1"
> > > -  [(set_attr "type" "load_8,neon_load1_2reg")
> > > +  [(set_attr "type" "load_16,neon_load1_2reg")
> > >     (set_attr "fp" "*,yes")]
> > >  )
> > >  
> > > @@ -1244,7 +1245,7 @@
> > >    "@
> > >     stp\\t%x1, %x3, %0
> > >     stp\\t%d1, %d3, %0"
> > > -  [(set_attr "type" "store_8,neon_store1_2reg")
> > > +  [(set_attr "type" "store_16,neon_store1_2reg")
> > >     (set_attr "fp" "*,yes")]
> > >  )
> > >  
> > > @@ -1278,7 +1279,7 @@
> > >    "@
> > >     ldp\\t%d0, %d2, %1
> > >     ldp\\t%x0, %x2, %1"
> > > -  [(set_attr "type" "neon_load1_2reg,load_8")
> > > +  [(set_attr "type" "neon_load1_2reg,load_16")
> > >     (set_attr "fp" "yes,*")]
> > >  )
> > >  
> > > @@ -1312,7 +1313,7 @@
> > >    "@
> > >     stp\\t%d1, %d3, %0
> > >     stp\\t%x1, %x3, %0"
> > > -  [(set_attr "type" "neon_store1_2reg,store_8")
> > > +  [(set_attr "type" "neon_store1_2reg,store_16")
> > >     (set_attr "fp" "yes,*")]
> > >  )
> > >  
> > > @@ -1330,7 +1331,7 @@
> > >                     (match_operand:P 5 "const_int_operand" "n"))))])]
> > >    "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
> > >    "ldp\\t%<w>2, %<w>3, [%1], %4"
> > > -  [(set_attr "type" "load_8")]
> > > +  [(set_attr "type" "load_<ldpstp_sz>")]
> > >  )
> > >  
> > >  (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
> > > @@ -1363,7 +1364,7 @@
> > >            (match_operand:GPI 3 "register_operand" "r"))])]
> > >    "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
> > >    "stp\\t%<w>2, %<w>3, [%0, %4]!"
> > > -  [(set_attr "type" "store_8")]
> > > +  [(set_attr "type" "store_<ldpstp_sz>")]
> > >  )
> > >  
> > >  (define_insn "storewb_pair<GPF:mode>_<P:mode>"
> > > @@ -5139,7 +5140,7 @@
> > >  		    UNSPEC_GOTSMALLPIC))]
> > >    ""
> > >    "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
> > > -  [(set_attr "type" "load_4")]
> > > +  [(set_attr "type" "load_<ldst_sz>")]
> > >  )
> > >  
> > >  (define_insn "ldr_got_small_sidi"
> > > @@ -5162,7 +5163,7 @@
> > >  		    UNSPEC_GOTSMALLPIC28K))]
> > >    ""
> > >    "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
> > > -  [(set_attr "type" "load_4")]
> > > +  [(set_attr "type" "load_<ldst_sz>")]
> > >  )
> > >  
> > >  (define_insn "ldr_got_small_28k_sidi"
> > > @@ -5183,7 +5184,7 @@
> > >  		   UNSPEC_GOTTINYPIC))]
> > >    ""
> > >    "ldr\\t%0, %L1"
> > > -  [(set_attr "type" "load_4")]
> > > +  [(set_attr "type" "load_8")]
> > >  )
> > >  
> > >  (define_insn "aarch64_load_tp_hard"
> > > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> > > index 43be7fd..a65c3aa 100644
> > > --- a/gcc/config/aarch64/iterators.md
> > > +++ b/gcc/config/aarch64/iterators.md
> > > @@ -384,6 +384,11 @@
> > >  ;; 32-bit version and "%x0" in the 64-bit version.
> > >  (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
> > >  
> > > +;; The size of access, in bytes.
> > > +(define_mode_attr ldst_sz [(SI "4") (DI "8")])
> > > +;; Likewise for load/store pair.
> > > +(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
> > > +
> > >  ;; For inequal width int to float conversion
> > >  (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
> > >  (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
> > > diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
> > > index c18da2f..84ac6cd 100644
> > > --- a/gcc/config/aarch64/thunderx.md
> > > +++ b/gcc/config/aarch64/thunderx.md
> > > @@ -100,7 +100,7 @@
> > >  ;; Store pair are single issued
> > >  (define_insn_reservation "thunderx_storepair" 1
> > >    (and (eq_attr "tune" "thunderx")
> > > -       (eq_attr "type" "store_8"))
> > > +       (eq_attr "type" "store_8,store_16"))
> > >    "thunderx_pipe0 + thunderx_pipe1")
> > >  
> > >  ;; Prefetch are single issued
> > > @@ -112,7 +112,7 @@
> > >  ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
> > >  (define_insn_reservation "thunderx_load" 3
> > >    (and (eq_attr "tune" "thunderx")
> > > -       (eq_attr "type" "load_4, load_8"))
> > > +       (eq_attr "type" "load_4, load_8, load_16"))
> > >    "thunderx_pipe0")
> > >  
> > >  (define_insn_reservation "thunderx_brj" 1
> > > diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
> > > index 632396f..4e39610 100644
> > > --- a/gcc/config/aarch64/thunderx2t99.md
> > > +++ b/gcc/config/aarch64/thunderx2t99.md
> > > @@ -128,7 +128,7 @@
> > >  
> > >  (define_insn_reservation "thunderx2t99_loadpair" 5
> > >    (and (eq_attr "tune" "thunderx2t99")
> > > -       (eq_attr "type" "load_8"))
> > > +       (eq_attr "type" "load_8,load_16"))
> > >    "thunderx2t99_i012,thunderx2t99_ls01")
> > >  
> > >  (define_insn_reservation "thunderx2t99_store_basic" 1
> > > @@ -138,7 +138,7 @@
> > >  
> > >  (define_insn_reservation "thunderx2t99_storepair_basic" 1
> > >    (and (eq_attr "tune" "thunderx2t99")
> > > -       (eq_attr "type" "store_8"))
> > > +       (eq_attr "type" "store_8,store_16"))
> > >    "thunderx2t99_ls01,thunderx2t99_sd")
> > >  
> > >  ;; FP data processing instructions.
> > > diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
> > > index 7e70408..0b457ee 100644
> > > --- a/gcc/config/arm/xgene1.md
> > > +++ b/gcc/config/arm/xgene1.md
> > > @@ -92,12 +92,12 @@
> > >  
> > >  (define_insn_reservation "xgene1_load_pair" 6
> > >    (and (eq_attr "tune" "xgene1")
> > > -       (eq_attr "type" "load_8"))
> > > +       (eq_attr "type" "load_8, load_16"))
> > >    "xgene1_decodeIsolated")
> > >  
> > >  (define_insn_reservation "xgene1_store_pair" 2
> > >    (and (eq_attr "tune" "xgene1")
> > > -       (eq_attr "type" "store_8"))
> > > +       (eq_attr "type" "store_8, store_16"))
> > >    "xgene1_decodeIsolated")
> > >  
> > >  (define_insn_reservation "xgene1_fp_load1" 10
> >