This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][AArch64] Improve scheduling model for X-Gene



On 15/11/17 08:49, Dominik Inführ wrote:
Could you please commit it for me? I don’t have commit rights.

Ah, of course.
Committed with r254759.

Thanks,
Kyrill

Thanks,

Dominik

On 13 Nov 2017, at 12:27, Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> wrote:


On 13/11/17 11:09, Dominik Inführ wrote:
Oh sure, I've now successfully bootstrapped on arm-linux-gnueabihf and aarch64-unknown-linux-gnu.

Dominik

Thanks Dominik,

This is ok for trunk.

Kyrill

On 10 Nov 2017, at 10:53, Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> wrote:

Hi Dominik,

On 10/11/17 09:36, Dominik Inführ wrote:
Hi,

This patch tries to refine the instruction scheduling model for X-Gene. It improves performance for 456.hmmer and 464.h264ref by about 1%. It also splits the model into multiple automatons, which gives a smaller binary and a faster build. The patch survives bootstrap.

Best,
Dominik
The changes look ok to me, but as the description is shared between the arm and aarch64 ports can you please also do a sanity check
by building (and preferably bootstrapping) an arm compiler?

Thanks,
Kyrill

gcc/ChangeLog:
2017-10-09  Dominik Infuehr <dominik.infuehr@theobroma-systems.com>

	* config/arm/xgene1.md (xgene1): Split into automatons
	xgene1_main, xgene1_decoder, xgene1_div, xgene1_simd.
	(xgene1_f_load): Adjust reservations and/or types.
	(xgene1_f_store): Likewise.
	(xgene1_load_pair): Likewise.
	(xgene1_store_pair): Likewise.
	(xgene1_fp_load1): Likewise.
	(xgene1_load1): Likewise.
	(xgene1_store1): Likewise.
	(xgene1_move): Likewise.
	(xgene1_alu): Likewise.
	(xgene1_simd): Likewise.
	(xgene1_bfm): Likewise.
	(xgene1_neon_load1): Likewise.
	(xgene1_neon_store1): Likewise.
	(xgene1_neon_logic): Likewise.
	(xgene1_neon_st1): Likewise.
	(xgene1_neon_ld1r): Likewise.
	(xgene1_alu_cond): Added.
	(xgene1_shift_rotate): Likewise.
	(xgene1_bfx): Likewise.
	(xgene1_mul): Split into xgene1_mul32, xgene1_mul64.

—
diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
index c4b3773..cf0694a 100644
--- a/gcc/config/arm/xgene1.md
+++ b/gcc/config/arm/xgene1.md
@@ -20,17 +20,26 @@

  ;; Pipeline description for the xgene1 micro-architecture

-(define_automaton "xgene1")
+(define_automaton "xgene1_main, xgene1_decoder, xgene1_div, xgene1_simd")

-(define_cpu_unit "xgene1_decode_out0" "xgene1")
-(define_cpu_unit "xgene1_decode_out1" "xgene1")
-(define_cpu_unit "xgene1_decode_out2" "xgene1")
-(define_cpu_unit "xgene1_decode_out3" "xgene1")
+(define_cpu_unit "xgene1_decode_out0" "xgene1_decoder")
+(define_cpu_unit "xgene1_decode_out1" "xgene1_decoder")
+(define_cpu_unit "xgene1_decode_out2" "xgene1_decoder")
+(define_cpu_unit "xgene1_decode_out3" "xgene1_decoder")

-(define_cpu_unit "xgene1_divide" "xgene1")
-(define_cpu_unit "xgene1_fp_divide" "xgene1")
-(define_cpu_unit "xgene1_fsu" "xgene1")
-(define_cpu_unit "xgene1_fcmp" "xgene1")
+(define_cpu_unit "xgene1_IXA" "xgene1_main")
+(define_cpu_unit "xgene1_IXB" "xgene1_main")
+(define_cpu_unit "xgene1_IXB_compl" "xgene1_main")
+
+(define_reservation "xgene1_IXn" "(xgene1_IXA | xgene1_IXB)")
+
+(define_cpu_unit "xgene1_multiply" "xgene1_main")
+(define_cpu_unit "xgene1_divide" "xgene1_div")
+(define_cpu_unit "xgene1_fp_divide" "xgene1_div")
+(define_cpu_unit "xgene1_fsu" "xgene1_simd")
+(define_cpu_unit "xgene1_fcmp" "xgene1_simd")
+(define_cpu_unit "xgene1_ld" "xgene1_main")
+(define_cpu_unit "xgene1_st" "xgene1_main")

  (define_reservation "xgene1_decode1op"
          "( xgene1_decode_out0 )
@@ -68,12 +77,12 @@
  (define_insn_reservation "xgene1_f_load" 10
    (and (eq_attr "tune" "xgene1")
         (eq_attr "type" "f_loadd,f_loads"))
-  "xgene1_decode2op")
+  "xgene1_decode2op, xgene1_ld")

  (define_insn_reservation "xgene1_f_store" 4
    (and (eq_attr "tune" "xgene1")
         (eq_attr "type" "f_stored,f_stores"))
-  "xgene1_decode2op")
+  "xgene1_decode2op, xgene1_st")

  (define_insn_reservation "xgene1_fmov" 2
    (and (eq_attr "tune" "xgene1")
@@ -92,85 +101,108 @@

  (define_insn_reservation "xgene1_load_pair" 6
    (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "load_8, load_16"))
-  "xgene1_decodeIsolated")
+       (eq_attr "type" "load_16"))
+  "xgene1_decodeIsolated, xgene1_ld*2")

  (define_insn_reservation "xgene1_store_pair" 2
    (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "store_8, store_16"))
-  "xgene1_decodeIsolated")
+       (eq_attr "type" "store_16"))
+  "xgene1_decodeIsolated, xgene1_st*2")

  (define_insn_reservation "xgene1_fp_load1" 10
    (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "load_4")
+       (eq_attr "type" "load_4, load_8")
         (eq_attr "fp" "yes"))
-  "xgene1_decode1op")
+  "xgene1_decode1op, xgene1_ld")

  (define_insn_reservation "xgene1_load1" 5
    (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "load_4"))
-  "xgene1_decode1op")
+       (eq_attr "type" "load_4, load_8"))
+  "xgene1_decode1op, xgene1_ld")

-(define_insn_reservation "xgene1_store1" 2
+(define_insn_reservation "xgene1_store1" 1
    (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "store_4"))
-  "xgene1_decode2op")
+       (eq_attr "type" "store_4, store_8"))
+  "xgene1_decode1op, xgene1_st")

  (define_insn_reservation "xgene1_move" 1
    (and (eq_attr "tune" "xgene1")
         (eq_attr "type" "mov_reg,mov_imm,mrs"))
-  "xgene1_decode1op")
+  "xgene1_decode1op, xgene1_IXn")
+
+(define_insn_reservation "xgene1_alu_cond" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "csel"))
+  "xgene1_decode1op, xgene1_IXn")

  (define_insn_reservation "xgene1_alu" 1
    (and (eq_attr "tune" "xgene1")
         (eq_attr "type" "alu_imm,alu_sreg,alu_shift_imm,\
-                        alu_ext,adc_reg,csel,logic_imm,\
+                        alu_ext,adc_reg,logic_imm,\
                          logic_reg,logic_shift_imm,clz,\
-                        rbit,shift_reg,adr,mov_reg,\
-                        mov_imm,extend"))
-  "xgene1_decode1op")
+                        rbit,adr,mov_reg,shift_imm,\
+                        mov_imm,extend,multiple"))
+  "xgene1_decode1op, xgene1_IXn")
+
+(define_insn_reservation "xgene1_shift_rotate" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "shift_reg"))
+  "xgene1_decode1op, xgene1_IXB, xgene1_IXB_compl")

-(define_insn_reservation "xgene1_simd" 1
+(define_insn_reservation "xgene1_simd" 2
    (and (eq_attr "tune" "xgene1")
         (eq_attr "type" "rev"))
-  "xgene1_decode1op")
+  "xgene1_decode1op, xgene1_IXB, xgene1_IXB_compl")

  (define_insn_reservation "xgene1_alus" 1
    (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "alus_imm,alu_sreg,alus_shift_imm,\
+       (eq_attr "type" "alus_imm,alus_sreg,alus_shift_imm,\
                          alus_ext,logics_imm,logics_reg,\
                          logics_shift_imm"))
-  "xgene1_decode1op")
+  "xgene1_decode1op, xgene1_IXB, xgene1_IXB_compl")
+
+(define_bypass 2 "xgene1_alus"
+  "xgene1_alu_cond, xgene1_branch")

-(define_insn_reservation "xgene1_mul" 6
+(define_insn_reservation "xgene1_mul32" 4
    (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "mul,mla,smull,umull,smlal,umlal"))
-  "xgene1_decode2op")
+       (eq_attr "mul32" "yes"))
+  "xgene1_decode2op, xgene1_IXB + xgene1_multiply, xgene1_multiply, nothing, xgene1_IXB_compl")
+
+(define_insn_reservation "xgene1_mul64" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "mul64" "yes"))
+  "xgene1_decode2op, xgene1_IXB + xgene1_multiply, xgene1_multiply, nothing*2, xgene1_IXB_compl")

  (define_insn_reservation "xgene1_div" 34
    (and (eq_attr "tune" "xgene1")
         (eq_attr "type" "sdiv,udiv"))
-  "xgene1_decode1op,xgene1_divide*7")
+  "xgene1_decode1op, xgene1_IXB + xgene1_divide*7")

  (define_insn_reservation "xgene1_fcmp" 10
    (and (eq_attr "tune" "xgene1")
         (eq_attr "type" "fcmpd,fcmps,fccmpd,fccmps"))
-  "xgene1_decode1op,xgene1_fsu+xgene1_fcmp*3")
+  "xgene1_decode1op, xgene1_fsu + xgene1_fcmp*3")

  (define_insn_reservation "xgene1_fcsel" 3
    (and (eq_attr "tune" "xgene1")
         (eq_attr "type" "fcsel"))
-  "xgene1_decode1op,xgene1_fsu")
+  "xgene1_decode1op, xgene1_fsu")
+
+(define_insn_reservation "xgene1_bfx" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "bfx"))
+  "xgene1_decode1op, xgene1_IXn")

  (define_insn_reservation "xgene1_bfm" 2
    (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "bfm,bfx"))
-  "xgene1_decode1op,xgene1_fsu")
+       (eq_attr "type" "bfm"))
+  "xgene1_decode1op, xgene1_IXB, xgene1_IXB_compl")

  (define_insn_reservation "xgene1_f_rint" 5
    (and (eq_attr "tune" "xgene1")
         (eq_attr "type" "f_rintd,f_rints"))
-  "xgene1_decode1op,xgene1_fsu")
+  "xgene1_decode1op, xgene1_fsu")

  (define_insn_reservation "xgene1_f_cvt" 3
    (and (eq_attr "tune" "xgene1")
@@ -225,12 +257,12 @@
  (define_insn_reservation "xgene1_neon_load1" 11
    (and (eq_attr "tune" "xgene1")
         (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q"))
-  "xgene1_decode2op,xgene1_fsu")
+  "xgene1_decode2op, xgene1_ld")

  (define_insn_reservation "xgene1_neon_store1" 5
    (and (eq_attr "tune" "xgene1")
         (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q"))
-  "xgene1_decode2op,xgene1_fsu")
+  "xgene1_decode2op, xgene1_st")

  (define_insn_reservation "xgene1_neon_logic" 2
    (and (eq_attr "tune" "xgene1")
@@ -300,6 +332,8 @@
                          neon_compare_zero_q,\
                          neon_tst,\
                          neon_tst_q,\
+                        neon_minmax,\
+                        neon_minmax_q,\
                         "))
    "xgene1_decode1op,xgene1_fsu")

@@ -439,8 +473,10 @@
    (and (eq_attr "tune" "xgene1")
         (eq_attr "type" "neon_store1_one_lane,\
                          neon_store1_one_lane_q,\
+                        neon_stp,\
+                        neon_stp_q,\
                         "))
-  "xgene1_decode1op")
+  "xgene1_decodeIsolated, xgene1_st")

  (define_insn_reservation "xgene1_neon_halve_narrow" 6
    (and (eq_attr "tune" "xgene1")
@@ -499,7 +535,7 @@
    (and (eq_attr "tune" "xgene1")
         (eq_attr "type" "neon_load1_all_lanes,\
                         "))
-  "xgene1_decode1op")
+  "xgene1_decode1op, xgene1_ld")

  (define_insn_reservation "xgene1_neon_fp_recp" 3
    (and (eq_attr "tune" "xgene1")



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]