[rtlopt] model reorder buffer for Athlon and other improvements
Jan Hubicka
jh@suse.cz
Sun Dec 29 02:23:00 GMT 2002
Sun Dec 29 11:17:10 CET 2002 Jan Hubicka <jh@suse.cz>
* athlon.md: Reorganize to better match reality.
* i386.c (ix86_issue_rate): Set to 6 for Athlon and K8.
(ix86_sched_init): Issue the initialization insn.
(ia32_multipass_dfa_lookahead): Set to 6 for athlon/k8.
* i386.md (type attribute): Add ssecomi and initpipe.
(init_pipe): New insn.
(comi patterns): Update type.
Index: config/i386/athlon.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/athlon.md,v
retrieving revision 1.1.14.4
diff -c -3 -p -r1.1.14.4 athlon.md
*** config/i386/athlon.md 23 Dec 2002 13:55:38 -0000 1.1.14.4
--- config/i386/athlon.md 29 Dec 2002 10:16:35 -0000
***************
*** 43,53 ****
;; \ / \ | / fadd fmul fstore
;; imul load/store (2x) fadd fmul fstore
! (define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")
! (define_cpu_unit "athlon-decode0" "athlon")
! (define_cpu_unit "athlon-decode1" "athlon")
! (define_cpu_unit "athlon-decode2" "athlon")
! (define_cpu_unit "athlon-decodev" "athlon")
;; Model the fact that double decoded instruction may take 2 cycles
;; to decode when decoder2 and decoder0 in next cycle
;; is used (this is needed to allow troughput of 1.5 double decoded
--- 43,63 ----
;; \ / \ | / fadd fmul fstore
;; imul load/store (2x) fadd fmul fstore
! (define_automaton "athlon_agu,athlon_decode,athlon_load,athlon_mult,athlon_fp,athlon_ieu")
!
! ;; Model of decoder includes on-chip scheduler simplified into FIFO, so the
! ;; scheduler can see partly the independency between decoding and instruction
! ;; issue. The cycle0 of modeled pipe is not the cycle when instruction is
! ;; decoded, but it is the time when instruction is issued minus 4.
! ;; Decoding happens at earliest available stage of the 4 cycle queue.
! ;;
! ;; The model depends heavily on the fact that genautomata attempts to apply
! ;; the allocation in deterministic automata in the order they are written in
! ;; machine description.
!
! (define_cpu_unit "athlon-decode0" "athlon_decode")
! (define_cpu_unit "athlon-decode1" "athlon_decode")
! (define_cpu_unit "athlon-decode2" "athlon_decode")
;; Model the fact that double decoded instruction may take 2 cycles
;; to decode when decoder2 and decoder0 in next cycle
;; is used (this is needed to allow troughput of 1.5 double decoded
***************
*** 62,103 ****
;; too. Vector decoded instructions then can't be issued when
;; modeled as consuming decoder0+decoder1+decoder2.
;; We solve that by specialized vector decoder unit and exclusion set.
! (presence_set "athlon-decode2" "athlon-decode0")
! (exclusion_set "athlon-decodev" "athlon-decode0,athlon-decode1,athlon-decode2")
! (define_reservation "athlon-vector" "nothing,athlon-decodev")
! (define_reservation "athlon-direct0" "nothing,athlon-decode0")
! (define_reservation "athlon-direct" "nothing,
! (athlon-decode0 | athlon-decode1
! | athlon-decode2)")
;; Double instructions behaves like two direct instructions.
! (define_reservation "athlon-double" "((athlon-decode2, athlon-decode0)
! | (nothing,(athlon-decode0 + athlon-decode1))
! | (nothing,(athlon-decode1 + athlon-decode2)))")
!
! ;; Agu and ieu unit results in extremly large automatons and
! ;; in our approximation they are hardly filled in. Only ieu
! ;; unit can, as issue rate is 3 and agu unit is always used
! ;; first in the insn reservations. Skip the models.
!
! ;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
! ;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
! ;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
! ;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
! (define_reservation "athlon-ieu" "nothing")
! (define_cpu_unit "athlon-ieu0" "athlon")
! ;(define_cpu_unit "athlon-agu0" "athlon_agu")
! ;(define_cpu_unit "athlon-agu1" "athlon_agu")
! ;(define_cpu_unit "athlon-agu2" "athlon_agu")
! ;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
! (define_reservation "athlon-agu" "nothing,nothing")
(define_cpu_unit "athlon-mult" "athlon_mult")
(define_cpu_unit "athlon-load0" "athlon_load")
(define_cpu_unit "athlon-load1" "athlon_load")
(define_reservation "athlon-load" "athlon-agu,
! (athlon-load0 | athlon-load1)")
! (define_reservation "athlon-store" "nothing")
;; The three fp units are fully pipelined with latency of 3
(define_cpu_unit "athlon-fadd" "athlon_fp")
--- 72,145 ----
;; too. Vector decoded instructions then can't be issued when
;; modeled as consuming decoder0+decoder1+decoder2.
;; We solve that by specialized vector decoder unit and exclusion set.
!
! ;; Eat the free decoders in current cycle. Used before vector decoded
! ;; instructions to avoid later instructions from being decoded in the
! ;; left out decoder from current cycle.
! (define_reservation "athlon-decodebubble" "(athlon-decode1+athlon-decode2)|athlon-decode2|nothing")
!
! ;; Vector decoder is modelled as consuming all three units together.
! (define_reservation "athlon-decodev" "athlon-decode0+athlon-decode1+athlon-decode2")
!
! ;; Issue vector/direct/double instruction in the current cycle.
! (define_reservation "athlon-vectornow" "athlon-decodebubble,athlon-decodev")
! (define_reservation "athlon-direct0now" "athlon-decodebubble,athlon-decode0")
! (define_reservation "athlon-directnow" "nothing,
! (athlon-decode0 | athlon-decode1
! | athlon-decode2)")
;; Double instructions behaves like two direct instructions.
! (define_reservation "athlon-doublenow" "((athlon-decode2, athlon-decode0)
! | (nothing,(athlon-decode0 + athlon-decode1))
! | (nothing,(athlon-decode1 + athlon-decode2)))")
! (define_reservation "athlon-vector" "((athlon-vectornow,nothing,nothing,nothing)
! | (nothing,athlon-vectornow,nothing,nothing)
! | (nothing,nothing,athlon-vectornow,nothing)
! | (nothing,nothing,nothing,athlon-vectornow))")
! ;; Call is vector decoded insn, but the real effect is seen after returning
! ;; from the call. Pessimistically expect the FIFO to be empty.
! (define_reservation "athlon-vectorcall" "((athlon-vectornow,athlon-decodev,athlon-decodev,athlon-decodev)
! | (nothing,athlon-vectornow,athlon-decodev,athlon-decodev)
! | (nothing,nothing,athlon-vectornow,athlon-decodev)
! | (nothing,nothing,nothing,athlon-vectornow))")
! (define_reservation "athlon-direct0" "((athlon-direct0now,nothing,nothing,nothing)
! | (nothing,athlon-direct0now,nothing,nothing)
! | (nothing,nothing,athlon-direct0now,nothing)
! | (nothing,nothing,nothing,athlon-direct0now))")
! (define_reservation "athlon-direct" "((athlon-directnow,nothing,nothing,nothing)
! | (nothing,athlon-directnow,nothing,nothing)
! | (nothing,nothing,athlon-directnow,nothing)
! | (nothing,nothing,nothing,athlon-directnow))")
! (define_reservation "athlon-double" "((athlon-doublenow,nothing,nothing,nothing)
! | (nothing,athlon-doublenow,nothing,nothing)
! | (nothing,nothing,athlon-doublenow,nothing)
! | (nothing,nothing,nothing,athlon-doublenow))")
! (define_reservation "athlon-initialdecoder" "(athlon-decodev,athlon-decodev,athlon-decodev,athlon-decodev,athlon-decodev)")
!
! (define_cpu_unit "athlon-ieu0" "athlon_ieu")
! (define_cpu_unit "athlon-ieu1" "athlon_ieu")
! (define_cpu_unit "athlon-ieu2" "athlon_ieu")
! (define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
! ;(define_reservation "athlon-ieu" "nothing")
! ;(define_cpu_unit "athlon-ieu0" "athlon")
! (define_cpu_unit "athlon-agu0" "athlon_agu")
! (define_cpu_unit "athlon-agu1" "athlon_agu")
! (define_cpu_unit "athlon-agu2" "athlon_agu")
! (define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
! ;(define_reservation "athlon-agu" "nothing,nothing")
(define_cpu_unit "athlon-mult" "athlon_mult")
(define_cpu_unit "athlon-load0" "athlon_load")
(define_cpu_unit "athlon-load1" "athlon_load")
(define_reservation "athlon-load" "athlon-agu,
! (athlon-load0 | (athlon-load1)),nothing")
! (define_reservation "athlon-doubleload" "(athlon-load0 + athlon-load1)")
! (define_reservation "athlon-orderingload" "athlon-agu,
! ((athlon-load0 + athlon-load1) | (athlon-load1)),athlon-doubleload")
! ;; Stores occupy same slots in the queue as loads, so probably 2 loads can't
! ;; happen in parallel with 2 stores.
! (define_reservation "athlon-store" "((athlon-load0) | athlon-load1)")
! (define_reservation "athlon-fpustore" "((athlon-agu,athlon-store)+athlon-fstore)")
;; The three fp units are fully pipelined with latency of 3
(define_cpu_unit "athlon-fadd" "athlon_fp")
***************
*** 107,136 ****
(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
;; Jump instructions are executed in the branch unit compltetely transparent to us
(define_insn_reservation "athlon_branch" 0
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "ibr"))
! "athlon-direct")
! (define_insn_reservation "athlon_call" 0
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "call,callv"))
! "athlon-vector")
;; Latency of push operation is 3 cycles, but ESP value is available
;; earlier
(define_insn_reservation "athlon_push" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "push"))
! "athlon-direct,nothing,athlon-store")
(define_insn_reservation "athlon_pop" 4
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "pop"))
! "athlon-vector,athlon-ieu,athlon-load")
(define_insn_reservation "athlon_pop_k8" 3
(and (eq_attr "cpu" "k8")
(eq_attr "type" "pop"))
! "athlon-double,athlon-ieu,athlon-load")
(define_insn_reservation "athlon_leave" 3
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "leave"))
--- 149,184 ----
(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
+ ;; Instruction to bring decoder state into the "completely empty" state at
+ ;; the beginning of basic block.
+ (define_insn_reservation "athlon_init" 0
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "initpipe"))
+ "athlon-initialdecoder")
;; Jump instructions are executed in the branch unit compltetely transparent to us
(define_insn_reservation "athlon_branch" 0
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "ibr"))
! "athlon-direct,athlon-ieu")
! (define_insn_reservation "athlon_call" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "call,callv"))
! "athlon-vectorcall,athlon-ieu")
;; Latency of push operation is 3 cycles, but ESP value is available
;; earlier
(define_insn_reservation "athlon_push" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "push"))
! "athlon-direct,athlon-agu,athlon-store")
(define_insn_reservation "athlon_pop" 4
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "pop"))
! "athlon-vector,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_pop_k8" 3
(and (eq_attr "cpu" "k8")
(eq_attr "type" "pop"))
! "athlon-double,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave" 3
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "leave"))
***************
*** 144,185 ****
(define_insn_reservation "athlon_lea" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "lea"))
! "athlon-direct,athlon-agu")
;; Mul executes in special multiplier unit attached to IEU0
(define_insn_reservation "athlon_imul" 5
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "imul")
(eq_attr "memory" "none,unknown")))
! "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
;; ??? Widening multiply is vector or double.
(define_insn_reservation "athlon_imul_k8_DI" 4
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "imul")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "none,unknown"))))
! "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
(define_insn_reservation "athlon_imul_k8" 3
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "imul")
(eq_attr "memory" "none,unknown")))
! "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
(define_insn_reservation "athlon_imul_mem" 8
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "imul")
(eq_attr "memory" "load,both")))
! "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8_DI" 7
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "imul")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "load,both"))))
! "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8" 6
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "imul")
(eq_attr "memory" "load,both")))
! "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
;; Idiv can not execute in parallel with other instructions. Dealing with it
;; as with short latency vector instruction is good approximation avoiding
--- 192,233 ----
(define_insn_reservation "athlon_lea" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "lea"))
! "athlon-direct,athlon-agu,nothing")
;; Mul executes in special multiplier unit attached to IEU0
(define_insn_reservation "athlon_imul" 5
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "imul")
(eq_attr "memory" "none,unknown")))
! "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0+athlon-ieu1")
;; ??? Widening multiply is vector or double.
(define_insn_reservation "athlon_imul_k8_DI" 4
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "imul")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "none,unknown"))))
! "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0+athlon-ieu1")
(define_insn_reservation "athlon_imul_k8" 3
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "imul")
(eq_attr "memory" "none,unknown")))
! "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0+athlon-ieu1")
(define_insn_reservation "athlon_imul_mem" 8
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "imul")
(eq_attr "memory" "load,both")))
! "athlon-vector,athlon-load,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0+athlon-ieu1")
(define_insn_reservation "athlon_imul_mem_k8_DI" 7
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "imul")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "load,both"))))
! "athlon-vector,athlon-load,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
(define_insn_reservation "athlon_imul_mem_k8" 6
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "imul")
(eq_attr "memory" "load,both")))
! "athlon-vector,athlon-load,athlon-ieu0,athlon-mult,athlon-ieu0")
;; Idiv can not execute in parallel with other instructions. Dealing with it
;; as with short latency vector instruction is good approximation avoiding
***************
*** 192,210 ****
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "idiv")
(eq_attr "memory" "none,unknown")))
! "athlon-vector,athlon-ieu*6")
(define_insn_reservation "athlon_idiv_mem" 9
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "idiv")
(eq_attr "memory" "load,both")))
! "athlon-vector,athlon-load,athlon-ieu*6")
;; The paralelism of string instructions is not documented. Model it same way
;; as idiv to create smaller automata. This probably does not matter much.
(define_insn_reservation "athlon_str" 6
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "str")
(eq_attr "memory" "load,both,store")))
! "athlon-vector,athlon-load,athlon-ieu*6")
(define_insn_reservation "athlon_idirect" 1
(and (eq_attr "cpu" "athlon,k8")
--- 240,258 ----
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "idiv")
(eq_attr "memory" "none,unknown")))
! "athlon-vector,athlon-ieu0*6")
(define_insn_reservation "athlon_idiv_mem" 9
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "idiv")
(eq_attr "memory" "load,both")))
! "athlon-vector,athlon-load,athlon-ieu0*6")
;; The paralelism of string instructions is not documented. Model it same way
;; as idiv to create smaller automata. This probably does not matter much.
(define_insn_reservation "athlon_str" 6
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "str")
(eq_attr "memory" "load,both,store")))
! "athlon-vector,athlon-load,athlon-ieu0*6")
(define_insn_reservation "athlon_idirect" 1
(and (eq_attr "cpu" "athlon,k8")
***************
*** 245,272 ****
(and (eq_attr "athlon_decode" "direct")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "both"))))
! "athlon-direct,athlon-load,athlon-ieu,
athlon-store")
(define_insn_reservation "athlon_ivector_both" 6
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "both"))))
! "athlon-vector,athlon-load,athlon-ieu,athlon-ieu,
athlon-store")
(define_insn_reservation "athlon_idirect_store" 1
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "athlon_decode" "direct")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "store"))))
! "athlon-direct,athlon-ieu,
athlon-store")
(define_insn_reservation "athlon_ivector_store" 2
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "store"))))
! "athlon-vector,athlon-ieu,athlon-ieu,
athlon-store")
;; Athlon floatin point unit
--- 293,320 ----
(and (eq_attr "athlon_decode" "direct")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "both"))))
! "athlon-direct,athlon-orderingload,athlon-ieu+athlon-doubleload,
athlon-store")
(define_insn_reservation "athlon_ivector_both" 6
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "both"))))
! "athlon-vector,athlon-orderingload,athlon-ieu+athlon-doubleload,athlon-ieu+athlon-doubleload,
athlon-store")
(define_insn_reservation "athlon_idirect_store" 1
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "athlon_decode" "direct")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "store"))))
! "athlon-direct,(athlon-ieu+athlon-agu),
athlon-store")
(define_insn_reservation "athlon_ivector_store" 2
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "store"))))
! "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu+athlon-doubleload,
athlon-store")
;; Athlon floatin point unit
***************
*** 275,323 ****
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "load")
(eq_attr "mode" "XF"))))
! "athlon-vector,athlon-fany")
(define_insn_reservation "athlon_fldxf_k8" 13
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "load")
(eq_attr "mode" "XF"))))
! "athlon-vector,athlon-fany")
(define_insn_reservation "athlon_fld" 6
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "load")))
! "athlon-direct,athlon-fany,nothing,athlon-load")
(define_insn_reservation "athlon_fld_k8" 4
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "load")))
! "athlon-direct,athlon-fany,athlon-load")
(define_insn_reservation "athlon_fstxf" 10
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "store,both")
(eq_attr "mode" "XF"))))
! "athlon-vector,athlon-fstore")
(define_insn_reservation "athlon_fstxf_k8" 8
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "store,both")
(eq_attr "mode" "XF"))))
! "athlon-vector,athlon-fstore")
(define_insn_reservation "athlon_fst" 4
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "store,both")))
! "athlon-direct,athlon-fstore,nothing,athlon-store")
(define_insn_reservation "athlon_fst_k8" 2
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "store,both")))
! "athlon-direct,athlon-fstore,athlon-store")
(define_insn_reservation "athlon_fist" 4
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "fistp"))
! "athlon-direct,athlon-fstore,nothing")
(define_insn_reservation "athlon_fmov" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "fmov"))
--- 323,371 ----
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "load")
(eq_attr "mode" "XF"))))
! "athlon-vector,athlon-load,athlon-fany")
(define_insn_reservation "athlon_fldxf_k8" 13
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "load")
(eq_attr "mode" "XF"))))
! "athlon-vector,athlon-load,athlon-fany")
(define_insn_reservation "athlon_fld" 6
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "load")))
! "athlon-direct,athlon-load,athlon-fany")
(define_insn_reservation "athlon_fld_k8" 4
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "load")))
! "athlon-direct,athlon-load,athlon-fany")
(define_insn_reservation "athlon_fstxf" 10
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "store,both")
(eq_attr "mode" "XF"))))
! "athlon-vector,athlon-fpustore")
(define_insn_reservation "athlon_fstxf_k8" 8
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "store,both")
(eq_attr "mode" "XF"))))
! "athlon-vector,athlon-fpustore")
(define_insn_reservation "athlon_fst" 4
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "store,both")))
! "athlon-direct,athlon-fpustore")
(define_insn_reservation "athlon_fst_k8" 2
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "store,both")))
! "athlon-direct,athlon-fpustore")
(define_insn_reservation "athlon_fist" 4
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "fistp"))
! "athlon-direct,athlon-fpustore")
(define_insn_reservation "athlon_fmov" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "fmov"))
***************
*** 419,467 ****
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "fcmp"))
"athlon-direct,athlon-fadd")
! (define_insn_reservation "athlon_fxch" 2
! (and (eq_attr "cpu" "athlon,k8")
! (eq_attr "type" "fxch"))
! "athlon-direct,athlon-fany")
;; Athlon handle MMX operations in the FPU unit with shorter latencies
(define_insn_reservation "athlon_movlpd_load" 4
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssemov")
(match_operand:DF 1 "memory_operand" "")))
! "athlon-direct,athlon-load")
(define_insn_reservation "athlon_movaps_load" 4
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssemov")
(and (eq_attr "mode" "V4SF,V2DF,TI")
(eq_attr "memory" "load"))))
! "athlon-double,athlon-load")
(define_insn_reservation "athlon_movss_load" 3
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssemov")
(and (eq_attr "mode" "SF,DI")
(eq_attr "memory" "load"))))
! "athlon-double,athlon-load")
(define_insn_reservation "athlon_mmxsseld" 4
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "mmxmov,ssemov")
(eq_attr "memory" "load")))
! "athlon-direct,athlon-fany,athlon-load")
(define_insn_reservation "athlon_mmxssest" 3
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "mmxmov,ssemov")
(and (eq_attr "mode" "V4SF,V2DF,TI")
(eq_attr "memory" "store,both"))))
! "athlon-double,athlon-store")
(define_insn_reservation "athlon_mmxssest_k8" 2
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "mmxmov,ssemov")
(eq_attr "memory" "store,both")))
! "athlon-direct,athlon-store")
(define_insn_reservation "athlon_movaps" 2
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssemov")
(eq_attr "mode" "V4SF,V2DF")))
! "athlon-double,athlon-faddmul,athlon-faddmul")
(define_insn_reservation "athlon_mmxssemov" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "mmxmov,ssemov"))
--- 467,517 ----
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "fcmp"))
"athlon-direct,athlon-fadd")
! ;; Never seen by the scheduler.
! ;;(define_insn_reservation "athlon_fxch" 2
! ;; (and (eq_attr "cpu" "athlon,k8")
! ;; (eq_attr "type" "fxch"))
! ;; "athlon-direct,athlon-fany")
! ;;
;; Athlon handle MMX operations in the FPU unit with shorter latencies
(define_insn_reservation "athlon_movlpd_load" 4
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssemov")
(match_operand:DF 1 "memory_operand" "")))
! "athlon-direct,athlon-load,athlon-fany")
(define_insn_reservation "athlon_movaps_load" 4
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssemov")
(and (eq_attr "mode" "V4SF,V2DF,TI")
(eq_attr "memory" "load"))))
! "athlon-double,athlon-load,athlon-fany")
(define_insn_reservation "athlon_movss_load" 3
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssemov")
(and (eq_attr "mode" "SF,DI")
(eq_attr "memory" "load"))))
! "athlon-double,athlon-load,athlon-fany")
(define_insn_reservation "athlon_mmxsseld" 4
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "mmxmov,ssemov")
(eq_attr "memory" "load")))
! "athlon-direct,athlon-load,athlon-fany")
(define_insn_reservation "athlon_mmxssest" 3
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "mmxmov,ssemov")
(and (eq_attr "mode" "V4SF,V2DF,TI")
(eq_attr "memory" "store,both"))))
! "athlon-double,athlon-fpustore")
(define_insn_reservation "athlon_mmxssest_k8" 2
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "mmxmov,ssemov")
(eq_attr "memory" "store,both")))
! "athlon-direct,athlon-fpustore")
(define_insn_reservation "athlon_movaps" 2
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssemov")
(eq_attr "mode" "V4SF,V2DF")))
! "athlon-double,(athlon-faddmul+athlon-faddmul)")
(define_insn_reservation "athlon_mmxssemov" 2
(and (eq_attr "cpu" "athlon,k8")
(eq_attr "type" "mmxmov,ssemov"))
***************
*** 504,515 ****
(and (eq_attr "cpu" "k8")
(eq_attr "type" "sselog"))
"athlon-double,athlon-fmul")
! (define_insn_reservation "athlon_ssecmp_load" 5
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssecmp")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load"))))
! "athlon-vector,athlon-load,athlon-fadd")
(define_insn_reservation "athlon_ssecmp" 2
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssecmp")
--- 554,574 ----
(and (eq_attr "cpu" "k8")
(eq_attr "type" "sselog"))
"athlon-double,athlon-fmul")
! (define_insn_reservation "athlon_ssecomi_load" 5
! (and (eq_attr "cpu" "athlon,k8")
! (and (eq_attr "type" "ssecomi")
! (eq_attr "memory" "load")))
! "athlon-vector,athlon-load,athlon-fadd")
! (define_insn_reservation "athlon_ssecomi" 3
! (and (eq_attr "cpu" "athlon,k8")
! (eq_attr "type" "ssecomi"))
! "athlon-vector,athlon-fadd")
! (define_insn_reservation "athlon_ssecmp_load" 4
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssecmp")
(and (eq_attr "mode" "SF,DF")
(eq_attr "memory" "load"))))
! "athlon-direct,athlon-load,athlon-fadd")
(define_insn_reservation "athlon_ssecmp" 2
(and (eq_attr "cpu" "athlon,k8")
(and (eq_attr "type" "ssecmp")
***************
*** 519,530 ****
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "load")))
! "athlon-vector,athlon-fadd")
(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "load")))
! "athlon-double,athlon-fadd")
(define_insn_reservation "athlon_ssecmpvector" 3
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "ssecmp"))
--- 578,589 ----
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "load")))
! "athlon-vector,athlon-load,athlon-fadd")
(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
(and (eq_attr "cpu" "k8")
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "load")))
! "athlon-double,athlon-load,athlon-fadd")
(define_insn_reservation "athlon_ssecmpvector" 3
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "ssecmp"))
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.446.2.10
diff -c -3 -p -r1.446.2.10 i386.c
*** config/i386/i386.c 18 Dec 2002 10:35:39 -0000 1.446.2.10
--- config/i386/i386.c 29 Dec 2002 10:16:41 -0000
*************** Boston, MA 02111-1307, USA. */
*** 45,50 ****
--- 45,51 ----
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
+ #include "sched-int.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
*************** ix86_issue_rate ()
*** 11402,11410 ****
case PROCESSOR_PENTIUMPRO:
case PROCESSOR_PENTIUM4:
- case PROCESSOR_ATHLON:
- case PROCESSOR_K8:
return 3;
default:
return 1;
--- 11403,11412 ----
case PROCESSOR_PENTIUMPRO:
case PROCESSOR_PENTIUM4:
return 3;
+ case PROCESSOR_K8:
+ case PROCESSOR_ATHLON:
+ return 6;
default:
return 1;
*************** ix86_sched_init (dump, sched_verbose, ve
*** 11686,11691 ****
--- 11688,11699 ----
int sched_verbose ATTRIBUTE_UNUSED;
int veclen ATTRIBUTE_UNUSED;
{
+ if (TARGET_ATHLON_K8)
+ {
+ rtx insn = emit_insn (gen_init_pipe ());
+ state_transition (curr_state, insn);
+ delete_insn (insn);
+ }
memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}
*************** ia32_multipass_dfa_lookahead ()
*** 11906,11911 ****
--- 11914,11921 ----
{
if (ix86_cpu == PROCESSOR_PENTIUM)
return 2;
+ else if (TARGET_ATHLON_K8)
+ return 6;
else
return 0;
}
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.380.2.12
diff -c -3 -p -r1.380.2.12 i386.md
*** config/i386/i386.md 18 Dec 2002 10:35:43 -0000 1.380.2.12
--- config/i386/i386.md 29 Dec 2002 10:16:50 -0000
***************
*** 146,153 ****
str,cld,
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,
sselog,sseiadd,sseishft,sseimul,
! sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv,
! mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
(const_string "other"))
;; Main data type used by the insn
--- 146,153 ----
str,cld,
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,
sselog,sseiadd,sseishft,sseimul,
! sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssediv,
! mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft,initpipe"
(const_string "other"))
;; Main data type used by the insn
***************
*** 160,166 ****
(cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp")
(const_string "i387")
(eq_attr "type" "sselog,sseiadd,sseishft,sseimul,
! sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv")
(const_string "sse")
(eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
(const_string "mmx")
--- 160,166 ----
(cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp")
(const_string "i387")
(eq_attr "type" "sselog,sseiadd,sseishft,sseimul,
! sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssediv")
(const_string "sse")
(eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
(const_string "mmx")
***************
*** 227,233 ****
(eq_attr "type"
"imovx,setcc,icmov,
sselog,sseiadd,sseishft,sseimul,
! sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
(const_int 1)
(const_int 0)))
--- 227,233 ----
(eq_attr "type"
"imovx,setcc,icmov,
sselog,sseiadd,sseishft,sseimul,
! sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssediv,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
(const_int 1)
(const_int 0)))
***************
*** 310,320 ****
(if_then_else (match_operand 1 "memory_operand" "")
(const_string "both")
(const_string "store"))
! (eq_attr "type" "pop,setcc")
(if_then_else (match_operand 0 "memory_operand" "")
(const_string "both")
(const_string "load"))
! (eq_attr "type" "icmp,test,ssecmp,mmxcmp,fcmp")
(if_then_else (ior (match_operand 0 "memory_operand" "")
(match_operand 1 "memory_operand" ""))
(const_string "load")
--- 310,324 ----
(if_then_else (match_operand 1 "memory_operand" "")
(const_string "both")
(const_string "store"))
! (eq_attr "type" "pop")
(if_then_else (match_operand 0 "memory_operand" "")
(const_string "both")
(const_string "load"))
! (eq_attr "type" "setcc")
! (if_then_else (match_operand 0 "memory_operand" "")
! (const_string "store")
! (const_string "none"))
! (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp")
(if_then_else (ior (match_operand 0 "memory_operand" "")
(match_operand 1 "memory_operand" ""))
(const_string "load")
***************
*** 345,351 ****
"!alu1,negnot,
imov,imovx,icmp,test,
fmov,fcmp,fsgn,
! sse,ssemov,ssecmp,ssecvt,
mmx,mmxmov,mmxcmp,mmxcvt")
(match_operand 2 "memory_operand" ""))
(const_string "load")
--- 349,355 ----
"!alu1,negnot,
imov,imovx,icmp,test,
fmov,fcmp,fsgn,
! sse,ssemov,ssecmp,ssecomi,ssecvt,
mmx,mmxmov,mmxcmp,mmxcvt")
(match_operand 2 "memory_operand" ""))
(const_string "load")
***************
*** 997,1003 ****
&& SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[0])"
"* return output_fp_compare (insn, operands, 1, 0);"
! [(set_attr "type" "fcmp,ssecmp")
(set (attr "mode")
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
--- 1001,1007 ----
&& SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[0])"
"* return output_fp_compare (insn, operands, 1, 0);"
! [(set_attr "type" "fcmp,ssecomi")
(set (attr "mode")
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
***************
*** 1011,1017 ****
"SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[0])"
"* return output_fp_compare (insn, operands, 1, 0);"
! [(set_attr "type" "ssecmp")
(set (attr "mode")
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
--- 1015,1021 ----
"SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[0])"
"* return output_fp_compare (insn, operands, 1, 0);"
! [(set_attr "type" "ssecomi")
(set (attr "mode")
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
***************
*** 1045,1051 ****
&& SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 1);"
! [(set_attr "type" "fcmp,ssecmp")
(set (attr "mode")
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
--- 1049,1055 ----
&& SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 1);"
! [(set_attr "type" "fcmp,ssecomi")
(set (attr "mode")
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
***************
*** 1059,1065 ****
"SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 1);"
! [(set_attr "type" "ssecmp")
(set (attr "mode")
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
--- 1063,1069 ----
"SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 1);"
! [(set_attr "type" "ssecomi")
(set (attr "mode")
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
***************
*** 13895,13900 ****
--- 13899,13913 ----
(set_attr "modrm" "0")
(set_attr "ppro_uops" "one")])
+ (define_insn "init_pipe"
+ [(const_int 1)]
+ "TARGET_ATHLON_K8"
+ {
+ abort();
+ }
+ [(set_attr "type" "initpipe")])
+
+
(define_expand "prologue"
[(const_int 1)]
""
***************
*** 19506,19512 ****
(parallel [(const_int 0)]))))]
"TARGET_SSE"
"comiss\t{%1, %0|%0, %1}"
! [(set_attr "type" "ssecmp")
(set_attr "mode" "SF")])
(define_insn "sse_ucomi"
--- 19519,19525 ----
(parallel [(const_int 0)]))))]
"TARGET_SSE"
"comiss\t{%1, %0|%0, %1}"
! [(set_attr "type" "ssecomi")
(set_attr "mode" "SF")])
(define_insn "sse_ucomi"
***************
*** 19519,19525 ****
(parallel [(const_int 0)]))))]
"TARGET_SSE"
"ucomiss\t{%1, %0|%0, %1}"
! [(set_attr "type" "ssecmp")
(set_attr "mode" "SF")])
--- 19532,19538 ----
(parallel [(const_int 0)]))))]
"TARGET_SSE"
"ucomiss\t{%1, %0|%0, %1}"
! [(set_attr "type" "ssecomi")
(set_attr "mode" "SF")])
***************
*** 21081,21087 ****
(parallel [(const_int 0)]))))]
"TARGET_SSE2"
"comisd\t{%1, %0|%0, %1}"
! [(set_attr "type" "ssecmp")
(set_attr "mode" "DF")])
(define_insn "sse2_ucomi"
--- 21094,21100 ----
(parallel [(const_int 0)]))))]
"TARGET_SSE2"
"comisd\t{%1, %0|%0, %1}"
! [(set_attr "type" "ssecomi")
(set_attr "mode" "DF")])
(define_insn "sse2_ucomi"
***************
*** 21094,21100 ****
(parallel [(const_int 0)]))))]
"TARGET_SSE2"
"ucomisd\t{%1, %0|%0, %1}"
! [(set_attr "type" "ssecmp")
(set_attr "mode" "DF")])
;; SSE Strange Moves.
--- 21107,21113 ----
(parallel [(const_int 0)]))))]
"TARGET_SSE2"
"ucomisd\t{%1, %0|%0, %1}"
! [(set_attr "type" "ssecomi")
(set_attr "mode" "DF")])
;; SSE Strange Moves.
More information about the Gcc-patches
mailing list