Athlon/k8 scheduling tweaks

Jan Hubicka jh@suse.cz
Sat Apr 19 17:48:00 GMT 2003


Hi,
this patch fixes several inaccuracies in the Athlon pipeline model to match
the current optimization guide.  It is also important for getting superblock
scheduling to pay off on Athlon.

Bootstrapped/regtested on x86-64.  OK for mainline?

Honza

Sat Apr 19 19:40:24 CEST 2003  Jan Hubicka  <jh@suse.cz>
	* athlon.md (athlon-agu, athlon-store, athlon-fany, athlon-faddmul):
	Fix.
	(athlon-load2, athlon-store2, athlon-fpsched, athlon-fpload,
	athlon-fvector): New.
	(athlon_*): Revisit to match new optimization guide.
	* i386.c (ix86_adjust_cost):  Fix memory operand costs on Athlon/k8.
	* i386.md (cvt??2?? patterns): Fix modes.
	(fistp patterns): Set modes.
Index: athlon.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/athlon.md,v
retrieving revision 1.5
diff -c -3 -p -r1.5 athlon.md
*** athlon.md	9 Jan 2003 11:02:57 -0000	1.5
--- athlon.md	19 Apr 2003 17:43:09 -0000
***************
*** 89,150 ****
  ;(define_cpu_unit "athlon-agu1" "athlon_agu")
  ;(define_cpu_unit "athlon-agu2" "athlon_agu")
  ;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
! (define_reservation "athlon-agu" "nothing,nothing")
  
  (define_cpu_unit "athlon-mult" "athlon_mult")
  
  (define_cpu_unit "athlon-load0" "athlon_load")
  (define_cpu_unit "athlon-load1" "athlon_load")
  (define_reservation "athlon-load" "athlon-agu,
! 				   (athlon-load0 | athlon-load1)")
! (define_reservation "athlon-store" "nothing")
  
  ;; The three fp units are fully pipelined with latency of 3
  (define_cpu_unit "athlon-fadd" "athlon_fp")
  (define_cpu_unit "athlon-fmul" "athlon_fp")
  (define_cpu_unit "athlon-fstore" "athlon_fp")
! (define_reservation "athlon-fany" "(athlon-fadd | athlon-fmul | athlon-fstore)")
! (define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
  
  
  ;; Jump instructions are executed in the branch unit completely transparent to us
  (define_insn_reservation "athlon_branch" 0
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "ibr"))
! 			 "athlon-direct")
  (define_insn_reservation "athlon_call" 0
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "call,callv"))
! 			 "athlon-vector")
  
  ;; Latency of push operation is 3 cycles, but ESP value is available
  ;; earlier
  (define_insn_reservation "athlon_push" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "push"))
! 			 "athlon-direct,nothing,athlon-store")
  (define_insn_reservation "athlon_pop" 4
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "pop"))
! 			 "athlon-vector,athlon-ieu,athlon-load")
  (define_insn_reservation "athlon_pop_k8" 3
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "pop"))
! 			 "athlon-double,athlon-ieu,athlon-load")
  (define_insn_reservation "athlon_leave" 3
  			 (and (eq_attr "cpu" "athlon")
  			      (eq_attr "type" "leave"))
! 			 "athlon-vector,athlon-load")
  (define_insn_reservation "athlon_leave_k8" 3
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "leave"))
! 			 "athlon-double,athlon-load")
  
  ;; Lea executes in AGU unit with 2 cycles latency.
  (define_insn_reservation "athlon_lea" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "lea"))
! 			 "athlon-direct,athlon-agu")
  
  ;; Mul executes in special multiplier unit attached to IEU0
  (define_insn_reservation "athlon_imul" 5
--- 89,172 ----
  ;(define_cpu_unit "athlon-agu1" "athlon_agu")
  ;(define_cpu_unit "athlon-agu2" "athlon_agu")
  ;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
! (define_reservation "athlon-agu" "nothing")
  
  (define_cpu_unit "athlon-mult" "athlon_mult")
  
  (define_cpu_unit "athlon-load0" "athlon_load")
  (define_cpu_unit "athlon-load1" "athlon_load")
  (define_reservation "athlon-load" "athlon-agu,
! 				   (athlon-load0 | athlon-load1),nothing")
! ;; 128bit SSE instructions issue two loads at once
! (define_reservation "athlon-load2" "athlon-agu,
! 				   (athlon-load0 + athlon-load1),nothing")
! 
! (define_reservation "athlon-store" "(athlon-load0 | athlon-load1)")
! ;; 128bit SSE instructions issue two stores at once
! (define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)")
! 
! 
! ;; The FP operations start to execute at stage 12 in the pipeline, while
! ;; integer operations start to execute at stage 9 for Athlon and 11 for K8.
! ;; Compensate for the difference for Athlon because it results in
! ;; significantly smaller automata.
! (define_reservation "athlon-fpsched" "nothing,nothing,nothing")
! ;; The floating point loads.
! (define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)")
! (define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)")
! (define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)")
! (define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)")
! 
  
  ;; The three fp units are fully pipelined with latency of 3
  (define_cpu_unit "athlon-fadd" "athlon_fp")
  (define_cpu_unit "athlon-fmul" "athlon_fp")
  (define_cpu_unit "athlon-fstore" "athlon_fp")
! (define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
! (define_reservation "athlon-faddmul" "(athlon-fmul | athlon-fadd)")
! 
! ;; Vector operations usually consume many of the pipes.
! (define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")
  
  
  ;; Jump instructions are executed in the branch unit completely transparent to us
  (define_insn_reservation "athlon_branch" 0
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "ibr"))
! 			 "athlon-direct,athlon-ieu")
  (define_insn_reservation "athlon_call" 0
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "call,callv"))
! 			 "athlon-vector,athlon-ieu")
  
  ;; Latency of push operation is 3 cycles, but ESP value is available
  ;; earlier
  (define_insn_reservation "athlon_push" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "push"))
! 			 "athlon-direct,athlon-agu,athlon-store")
  (define_insn_reservation "athlon_pop" 4
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "pop"))
! 			 "athlon-vector,athlon-load,athlon-ieu")
  (define_insn_reservation "athlon_pop_k8" 3
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "pop"))
! 			 "athlon-double,(athlon-ieu+athlon-load)")
  (define_insn_reservation "athlon_leave" 3
  			 (and (eq_attr "cpu" "athlon")
  			      (eq_attr "type" "leave"))
! 			 "athlon-vector,(athlon-ieu+athlon-load)")
  (define_insn_reservation "athlon_leave_k8" 3
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "leave"))
! 			 "athlon-double,(athlon-ieu+athlon-load)")
  
  ;; Lea executes in AGU unit with 2 cycles latency.
  (define_insn_reservation "athlon_lea" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "lea"))
! 			 "athlon-direct,athlon-agu,nothing")
  
  ;; Mul executes in special multiplier unit attached to IEU0
  (define_insn_reservation "athlon_imul" 5
***************
*** 180,200 ****
  			      (and (eq_attr "type" "imul")
  				   (eq_attr "memory" "load,both")))
  			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
! (define_insn_reservation "athlon_idiv" 42
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "idiv")
  				   (eq_attr "memory" "none,unknown")))
! 			 "athlon-vector,athlon-ieu*42")
! (define_insn_reservation "athlon_idiv_mem" 45
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "idiv")
  				   (eq_attr "memory" "load,both")))
! 			 "athlon-vector,athlon-load,athlon-ieu*42")
! (define_insn_reservation "athlon_str" 15
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "str")
  				   (eq_attr "memory" "load,both,store")))
! 			 "athlon-vector,athlon-load,athlon-ieu*10")
  
  (define_insn_reservation "athlon_idirect" 1
  			 (and (eq_attr "cpu" "athlon,k8")
--- 202,232 ----
  			      (and (eq_attr "type" "imul")
  				   (eq_attr "memory" "load,both")))
  			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
! 
! ;; Idiv cannot execute in parallel with other instructions.  Modeling it
! ;; as a short-latency vector instruction is a good approximation that keeps
! ;; the scheduler from trying too hard to hide its latency by overlapping it
! ;; with other instructions.
! ;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
! ;; of the other code.
! 
! (define_insn_reservation "athlon_idiv" 6
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "idiv")
  				   (eq_attr "memory" "none,unknown")))
! 			 "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
! (define_insn_reservation "athlon_idiv_mem" 9
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "idiv")
  				   (eq_attr "memory" "load,both")))
! 			 "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
! ;; Parallelism of string instructions is undocumented.  Model it the same way
! ;; as idiv to create smaller automata.  This probably does not matter much.
! (define_insn_reservation "athlon_str" 6
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "str")
  				   (eq_attr "memory" "load,both,store")))
! 			 "athlon-vector,athlon-load,athlon-ieu0*6")
  
  (define_insn_reservation "athlon_idirect" 1
  			 (and (eq_attr "cpu" "athlon,k8")
***************
*** 235,262 ****
  			      (and (eq_attr "athlon_decode" "direct")
  				   (and (eq_attr "unit" "integer,unknown")
  					(eq_attr "memory" "both"))))
! 			 "athlon-direct,athlon-load,athlon-ieu,
  			  athlon-store")
  (define_insn_reservation "athlon_ivector_both" 6
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "athlon_decode" "vector")
  				   (and (eq_attr "unit" "integer,unknown")
  					(eq_attr "memory" "both"))))
! 			 "athlon-vector,athlon-load,athlon-ieu,athlon-ieu,
  			  athlon-store")
  (define_insn_reservation "athlon_idirect_store" 1
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "athlon_decode" "direct")
  				   (and (eq_attr "unit" "integer,unknown")
  					(eq_attr "memory" "store"))))
! 			 "athlon-direct,athlon-ieu,
  			  athlon-store")
  (define_insn_reservation "athlon_ivector_store" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "athlon_decode" "vector")
  				   (and (eq_attr "unit" "integer,unknown")
  					(eq_attr "memory" "store"))))
! 			 "athlon-vector,athlon-ieu,athlon-ieu,
  			  athlon-store")
  
  ;; Athlon floatin point unit
--- 267,297 ----
  			      (and (eq_attr "athlon_decode" "direct")
  				   (and (eq_attr "unit" "integer,unknown")
  					(eq_attr "memory" "both"))))
! 			 "athlon-direct,athlon-load,
! 			  athlon-ieu,athlon-store,
  			  athlon-store")
  (define_insn_reservation "athlon_ivector_both" 6
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "athlon_decode" "vector")
  				   (and (eq_attr "unit" "integer,unknown")
  					(eq_attr "memory" "both"))))
! 			 "athlon-vector,athlon-load,
! 			  athlon-ieu,
! 			  athlon-ieu,
  			  athlon-store")
  (define_insn_reservation "athlon_idirect_store" 1
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "athlon_decode" "direct")
  				   (and (eq_attr "unit" "integer,unknown")
  					(eq_attr "memory" "store"))))
! 			 "athlon-direct,(athlon-ieu+athlon-agu),
  			  athlon-store")
  (define_insn_reservation "athlon_ivector_store" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "athlon_decode" "vector")
  				   (and (eq_attr "unit" "integer,unknown")
  					(eq_attr "memory" "store"))))
! 			 "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
  			  athlon-store")
  
  ;; Athlon floatin point unit
***************
*** 265,665 ****
  			      (and (eq_attr "type" "fmov")
  				   (and (eq_attr "memory" "load")
  					(eq_attr "mode" "XF"))))
! 			 "athlon-vector,athlon-fany")
  (define_insn_reservation "athlon_fldxf_k8" 13
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fmov")
  				   (and (eq_attr "memory" "load")
  					(eq_attr "mode" "XF"))))
! 			 "athlon-vector,athlon-fany")
! (define_insn_reservation "athlon_fld" 6
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fmov")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fany,nothing,athlon-load")
! (define_insn_reservation "athlon_fld_k8" 4
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fmov")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fany,athlon-load")
  (define_insn_reservation "athlon_fstxf" 10
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fmov")
  				   (and (eq_attr "memory" "store,both")
  					(eq_attr "mode" "XF"))))
! 			 "athlon-vector,athlon-fstore")
  (define_insn_reservation "athlon_fstxf_k8" 8
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fmov")
  				   (and (eq_attr "memory" "store,both")
  					(eq_attr "mode" "XF"))))
! 			 "athlon-vector,athlon-fstore")
  (define_insn_reservation "athlon_fst" 4
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fmov")
  				   (eq_attr "memory" "store,both")))
! 			 "athlon-direct,athlon-fstore,nothing,athlon-store")
  (define_insn_reservation "athlon_fst_k8" 2
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fmov")
  				   (eq_attr "memory" "store,both")))
! 			 "athlon-direct,athlon-fstore,athlon-store")
  (define_insn_reservation "athlon_fist" 4
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "fistp"))
! 			 "athlon-direct,athlon-fstore,nothing")
  (define_insn_reservation "athlon_fmov" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "fmov"))
! 			 "athlon-direct,athlon-faddmul")
! (define_insn_reservation "athlon_fadd_load" 7
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fop")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-load,athlon-fadd")
  (define_insn_reservation "athlon_fadd_load_k8" 6
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fop")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-load,athlon-fadd")
  (define_insn_reservation "athlon_fadd" 4
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "fop"))
! 			 "athlon-direct,athlon-fadd")
! (define_insn_reservation "athlon_fmul_load" 7
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fmul")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_fmul_load_k8" 6
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fmul")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_fmul" 4
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "fmul"))
! 			 "athlon-direct,athlon-fmul")
  (define_insn_reservation "athlon_fsgn" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "fsgn"))
! 			 "athlon-direct,athlon-fmul")
  (define_insn_reservation "athlon_fdiv_load" 24
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fdiv")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_fdiv_load_k8" 13
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fdiv")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_fdiv" 24
  			 (and (eq_attr "cpu" "athlon")
  			      (eq_attr "type" "fdiv"))
! 			 "athlon-direct,athlon-fmul")
  (define_insn_reservation "athlon_fdiv_k8" 11
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "fdiv"))
! 			 "athlon-direct,athlon-fmul")
  (define_insn_reservation "athlon_fpspc_load" 103
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "fpspc")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_fpspc" 100
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "fpspc"))
! 			 "athlon-vector,athlon-fmul")
! (define_insn_reservation "athlon_fcmov_load" 10
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fcmov")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_fcmov" 7
  			 (and (eq_attr "cpu" "athlon")
  			      (eq_attr "type" "fcmov"))
! 			 "athlon-vector,athlon-fmul")
  (define_insn_reservation "athlon_fcmov_load_k8" 17
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fcmov")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_fcmov_k8" 15
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "fcmov"))
! 			 "athlon-vector,athlon-fmul")
! (define_insn_reservation "athlon_fcomi_load" 6
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fcmp")
  				   (and (eq_attr "athlon_decode" "vector")
  				        (eq_attr "memory" "load"))))
! 			 "athlon-vector,athlon-load,athlon-fadd")
  (define_insn_reservation "athlon_fcomi" 3
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "athlon_decode" "vector")
  				   (eq_attr "type" "fcmp")))
! 			 "athlon-vector,athlon-fadd")
! (define_insn_reservation "athlon_fcom_load" 5
! 			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "fcmp")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-load,athlon-fadd")
  (define_insn_reservation "athlon_fcom" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "fcmp"))
! 			 "athlon-direct,athlon-fadd")
! (define_insn_reservation "athlon_fxch" 2
! 			 (and (eq_attr "cpu" "athlon,k8")
! 			      (eq_attr "type" "fxch"))
! 			 "athlon-direct,athlon-fany")
  ;; Athlon handle MMX operations in the FPU unit with shorter latencies
! (define_insn_reservation "athlon_movlpd_load" 4
! 			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "ssemov")
  				   (match_operand:DF 1 "memory_operand" "")))
! 			 "athlon-direct,athlon-load")
! (define_insn_reservation "athlon_movaps_load" 4
! 			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "ssemov")
  				   (and (eq_attr "mode" "V4SF,V2DF,TI")
  					(eq_attr "memory" "load"))))
! 			 "athlon-double,athlon-load")
! (define_insn_reservation "athlon_movss_load" 3
! 			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "ssemov")
  				   (and (eq_attr "mode" "SF,DI")
  					(eq_attr "memory" "load"))))
! 			 "athlon-double,athlon-load")
! (define_insn_reservation "athlon_mmxsseld" 4
! 			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "mmxmov,ssemov")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fany,athlon-load")
  (define_insn_reservation "athlon_mmxssest" 3
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "mmxmov,ssemov")
  				   (and (eq_attr "mode" "V4SF,V2DF,TI")
  					(eq_attr "memory" "store,both"))))
! 			 "athlon-double,athlon-store")
! (define_insn_reservation "athlon_mmxssest_k8" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "mmxmov,ssemov")
  				   (eq_attr "memory" "store,both")))
! 			 "athlon-direct,athlon-store")
  (define_insn_reservation "athlon_movaps" 2
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "ssemov")
! 				   (eq_attr "mode" "V4SF,V2DF")))
! 			 "athlon-double,athlon-faddmul,athlon-faddmul")
  (define_insn_reservation "athlon_mmxssemov" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "mmxmov,ssemov"))
! 			 "athlon-direct,athlon-faddmul")
! (define_insn_reservation "athlon_mmxmul_load" 6
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "mmxmul")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_mmxmul" 3
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "mmxmul"))
! 			 "athlon-direct,athlon-fmul")
! (define_insn_reservation "athlon_mmx_load" 5
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "unit" "mmx")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-load,athlon-faddmul")
  (define_insn_reservation "athlon_mmx" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "unit" "mmx"))
! 			 "athlon-direct,athlon-faddmul")
  ;; SSE operations are handled by the i387 unit as well.  The latency
  ;; is same as for i387 operations for scalar operations
! (define_insn_reservation "athlon_sselog_load" 6
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "sselog")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_sselog_load_k8" 5
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "sselog")
  				   (eq_attr "memory" "load")))
! 			 "athlon-double,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_sselog" 3
  			 (and (eq_attr "cpu" "athlon")
  			      (eq_attr "type" "sselog"))
! 			 "athlon-vector,athlon-fmul")
  (define_insn_reservation "athlon_sselog_k8" 3
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "sselog"))
! 			 "athlon-double,athlon-fmul")
! (define_insn_reservation "athlon_ssecmp_load" 5
! 			 (and (eq_attr "cpu" "athlon,k8")
! 			      (and (eq_attr "type" "ssecmp,ssecomi")
! 				   (and (eq_attr "mode" "SF,DF")
  					(eq_attr "memory" "load"))))
! 			 "athlon-vector,athlon-load,athlon-fadd")
  (define_insn_reservation "athlon_ssecmp" 2
  			 (and (eq_attr "cpu" "athlon,k8")
! 			      (and (eq_attr "type" "ssecmp,ssecomi")
! 				   (eq_attr "mode" "SF,DF")))
! 			 "athlon-direct,athlon-fadd")
! (define_insn_reservation "athlon_ssecmpvector_load" 6
  			 (and (eq_attr "cpu" "athlon")
! 			      (and (eq_attr "type" "ssecmp,ssecomi")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-fadd")
  (define_insn_reservation "athlon_ssecmpvector_load_k8" 5
  			 (and (eq_attr "cpu" "k8")
! 			      (and (eq_attr "type" "ssecmp,ssecomi")
  				   (eq_attr "memory" "load")))
! 			 "athlon-double,athlon-fadd")
  (define_insn_reservation "athlon_ssecmpvector" 3
  			 (and (eq_attr "cpu" "athlon")
! 			      (eq_attr "type" "ssecmp,ssecomi"))
! 			 "athlon-vector,athlon-fadd")
  (define_insn_reservation "athlon_ssecmpvector_k8" 3
  			 (and (eq_attr "cpu" "k8")
! 			      (eq_attr "type" "ssecmp,ssecomi"))
! 			 "athlon-double,athlon-fadd")
! (define_insn_reservation "athlon_sseadd_load" 7
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "sseadd")
! 				   (and (eq_attr "mode" "SF,DF")
  					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-load,athlon-fadd")
  (define_insn_reservation "athlon_sseadd_load_k8" 6
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "sseadd")
! 				   (and (eq_attr "mode" "SF,DF")
  					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-load,athlon-fadd")
  (define_insn_reservation "athlon_sseadd" 4
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "sseadd")
! 				   (eq_attr "mode" "SF,DF")))
! 			 "athlon-direct,athlon-fadd")
! (define_insn_reservation "athlon_sseaddvector_load" 8
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "sseadd")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-load,athlon-fadd")
  (define_insn_reservation "athlon_sseaddvector_load_k8" 7
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "sseadd")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-load,athlon-fadd")
  (define_insn_reservation "athlon_sseaddvector" 5
  			 (and (eq_attr "cpu" "athlon")
  			      (eq_attr "type" "sseadd"))
! 			 "athlon-vector,athlon-fadd")
! (define_insn_reservation "athlon_sseaddvector_k8" 4
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "sseadd"))
! 			 "athlon-vector,athlon-fadd")
! (define_insn_reservation "athlon_ssecvt_load" 5
! 			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "ssecvt")
! 				   (and (eq_attr "mode" "SF,DF")
! 					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-load,athlon-fadd")
! (define_insn_reservation "athlon_ssecvt_load_k8" 4
! 			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "ssecvt")
! 				   (and (eq_attr "mode" "SF,DF")
! 					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-load,athlon-fadd")
! (define_insn_reservation "athlon_ssecvt" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "ssecvt")
! 				   (eq_attr "mode" "SF,DF")))
! 			 "athlon-direct,athlon-fadd")
! (define_insn_reservation "athlon_ssecvtvector_load" 6
! 			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "ssecvt")
! 				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-load,athlon-fadd")
! (define_insn_reservation "athlon_ssecvtvector_load_k8" 5
! 			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "ssecvt")
! 				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-load,athlon-fadd")
! (define_insn_reservation "athlon_ssecvtvector" 5
! 			 (and (eq_attr "cpu" "athlon")
! 			      (eq_attr "type" "ssecvt"))
! 			 "athlon-vector,athlon-fadd")
! (define_insn_reservation "athlon_ssecvtvector_k8" 3
! 			 (and (eq_attr "cpu" "k8")
! 			      (eq_attr "type" "ssecvt"))
! 			 "athlon-vector,athlon-fadd")
! (define_insn_reservation "athlon_ssemul_load" 7
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "ssemul")
  				   (and (eq_attr "mode" "SF,DF")
  					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_ssemul_load_k8" 6
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "ssemul")
  				   (and (eq_attr "mode" "SF,DF")
  					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_ssemul" 4
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "ssemul")
  				   (eq_attr "mode" "SF,DF")))
! 			 "athlon-direct,athlon-fmul")
! (define_insn_reservation "athlon_ssemulvector_load" 8
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "ssemul")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_ssemulvector_load_k8" 7
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "ssemul")
  				   (eq_attr "memory" "load")))
! 			 "athlon-double,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_ssemulvector" 5
  			 (and (eq_attr "cpu" "athlon")
  			      (eq_attr "type" "ssemul"))
! 			 "athlon-vector,athlon-fmul")
  (define_insn_reservation "athlon_ssemulvector_k8" 5
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "ssemul"))
! 			 "athlon-double,athlon-fmul")
! (define_insn_reservation "athlon_ssediv_load" 19
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "ssediv")
  				   (and (eq_attr "mode" "SF,DF")
  					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-load,athlon-fmul")
! (define_insn_reservation "athlon_ssediv_load_k8" 18
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "ssediv")
  				   (and (eq_attr "mode" "SF,DF")
  					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-load,athlon-fmul")
! (define_insn_reservation "athlon_ssediv" 16
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "ssediv")
  				   (eq_attr "mode" "SF,DF")))
! 			 "athlon-direct,athlon-fmul")
! (define_insn_reservation "athlon_ssedivvector_load" 32
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "ssediv")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-load,athlon-fmul")
  (define_insn_reservation "athlon_ssedivvector_load_k8" 35
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "ssediv")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-load,athlon-fmul")
! (define_insn_reservation "athlon_ssedivvector" 29
  			 (and (eq_attr "cpu" "athlon")
  			      (eq_attr "type" "ssediv"))
! 			 "athlon-vector,athlon-fmul")
! (define_insn_reservation "athlon_ssedivvector_k8" 33
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "ssediv"))
! 			 "athlon-vector,athlon-fmul")
--- 300,869 ----
  			      (and (eq_attr "type" "fmov")
  				   (and (eq_attr "memory" "load")
  					(eq_attr "mode" "XF"))))
! 			 "athlon-vector,athlon-fpload2,athlon-fvector*9")
  (define_insn_reservation "athlon_fldxf_k8" 13
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fmov")
  				   (and (eq_attr "memory" "load")
  					(eq_attr "mode" "XF"))))
! 			 "athlon-vector,athlon-fpload2k8,athlon-fvector*9")
! ;; Assume superforwarding to take place so effective latency of fany op is 0.
! (define_insn_reservation "athlon_fld" 0
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fmov")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fpload,athlon-fany")
! (define_insn_reservation "athlon_fld_k8" 2
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fmov")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fploadk8,athlon-fstore")
! 
  (define_insn_reservation "athlon_fstxf" 10
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fmov")
  				   (and (eq_attr "memory" "store,both")
  					(eq_attr "mode" "XF"))))
! 			 "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
  (define_insn_reservation "athlon_fstxf_k8" 8
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fmov")
  				   (and (eq_attr "memory" "store,both")
  					(eq_attr "mode" "XF"))))
! 			 "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")
  (define_insn_reservation "athlon_fst" 4
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fmov")
  				   (eq_attr "memory" "store,both")))
! 			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
  (define_insn_reservation "athlon_fst_k8" 2
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fmov")
  				   (eq_attr "memory" "store,both")))
! 			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
  (define_insn_reservation "athlon_fist" 4
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "fistp"))
! 			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
  (define_insn_reservation "athlon_fmov" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "fmov"))
! 			 "athlon-direct,athlon-fpsched,athlon-faddmul")
! (define_insn_reservation "athlon_fadd_load" 4
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fop")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fpload,athlon-fadd")
  (define_insn_reservation "athlon_fadd_load_k8" 6
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fop")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fploadk8,athlon-fadd")
  (define_insn_reservation "athlon_fadd" 4
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "fop"))
! 			 "athlon-direct,athlon-fpsched,athlon-fadd")
! (define_insn_reservation "athlon_fmul_load" 4
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fmul")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fpload,athlon-fmul")
  (define_insn_reservation "athlon_fmul_load_k8" 6
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fmul")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fploadk8,athlon-fmul")
  (define_insn_reservation "athlon_fmul" 4
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "fmul"))
! 			 "athlon-direct,athlon-fpsched,athlon-fmul")
  (define_insn_reservation "athlon_fsgn" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "fsgn"))
! 			 "athlon-direct,athlon-fpsched,athlon-fmul")
  (define_insn_reservation "athlon_fdiv_load" 24
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fdiv")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fpload,athlon-fmul")
  (define_insn_reservation "athlon_fdiv_load_k8" 13
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fdiv")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fploadk8,athlon-fmul")
  (define_insn_reservation "athlon_fdiv" 24
  			 (and (eq_attr "cpu" "athlon")
  			      (eq_attr "type" "fdiv"))
! 			 "athlon-direct,athlon-fpsched,athlon-fmul")
  (define_insn_reservation "athlon_fdiv_k8" 11
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "fdiv"))
! 			 "athlon-direct,athlon-fpsched,athlon-fmul")
  (define_insn_reservation "athlon_fpspc_load" 103
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "fpspc")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-fpload,athlon-fvector")
  (define_insn_reservation "athlon_fpspc" 100
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "fpspc"))
! 			 "athlon-vector,athlon-fpsched,athlon-fvector")
! (define_insn_reservation "athlon_fcmov_load" 7
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fcmov")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-fpload,athlon-fvector")
  (define_insn_reservation "athlon_fcmov" 7
  			 (and (eq_attr "cpu" "athlon")
  			      (eq_attr "type" "fcmov"))
! 			 "athlon-vector,athlon-fpsched,athlon-fvector")
  (define_insn_reservation "athlon_fcmov_load_k8" 17
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fcmov")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-fploadk8,athlon-fvector")
  (define_insn_reservation "athlon_fcmov_k8" 15
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "fcmov"))
! 			 "athlon-vector,athlon-fpsched,athlon-fvector")
! ;; fcomi is vector decoded but uses only one pipe.
! (define_insn_reservation "athlon_fcomi_load" 3
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "fcmp")
  				   (and (eq_attr "athlon_decode" "vector")
  				        (eq_attr "memory" "load"))))
! 			 "athlon-vector,athlon-fpload,athlon-fadd")
! (define_insn_reservation "athlon_fcomi_load_k8" 5
! 			 (and (eq_attr "cpu" "k8")
! 			      (and (eq_attr "type" "fcmp")
! 				   (and (eq_attr "athlon_decode" "vector")
! 				        (eq_attr "memory" "load"))))
! 			 "athlon-vector,athlon-fploadk8,athlon-fadd")
  (define_insn_reservation "athlon_fcomi" 3
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "athlon_decode" "vector")
  				   (eq_attr "type" "fcmp")))
! 			 "athlon-vector,athlon-fpsched,athlon-fadd")
! (define_insn_reservation "athlon_fcom_load" 2
! 			 (and (eq_attr "cpu" "athlon")
! 			      (and (eq_attr "type" "fcmp")
! 				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fpload,athlon-fadd")
! (define_insn_reservation "athlon_fcom_load_k8" 4
! 			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "fcmp")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fploadk8,athlon-fadd")
  (define_insn_reservation "athlon_fcom" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "fcmp"))
! 			 "athlon-direct,athlon-fpsched,athlon-fadd")
! ;; Never seen by the scheduler because we still don't do post reg-stack
! ;; scheduling.
! ;(define_insn_reservation "athlon_fxch" 2
! ;			 (and (eq_attr "cpu" "athlon,k8")
! ;			      (eq_attr "type" "fxch"))
! ;			 "athlon-direct,athlon-fpsched,athlon-fany")
! 
  ;; Athlon handle MMX operations in the FPU unit with shorter latencies
! 
! (define_insn_reservation "athlon_movlpd_load" 0
! 			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "ssemov")
  				   (match_operand:DF 1 "memory_operand" "")))
! 			 "athlon-direct,athlon-fpload,athlon-fany")
! (define_insn_reservation "athlon_movlpd_load_k8" 2
! 			 (and (eq_attr "cpu" "k8")
! 			      (and (eq_attr "type" "ssemov")
! 				   (match_operand:DF 1 "memory_operand" "")))
! 			 "athlon-direct,athlon-fploadk8,athlon-fstore")
! (define_insn_reservation "athlon_movaps_load_k8" 2
! 			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "ssemov")
  				   (and (eq_attr "mode" "V4SF,V2DF,TI")
  					(eq_attr "memory" "load"))))
! 			 "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
! (define_insn_reservation "athlon_movaps_load" 0
! 			 (and (eq_attr "cpu" "athlon")
! 			      (and (eq_attr "type" "ssemov")
! 				   (and (eq_attr "mode" "V4SF,V2DF,TI")
! 					(eq_attr "memory" "load"))))
! 			 "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")
! (define_insn_reservation "athlon_movss_load" 1
! 			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "ssemov")
  				   (and (eq_attr "mode" "SF,DI")
  					(eq_attr "memory" "load"))))
! 			 "athlon-vector,athlon-fpload,(athlon-fany*2)")
! (define_insn_reservation "athlon_movss_load_k8" 1
! 			 (and (eq_attr "cpu" "k8")
! 			      (and (eq_attr "type" "ssemov")
! 				   (and (eq_attr "mode" "SF,DI")
! 					(eq_attr "memory" "load"))))
! 			 "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")
! (define_insn_reservation "athlon_mmxsseld" 0
! 			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "mmxmov,ssemov")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fpload,athlon-fany")
! (define_insn_reservation "athlon_mmxsseld_k8" 2
! 			 (and (eq_attr "cpu" "k8")
! 			      (and (eq_attr "type" "mmxmov,ssemov")
! 				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fploadk8,athlon-fstore")
  (define_insn_reservation "athlon_mmxssest" 3
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "mmxmov,ssemov")
  				   (and (eq_attr "mode" "V4SF,V2DF,TI")
  					(eq_attr "memory" "store,both"))))
! 			 "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
! (define_insn_reservation "athlon_mmxssest_k8" 3
! 			 (and (eq_attr "cpu" "k8")
! 			      (and (eq_attr "type" "mmxmov,ssemov")
! 				   (and (eq_attr "mode" "V4SF,V2DF,TI")
! 					(eq_attr "memory" "store,both"))))
! 			 "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
! (define_insn_reservation "athlon_mmxssest_short" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "mmxmov,ssemov")
  				   (eq_attr "memory" "store,both")))
! 			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
  (define_insn_reservation "athlon_movaps" 2
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "ssemov")
! 				   (eq_attr "mode" "V4SF,V2DF,TI")))
! 			 "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
! (define_insn_reservation "athlon_movaps_k8" 2
! 			 (and (eq_attr "cpu" "athlon")
! 			      (and (eq_attr "type" "ssemov")
! 				   (eq_attr "mode" "V4SF,V2DF,TI")))
! 			 "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
  (define_insn_reservation "athlon_mmxssemov" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "mmxmov,ssemov"))
! 			 "athlon-direct,athlon-fpsched,athlon-faddmul")
! (define_insn_reservation "athlon_mmxmul_load" 4
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "mmxmul")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fpload,athlon-fmul")
  (define_insn_reservation "athlon_mmxmul" 3
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "type" "mmxmul"))
! 			 "athlon-direct,athlon-fpsched,athlon-fmul")
! (define_insn_reservation "athlon_mmx_load" 3
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "unit" "mmx")
  				   (eq_attr "memory" "load")))
! 			 "athlon-direct,athlon-fpload,athlon-faddmul")
  (define_insn_reservation "athlon_mmx" 2
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (eq_attr "unit" "mmx"))
! 			 "athlon-direct,athlon-fpsched,athlon-faddmul")
  ;; SSE operations are handled by the i387 unit as well.  The latency
  ;; is same as for i387 operations for scalar operations
! 
! (define_insn_reservation "athlon_sselog_load" 3
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "sselog")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
  (define_insn_reservation "athlon_sselog_load_k8" 5
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "sselog")
  				   (eq_attr "memory" "load")))
! 			 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
  (define_insn_reservation "athlon_sselog" 3
  			 (and (eq_attr "cpu" "athlon")
  			      (eq_attr "type" "sselog"))
! 			 "athlon-vector,athlon-fpsched,athlon-fmul*2")
  (define_insn_reservation "athlon_sselog_k8" 3
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "sselog"))
! 			 "athlon-double,athlon-fpsched,athlon-fmul")
! ;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
! (define_insn_reservation "athlon_ssecmp_load" 2
! 			 (and (eq_attr "cpu" "athlon")
! 			      (and (eq_attr "type" "ssecmp")
! 				   (and (eq_attr "mode" "SF,DF,DI")
! 					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-fpload,athlon-fadd")
! (define_insn_reservation "athlon_ssecmp_load_k8" 4
! 			 (and (eq_attr "cpu" "k8")
! 			      (and (eq_attr "type" "ssecmp")
! 				   (and (eq_attr "mode" "SF,DF,DI,TI")
  					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-fploadk8,athlon-fadd")
  (define_insn_reservation "athlon_ssecmp" 2
  			 (and (eq_attr "cpu" "athlon,k8")
! 			      (and (eq_attr "type" "ssecmp")
! 				   (eq_attr "mode" "SF,DF,DI,TI")))
! 			 "athlon-direct,athlon-fpsched,athlon-fadd")
! (define_insn_reservation "athlon_ssecmpvector_load" 3
  			 (and (eq_attr "cpu" "athlon")
! 			      (and (eq_attr "type" "ssecmp")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
  (define_insn_reservation "athlon_ssecmpvector_load_k8" 5
  			 (and (eq_attr "cpu" "k8")
! 			      (and (eq_attr "type" "ssecmp")
  				   (eq_attr "memory" "load")))
! 			 "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
  (define_insn_reservation "athlon_ssecmpvector" 3
  			 (and (eq_attr "cpu" "athlon")
! 			      (eq_attr "type" "ssecmp"))
! 			 "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
  (define_insn_reservation "athlon_ssecmpvector_k8" 3
  			 (and (eq_attr "cpu" "k8")
! 			      (eq_attr "type" "ssecmp"))
! 			 "athlon-double,athlon-fpsched,(athlon-fadd*2)")
! (define_insn_reservation "athlon_ssecomi_load" 4
! 			 (and (eq_attr "cpu" "athlon")
! 			      (and (eq_attr "type" "ssecomi")
! 				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-fpload,athlon-fadd")
! (define_insn_reservation "athlon_ssecomi_load_k8" 6
! 			 (and (eq_attr "cpu" "k8")
! 			      (and (eq_attr "type" "ssecomi")
! 				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-fploadk8,athlon-fadd")
! (define_insn_reservation "athlon_ssecomi" 4
! 			 (and (eq_attr "cpu" "athlon,k8")
! 			      (eq_attr "type" "ssecomi"))	; fallback for ssecomi insns without a memory load
! 			 "athlon-vector,athlon-fpsched,athlon-fadd")
! (define_insn_reservation "athlon_sseadd_load" 4
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "sseadd")
! 				   (and (eq_attr "mode" "SF,DF,DI")
  					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-fpload,athlon-fadd")
  (define_insn_reservation "athlon_sseadd_load_k8" 6
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "sseadd")
! 				   (and (eq_attr "mode" "SF,DF,DI")
  					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-fploadk8,athlon-fadd")
  (define_insn_reservation "athlon_sseadd" 4
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "sseadd")
! 				   (eq_attr "mode" "SF,DF,DI")))
! 			 "athlon-direct,athlon-fpsched,athlon-fadd")
! (define_insn_reservation "athlon_sseaddvector_load" 5
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "sseadd")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
  (define_insn_reservation "athlon_sseaddvector_load_k8" 7
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "sseadd")
  				   (eq_attr "memory" "load")))
! 			 "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
  (define_insn_reservation "athlon_sseaddvector" 5
  			 (and (eq_attr "cpu" "athlon")
  			      (eq_attr "type" "sseadd"))
! 			 "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
! (define_insn_reservation "athlon_sseaddvector_k8" 5
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "sseadd"))
! 			 "athlon-double,athlon-fpsched,(athlon-fadd*2)")
! 
! ;; Conversions behave very irregularly and the scheduling is critical here.
! ;; Take each instruction separately.  Assume that the mode is always set to the
! ;; destination one and athlon_decode is set to the K8 versions.
! 
! ;; cvtss2sd
! (define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
! 			 (and (eq_attr "cpu" "k8,athlon")
  			      (and (eq_attr "type" "ssecvt")
! 				   (and (eq_attr "athlon_decode" "direct")
! 					(and (eq_attr "mode" "DF")
! 					     (eq_attr "memory" "load")))))
! 			 "athlon-direct,athlon-fploadk8,athlon-fstore")
! (define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
! 			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "ssecvt")
! 				   (and (eq_attr "athlon_decode" "direct")
! 					(eq_attr "mode" "DF"))))
! 			 "athlon-direct,athlon-fpsched,athlon-fstore")
! ;; cvtps2pd.  Model same way the other double decoded FP conversions.
! (define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
! 			 (and (eq_attr "cpu" "k8,athlon")
! 			      (and (eq_attr "type" "ssecvt")
! 				   (and (eq_attr "athlon_decode" "double")
! 					(and (eq_attr "mode" "V2DF,V4SF,TI")
! 					     (eq_attr "memory" "load")))))
! 			 "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
! (define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
! 			 (and (eq_attr "cpu" "k8,athlon")
! 			      (and (eq_attr "type" "ssecvt")
! 				   (and (eq_attr "athlon_decode" "double")
! 					(eq_attr "mode" "V2DF,V4SF,TI"))))
! 			 "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
! ;; cvtsi2sd mem,reg is directpath (cvtsi2sd reg,reg is doublepath)
! ;; cvtsi2sd has throughput 1 and is executed in store unit with latency of 6
! (define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
! 			 (and (eq_attr "cpu" "athlon,k8")
! 			      (and (eq_attr "type" "sseicvt")
! 				   (and (eq_attr "athlon_decode" "direct")
! 					(and (eq_attr "mode" "SF,DF")
! 					     (eq_attr "memory" "load")))))
! 			 "athlon-direct,athlon-fploadk8,athlon-fstore")
! ;; cvtsi2ss mem, reg is doublepath
! (define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
! 			 (and (eq_attr "cpu" "athlon")
! 			      (and (eq_attr "type" "sseicvt")
! 				   (and (eq_attr "athlon_decode" "double")
! 					(and (eq_attr "mode" "SF,DF")
! 					     (eq_attr "memory" "load")))))
! 			 "athlon-vector,athlon-fpload,(athlon-fstore*2)")
! (define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
! 			 (and (eq_attr "cpu" "k8")
! 			      (and (eq_attr "type" "sseicvt")
! 				   (and (eq_attr "athlon_decode" "double")
! 					(and (eq_attr "mode" "SF,DF")
! 					     (eq_attr "memory" "load")))))
! 			 "athlon-double,athlon-fploadk8,(athlon-fstore*2)")
! ;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
! (define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
! 			 (and (eq_attr "cpu" "k8,athlon")
! 			      (and (eq_attr "type" "sseicvt")
! 				   (and (eq_attr "athlon_decode" "double")
! 					(and (eq_attr "mode" "SF,DF")
! 					     (eq_attr "memory" "none")))))
! 			 "athlon-double,athlon-fploadk8,athlon-fstore")
! ;; cvtsi2ss reg, reg is doublepath
! (define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
  			 (and (eq_attr "cpu" "athlon,k8")
+ 			      (and (eq_attr "type" "sseicvt")
+ 				   (and (eq_attr "athlon_decode" "vector")
+ 					(and (eq_attr "mode" "SF,DF")
+ 					     (eq_attr "memory" "none")))))
+ 			 "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
+ ;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9
+ (define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
+ 			 (and (eq_attr "cpu" "k8,athlon")
  			      (and (eq_attr "type" "ssecvt")
! 				   (and (eq_attr "athlon_decode" "double")
! 					(and (eq_attr "mode" "SF")
! 					     (eq_attr "memory" "load")))))
! 			 "athlon-double,athlon-fploadk8,(athlon-fstore*3)")
! ;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12
! (define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
! 			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "ssecvt")
! 				   (and (eq_attr "athlon_decode" "vector")
! 					(and (eq_attr "mode" "SF")
! 					     (eq_attr "memory" "none")))))
! 			 "athlon-vector,athlon-fpsched,(athlon-fvector*3)")
! (define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
! 			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "ssecvt")
! 				   (and (eq_attr "athlon_decode" "vector")
! 					(and (eq_attr "mode" "V4SF,V2DF,TI")
! 					     (eq_attr "memory" "load")))))
! 			 "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
! ;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10
! ;; ??? Why is it faster than cvtsd2ss?
! (define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
! 			 (and (eq_attr "cpu" "athlon,k8")
! 			      (and (eq_attr "type" "ssecvt")
! 				   (and (eq_attr "athlon_decode" "vector")
! 					(and (eq_attr "mode" "V4SF,V2DF,TI")
! 					     (eq_attr "memory" "none")))))
! 			 "athlon-vector,athlon-fpsched,athlon-fvector*2")
! ;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9
! (define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
! 			 (and (eq_attr "cpu" "athlon,k8")
! 			      (and (eq_attr "type" "sseicvt")
! 				   (and (eq_attr "athlon_decode" "vector")
! 					(and (eq_attr "mode" "SI,DI")
! 					     (eq_attr "memory" "load")))))
! 			 "athlon-vector,athlon-fploadk8,athlon-fvector")
! ;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9
! (define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
! 			 (and (eq_attr "cpu" "athlon")
! 			      (and (eq_attr "type" "sseicvt")
! 				   (and (eq_attr "athlon_decode" "double")
! 					(and (eq_attr "mode" "SI,DI")
! 					     (eq_attr "memory" "none")))))
! 			 "athlon-vector,athlon-fpsched,athlon-fvector")
! (define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
! 			 (and (eq_attr "cpu" "k8")
! 			      (and (eq_attr "type" "sseicvt")
! 				   (and (eq_attr "athlon_decode" "double")
! 					(and (eq_attr "mode" "SI,DI")
! 					     (eq_attr "memory" "none")))))
! 			 "athlon-double,athlon-fpsched,athlon-fstore")
! 
! 
! (define_insn_reservation "athlon_ssemul_load" 4
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "ssemul")
  				   (and (eq_attr "mode" "SF,DF")
  					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-fpload,athlon-fmul")
  (define_insn_reservation "athlon_ssemul_load_k8" 6
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "ssemul")
  				   (and (eq_attr "mode" "SF,DF")
  					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-fploadk8,athlon-fmul")
  (define_insn_reservation "athlon_ssemul" 4
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "ssemul")
  				   (eq_attr "mode" "SF,DF")))
! 			 "athlon-direct,athlon-fpsched,athlon-fmul")
! (define_insn_reservation "athlon_ssemulvector_load" 5
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "ssemul")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
  (define_insn_reservation "athlon_ssemulvector_load_k8" 7
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "ssemul")
  				   (eq_attr "memory" "load")))
! 			 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
  (define_insn_reservation "athlon_ssemulvector" 5
  			 (and (eq_attr "cpu" "athlon")
  			      (eq_attr "type" "ssemul"))
! 			 "athlon-vector,athlon-fpsched,(athlon-fmul*2)")
  (define_insn_reservation "athlon_ssemulvector_k8" 5
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "ssemul"))
! 			 "athlon-double,athlon-fpsched,(athlon-fmul*2)")
! ;; divsd timings.  divss is faster
! (define_insn_reservation "athlon_ssediv_load" 20
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "ssediv")
  				   (and (eq_attr "mode" "SF,DF")
  					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-fpload,athlon-fmul*17")
! (define_insn_reservation "athlon_ssediv_load_k8" 22
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "ssediv")
  				   (and (eq_attr "mode" "SF,DF")
  					(eq_attr "memory" "load"))))
! 			 "athlon-direct,athlon-fploadk8,athlon-fmul*17")
! (define_insn_reservation "athlon_ssediv" 20
  			 (and (eq_attr "cpu" "athlon,k8")
  			      (and (eq_attr "type" "ssediv")
  				   (eq_attr "mode" "SF,DF")))
! 			 "athlon-direct,athlon-fpsched,athlon-fmul*17")
! (define_insn_reservation "athlon_ssedivvector_load" 39
  			 (and (eq_attr "cpu" "athlon")
  			      (and (eq_attr "type" "ssediv")
  				   (eq_attr "memory" "load")))
! 			 "athlon-vector,athlon-fpload2,athlon-fmul*34")
  (define_insn_reservation "athlon_ssedivvector_load_k8" 35
  			 (and (eq_attr "cpu" "k8")
  			      (and (eq_attr "type" "ssediv")
  				   (eq_attr "memory" "load")))
! 			 "athlon-double,athlon-fpload2k8,athlon-fmul*34")
! (define_insn_reservation "athlon_ssedivvector" 39
  			 (and (eq_attr "cpu" "athlon")
  			      (eq_attr "type" "ssediv"))
! 			 "athlon-vector,athlon-fmul*34")
! (define_insn_reservation "athlon_ssedivvector_k8" 39
  			 (and (eq_attr "cpu" "k8")
  			      (eq_attr "type" "ssediv"))
! 			 "athlon-double,athlon-fmul*34")
Index: i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.560
diff -c -3 -p -r1.560 i386.c
*** i386.c	11 Apr 2003 21:27:13 -0000	1.560
--- i386.c	19 Apr 2003 17:43:11 -0000
*************** ix86_adjust_cost (insn, link, dep_insn, 
*** 12125,12137 ****
        if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
  	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
   	{
! 	  /* Claim moves to take one cycle, as core can issue one load
! 	     at time and the next load can start cycle later.  */
! 	  if (dep_insn_type == TYPE_IMOV
! 	      || dep_insn_type == TYPE_FMOV)
! 	    cost = 0;
! 	  else if (cost >= 3)
! 	    cost -= 3;
  	  else
  	    cost = 0;
  	}
--- 12125,12145 ----
        if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
  	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
   	{
! 	  enum attr_unit unit = get_attr_unit (insn);
! 	  int loadcost = 3;
! 
! 	  /* Because of the difference between the length of integer and
! 	     floating unit pipeline preparation stages, the memory operands
! 	     for floating point are cheaper. 
! 
! 	     ??? For Athlon the difference is most probably 2.  */
! 	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
! 	    loadcost = 3;
! 	  else
! 	    loadcost = TARGET_ATHLON ? 2 : 0;
! 
! 	  if (cost >= loadcost)
! 	    cost -= loadcost;
  	  else
  	    cost = 0;
  	}
Index: i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.455
diff -c -3 -p -r1.455 i386.md
*** i386.md	15 Apr 2003 13:33:57 -0000	1.455
--- i386.md	19 Apr 2003 17:43:12 -0000
***************
*** 3990,3996 ****
  }
    [(set_attr "type" "ssecvt,ssecvt,fmov")
     (set_attr "athlon_decode" "vector,double,*")
!    (set_attr "mode" "DF,DF,SF")])
  
  (define_insn "*truncdfsf2_2_nooverlap"
    [(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m")
--- 3990,3996 ----
  }
    [(set_attr "type" "ssecvt,ssecvt,fmov")
     (set_attr "athlon_decode" "vector,double,*")
!    (set_attr "mode" "SF,SF,SF")])
  
  (define_insn "*truncdfsf2_2_nooverlap"
    [(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m")
***************
*** 4037,4043 ****
    "cvtsd2ss\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssecvt")
     (set_attr "athlon_decode" "vector,double")
!    (set_attr "mode" "DF")])
  
  (define_insn "*truncdfsf2_sse_only_nooverlap"
    [(set (match_operand:SF 0 "register_operand" "=&Y")
--- 4037,4043 ----
    "cvtsd2ss\t{%1, %0|%0, %1}"
    [(set_attr "type" "ssecvt")
     (set_attr "athlon_decode" "vector,double")
!    (set_attr "mode" "SF")])
  
  (define_insn "*truncdfsf2_sse_only_nooverlap"
    [(set (match_operand:SF 0 "register_operand" "=&Y")
***************
*** 4443,4449 ****
      }
    DONE;
  }
!   [(set_attr "type" "fistp")])
  
  (define_insn "fix_truncdi_nomemory"
    [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
--- 4443,4450 ----
      }
    DONE;
  }
!   [(set_attr "type" "fistp")
!    (set_attr "mode" "DI")])
  
  (define_insn "fix_truncdi_nomemory"
    [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
***************
*** 4455,4461 ****
    "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
     && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
    "#"
!   [(set_attr "type" "fistp")])
  
  (define_insn "fix_truncdi_memory"
    [(set (match_operand:DI 0 "memory_operand" "=m")
--- 4456,4463 ----
    "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
     && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
    "#"
!   [(set_attr "type" "fistp")
!    (set_attr "mode" "DI")])
  
  (define_insn "fix_truncdi_memory"
    [(set (match_operand:DI 0 "memory_operand" "=m")
***************
*** 4466,4472 ****
    "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
     && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
    "* operands[5] = operands[4]; return output_fix_trunc (insn, operands);"
!   [(set_attr "type" "fistp")])
  
  (define_split 
    [(set (match_operand:DI 0 "register_operand" "")
--- 4468,4475 ----
    "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
     && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
    "* operands[5] = operands[4]; return output_fix_trunc (insn, operands);"
!   [(set_attr "type" "fistp")
!    (set_attr "mode" "DI")])
  
  (define_split 
    [(set (match_operand:DI 0 "register_operand" "")
***************
*** 4504,4509 ****
--- 4507,4513 ----
    "TARGET_64BIT && TARGET_SSE"
    "cvttss2si{q}\t{%1, %0|%0, %1}"
    [(set_attr "type" "sseicvt")
+    (set_attr "mode" "SF")
     (set_attr "athlon_decode" "double,vector")])
  
  ;; Avoid vector decoded form of the instruction.
***************
*** 4522,4527 ****
--- 4526,4532 ----
    "TARGET_64BIT && TARGET_SSE2"
    "cvttsd2si{q}\t{%1, %0|%0, %1}"
    [(set_attr "type" "sseicvt,sseicvt")
+    (set_attr "mode" "DF")
     (set_attr "athlon_decode" "double,vector")])
  
  ;; Avoid vector decoded form of the instruction.
***************
*** 4605,4611 ****
      }
    DONE;
  }
!   [(set_attr "type" "fistp")])
  
  (define_insn "fix_truncsi_nomemory"
    [(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r")
--- 4610,4617 ----
      }
    DONE;
  }
!   [(set_attr "type" "fistp")
!    (set_attr "mode" "SI")])
  
  (define_insn "fix_truncsi_nomemory"
    [(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r")
***************
*** 4616,4622 ****
    "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
     && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
    "#"
!   [(set_attr "type" "fistp")])
  
  (define_insn "fix_truncsi_memory"
    [(set (match_operand:SI 0 "memory_operand" "=m")
--- 4622,4629 ----
    "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
     && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
    "#"
!   [(set_attr "type" "fistp")
!    (set_attr "mode" "SI")])
  
  (define_insn "fix_truncsi_memory"
    [(set (match_operand:SI 0 "memory_operand" "=m")
***************
*** 4626,4632 ****
    "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
     && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
    "* return output_fix_trunc (insn, operands);"
!   [(set_attr "type" "fistp")])
  
  ;; When SSE available, it is always faster to use it!
  (define_insn "fix_truncsfsi_sse"
--- 4633,4640 ----
    "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
     && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
    "* return output_fix_trunc (insn, operands);"
!   [(set_attr "type" "fistp")
!    (set_attr "mode" "SI")])
  
  ;; When SSE available, it is always faster to use it!
  (define_insn "fix_truncsfsi_sse"
***************
*** 4635,4640 ****
--- 4643,4649 ----
    "TARGET_SSE"
    "cvttss2si\t{%1, %0|%0, %1}"
    [(set_attr "type" "sseicvt")
+    (set_attr "mode" "DF")
     (set_attr "athlon_decode" "double,vector")])
  
  ;; Avoid vector decoded form of the instruction.
***************
*** 4653,4658 ****
--- 4662,4668 ----
    "TARGET_SSE2"
    "cvttsd2si\t{%1, %0|%0, %1}"
    [(set_attr "type" "sseicvt")
+    (set_attr "mode" "DF")
     (set_attr "athlon_decode" "double,vector")])
  
  ;; Avoid vector decoded form of the instruction.
***************
*** 4743,4749 ****
      }
    DONE;
  }
!   [(set_attr "type" "fistp")])
  
  (define_insn "fix_trunchi_nomemory"
    [(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r")
--- 4753,4760 ----
      }
    DONE;
  }
!   [(set_attr "type" "fistp")
!    (set_attr "mode" "HI")])
  
  (define_insn "fix_trunchi_nomemory"
    [(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r")
***************
*** 4754,4760 ****
    "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
     && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
    "#"
!   [(set_attr "type" "fistp")])
  
  (define_insn "fix_trunchi_memory"
    [(set (match_operand:HI 0 "memory_operand" "=m")
--- 4765,4772 ----
    "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
     && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
    "#"
!   [(set_attr "type" "fistp")
!    (set_attr "mode" "HI")])
  
  (define_insn "fix_trunchi_memory"
    [(set (match_operand:HI 0 "memory_operand" "=m")
***************
*** 4764,4770 ****
    "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
     && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
    "* return output_fix_trunc (insn, operands);"
!   [(set_attr "type" "fistp")])
  
  (define_split 
    [(set (match_operand:HI 0 "memory_operand" "")
--- 4776,4783 ----
    "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
     && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
    "* return output_fix_trunc (insn, operands);"
!   [(set_attr "type" "fistp")
!    (set_attr "mode" "HI")])
  
  (define_split 
    [(set (match_operand:HI 0 "memory_operand" "")
***************
*** 20262,20268 ****
    "cvtss2si\t{%1, %0|%0, %1}"
    [(set_attr "type" "sseicvt")
     (set_attr "athlon_decode" "double,vector")
!    (set_attr "mode" "SF")])
  
  (define_insn "cvtss2siq"
    [(set (match_operand:DI 0 "register_operand" "=r,r")
--- 20275,20281 ----
    "cvtss2si\t{%1, %0|%0, %1}"
    [(set_attr "type" "sseicvt")
     (set_attr "athlon_decode" "double,vector")
!    (set_attr "mode" "SI")])
  
  (define_insn "cvtss2siq"
    [(set (match_operand:DI 0 "register_operand" "=r,r")
***************
*** 20273,20279 ****
    "cvtss2siq\t{%1, %0|%0, %1}"
    [(set_attr "type" "sseicvt")
     (set_attr "athlon_decode" "double,vector")
!    (set_attr "mode" "SF")])
  
  (define_insn "cvttss2si"
    [(set (match_operand:SI 0 "register_operand" "=r,r")
--- 20286,20292 ----
    "cvtss2siq\t{%1, %0|%0, %1}"
    [(set_attr "type" "sseicvt")
     (set_attr "athlon_decode" "double,vector")
!    (set_attr "mode" "DI")])
  
  (define_insn "cvttss2si"
    [(set (match_operand:SI 0 "register_operand" "=r,r")
***************
*** 21891,21912 ****
  ;; Conversions between SI and DF
  
  (define_insn "cvtsd2si"
!   [(set (match_operand:SI 0 "register_operand" "=r")
! 	(fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
  			       (parallel [(const_int 0)]))))]
    "TARGET_SSE2"
    "cvtsd2si\t{%1, %0|%0, %1}"
    [(set_attr "type" "sseicvt")
     (set_attr "mode" "SI")])
  
  (define_insn "cvtsd2siq"
!   [(set (match_operand:DI 0 "register_operand" "=r")
! 	(fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
  			       (parallel [(const_int 0)]))))]
    "TARGET_SSE2 && TARGET_64BIT"
    "cvtsd2siq\t{%1, %0|%0, %1}"
    [(set_attr "type" "sseicvt")
!    (set_attr "mode" "SI")])
  
  (define_insn "cvttsd2si"
    [(set (match_operand:SI 0 "register_operand" "=r,r")
--- 21904,21927 ----
  ;; Conversions between SI and DF
  
  (define_insn "cvtsd2si"
!   [(set (match_operand:SI 0 "register_operand" "=r,r")
! 	(fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m")
  			       (parallel [(const_int 0)]))))]
    "TARGET_SSE2"
    "cvtsd2si\t{%1, %0|%0, %1}"
    [(set_attr "type" "sseicvt")
+    (set_attr "athlon_decode" "double,vector")
     (set_attr "mode" "SI")])
  
  (define_insn "cvtsd2siq"
!   [(set (match_operand:DI 0 "register_operand" "=r,r")
! 	(fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m")
  			       (parallel [(const_int 0)]))))]
    "TARGET_SSE2 && TARGET_64BIT"
    "cvtsd2siq\t{%1, %0|%0, %1}"
    [(set_attr "type" "sseicvt")
!    (set_attr "athlon_decode" "double,vector")
!    (set_attr "mode" "DI")])
  
  (define_insn "cvttsd2si"
    [(set (match_operand:SI 0 "register_operand" "=r,r")



More information about the Gcc-patches mailing list