This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Pentium DFA description




This is basically Jan's Pentium DFA description with a few minor changes:

  1. Jan made a change to mark a variety of FP instructions as pairable
     in the U pipe.  That is wrong.  The only pairing for FP instructions
     I'm aware of is when they pair with fxch, which we make no attempt
     to model in either the old or the new scheduling description.

  2. A minor bug in the pent_call reservation caused genattrtab to fail a
     consistency check when building the DFA.  That minor bug was fixed.

  3. The insn reservations have been properly formatted ;-)


Benchmarking was done with spec92 -- not because I particularly like spec92,
but because I can make a run in less than 24 hrs on this ancient hardware :-0

The Pentium DFA description makes no statistically significant difference
in the integer suite.  The DFA description did result in a measurable
improvement for FP codes, roughly 1.2% across the entire SPECfp suite.

I also benchmarked the time genattrtab took to build insn-attrtab.c for the 
old pipeline description as well as the new DFA description.

For the old pipeline description, genattrtab took 58.22 seconds of wall clock
time (57.92 of that in user time).  Contrast that to the DFA description which
took 11.17 seconds (11.06 in user time).

Now before we all jump for joy, the trend I've noticed benchmarking the time
to build insn-attrtab has been the opposite, i.e., the DFA descriptions have
consistently taken longer than the old descriptions.  Whatever the old Pentium
description was doing (maybe the conflict list stuff?) was particularly
expensive, while doing the equivalent with a DFA description is apparently
very cheap.

Just for completeness, I did a bootstrap with -march=i586 -mcpu=i586 and a
normal testrun with no regressions :-)  Installed into the mainline sources.

	* i386.c (ia32_use_dfa_pipeline_interface): New function.  Use
	the DFA interface for Pentium processors.
	(TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE): DEFINE.
	(attr_pent_pair, ix86_pent_find_pair): Remove.
	(ix86_sched_reorder_pentium): Remove.
	(ix86_sched_reorder): Remove reordering for Pentium.
	* i386.md (Pentium scheduling): Rewrite using DFA description.

Index: i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.393
diff -c -3 -p -r1.393 i386.c
*** i386.c	6 May 2002 16:07:03 -0000	1.393
--- i386.c	9 May 2002 13:39:12 -0000
*************** Boston, MA 02111-1307, USA.  */
*** 44,49 ****
--- 44,62 ----
  #include "target-def.h"
  #include "langhooks.h"
  
+ static int ia32_use_dfa_pipeline_interface PARAMS ((void));
+ 
+ #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE 
+ #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia32_use_dfa_pipeline_interface
+ 
+ static int
+ ia32_use_dfa_pipeline_interface ()
+ {
+   if (ix86_cpu == PROCESSOR_PENTIUM)
+     return 1;
+   return 0;
+ }
+ 
  #ifndef CHECK_STACK_LIMIT
  #define CHECK_STACK_LIMIT (-1)
  #endif
*************** static int ix86_flags_dependant PARAMS (
*** 659,670 ****
  static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
  static int ix86_safe_length PARAMS ((rtx));
  static enum attr_memory ix86_safe_memory PARAMS ((rtx));
- static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
  static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
  static void ix86_dump_ppro_packet PARAMS ((FILE *));
  static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
- static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
- 					 rtx));
  static void ix86_init_machine_status PARAMS ((struct function *));
  static void ix86_mark_machine_status PARAMS ((struct function *));
  static void ix86_free_machine_status PARAMS ((struct function *));
--- 672,680 ----
*************** static void ix86_emit_save_regs PARAMS (
*** 675,681 ****
  static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
  static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
  static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
- static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
  static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
  static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
  static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
--- 685,690 ----
*************** ix86_safe_memory (insn)
*** 10311,10326 ****
      return MEMORY_UNKNOWN;
  }
  
- static enum attr_pent_pair
- ix86_safe_pent_pair (insn)
-      rtx insn;
- {
-   if (recog_memoized (insn) >= 0)
-     return get_attr_pent_pair (insn);
-   else
-     return PENT_PAIR_NP;
- }
- 
  static enum attr_ppro_uops
  ix86_safe_ppro_uops (insn)
       rtx insn;
--- 10320,10325 ----
*************** ix86_reorder_insn (insnp, slot)
*** 10374,10502 ****
      }
  }
  
- /* Find an instruction with given pairability and minimal amount of cycles
-    lost by the fact that the CPU waits for both pipelines to finish before
-    reading next instructions.  Also take care that both instructions together
-    can not exceed 7 bytes.  */
- 
- static rtx *
- ix86_pent_find_pair (e_ready, ready, type, first)
-      rtx *e_ready;
-      rtx *ready;
-      enum attr_pent_pair type;
-      rtx first;
- {
-   int mincycles, cycles;
-   enum attr_pent_pair tmp;
-   enum attr_memory memory;
-   rtx *insnp, *bestinsnp = NULL;
- 
-   if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
-     return NULL;
- 
-   memory = ix86_safe_memory (first);
-   cycles = result_ready_cost (first);
-   mincycles = INT_MAX;
- 
-   for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
-     if ((tmp = ix86_safe_pent_pair (*insnp)) == type
- 	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
-       {
- 	enum attr_memory second_memory;
- 	int secondcycles, currentcycles;
- 
- 	second_memory = ix86_safe_memory (*insnp);
- 	secondcycles = result_ready_cost (*insnp);
- 	currentcycles = abs (cycles - secondcycles);
- 
- 	if (secondcycles >= 1 && cycles >= 1)
- 	  {
- 	    /* Two read/modify/write instructions together takes two
- 	       cycles longer.  */
- 	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
- 	      currentcycles += 2;
- 
- 	    /* Read modify/write instruction followed by read/modify
- 	       takes one cycle longer.  */
- 	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
- 	        && tmp != PENT_PAIR_UV
- 	        && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
- 	      currentcycles += 1;
- 	  }
- 	if (currentcycles < mincycles)
- 	  bestinsnp = insnp, mincycles = currentcycles;
-       }
- 
-   return bestinsnp;
- }
- 
- /* Subroutines of ix86_sched_reorder.  */
- 
- static void
- ix86_sched_reorder_pentium (ready, e_ready)
-      rtx *ready;
-      rtx *e_ready;
- {
-   enum attr_pent_pair pair1, pair2;
-   rtx *insnp;
- 
-   /* This wouldn't be necessary if Haifa knew that static insn ordering
-      is important to which pipe an insn is issued to.  So we have to make
-      some minor rearrangements.  */
- 
-   pair1 = ix86_safe_pent_pair (*e_ready);
- 
-   /* If the first insn is non-pairable, let it be.  */
-   if (pair1 == PENT_PAIR_NP)
-     return;
- 
-   pair2 = PENT_PAIR_NP;
-   insnp = 0;
- 
-   /* If the first insn is UV or PV pairable, search for a PU
-      insn to go with.  */
-   if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
-     {
-       insnp = ix86_pent_find_pair (e_ready-1, ready,
- 				   PENT_PAIR_PU, *e_ready);
-       if (insnp)
- 	pair2 = PENT_PAIR_PU;
-     }
- 
-   /* If the first insn is PU or UV pairable, search for a PV
-      insn to go with.  */
-   if (pair2 == PENT_PAIR_NP
-       && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
-     {
-       insnp = ix86_pent_find_pair (e_ready-1, ready,
- 				   PENT_PAIR_PV, *e_ready);
-       if (insnp)
- 	pair2 = PENT_PAIR_PV;
-     }
- 
-   /* If the first insn is pairable, search for a UV
-      insn to go with.  */
-   if (pair2 == PENT_PAIR_NP)
-     {
-       insnp = ix86_pent_find_pair (e_ready-1, ready,
- 				   PENT_PAIR_UV, *e_ready);
-       if (insnp)
- 	pair2 = PENT_PAIR_UV;
-     }
- 
-   if (pair2 == PENT_PAIR_NP)
-     return;
- 
-   /* Found something!  Decide if we need to swap the order.  */
-   if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
-       || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
- 	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
- 	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
-     ix86_reorder_insn (insnp, e_ready);
-   else
-     ix86_reorder_insn (insnp, e_ready - 1);
- }
- 
  static void
  ix86_sched_reorder_ppro (ready, e_ready)
       rtx *ready;
--- 10373,10378 ----
*************** ix86_sched_reorder (dump, sched_verbose,
*** 10601,10610 ****
    switch (ix86_cpu)
      {
      default:
-       break;
- 
-     case PROCESSOR_PENTIUM:
-       ix86_sched_reorder_pentium (ready, e_ready);
        break;
  
      case PROCESSOR_PENTIUMPRO:
--- 10477,10482 ----
Index: i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.353
diff -c -3 -p -r1.353 i386.md
*** i386.md	4 May 2002 17:06:56 -0000	1.353
--- i386.md	9 May 2002 13:39:30 -0000
***************
*** 365,590 ****
  	]
  	(const_string "np")))
  
! ;; Rough readiness numbers.  Fine tuning happens in i386.c.
! ;;
! ;; u	describes pipe U
! ;; v	describes pipe V
! ;; uv	describes either pipe U or V for those that can issue to either
! ;; np	describes not paring
! ;; fpu	describes fpu
! ;; fpm	describes fp insns of different types are not pipelined.
! ;;
! ;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
  
! (define_function_unit "pent_np" 1 0
!   (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "imul"))
!   11 11)
  
! (define_function_unit "pent_mul" 1 1
    (and (eq_attr "cpu" "pentium")
         (eq_attr "type" "imul"))
!   11 11)
  
! ;; Rep movs takes minimally 12 cycles.
! (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
         (eq_attr "type" "str"))
!   12 12)
  
! ; ??? IDIV for SI takes 46 cycles, for HI 30, for QI 22
! (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
         (eq_attr "type" "idiv"))
!   46 46)
  
! ; Fp reg-reg moves takes 1 cycle. Loads takes 1 cycle for SF/DF mode,
! ; 3 cycles for XFmode.  Stores takes 2 cycles for SF/DF and 3 for XF.
! ; fldz and fld1 takes 2 cycles.  Only reg-reg moves are pairable.
! ; The integer <-> fp conversion is not modeled correctly. Fild behaves
! ; like normal fp operation and fist takes 6 cycles.
  
! (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
         (and (eq_attr "type" "fmov")
! 	    (and (eq_attr "memory" "load,store")
! 		 (eq_attr "mode" "XF"))))
!   3 3)
  
! (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
         (and (eq_attr "type" "fmov")
  	    (and (eq_attr "memory" "load,store")
  		 (eq_attr "mode" "XF"))))
!   3 3)
! 
! (define_function_unit "fpu" 1 0
!   (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "type" "fmov")
!             (ior (match_operand 1 "immediate_operand" "")
! 	         (eq_attr "memory" "store"))))
!   2 2)
! 
! (define_function_unit "pent_np" 1 0
!   (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "type" "fmov")
!             (ior (match_operand 1 "immediate_operand" "")
! 	         (eq_attr "memory" "store"))))
!   2 2)
  
! (define_function_unit "pent_np" 1 0
!   (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "cld"))
!   2 2)
! 
! (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
         (and (eq_attr "type" "fmov")
! 	    (eq_attr "memory" "none,load")))
!   1 1)
  
! ; Read/Modify/Write instructions usually take 3 cycles.
! (define_function_unit "pent_u" 1 0
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "type" "alu,alu1,ishift")
! 	    (and (eq_attr "pent_pair" "pu")
! 		 (eq_attr "memory" "both"))))
!   3 3)
  
! (define_function_unit "pent_uv" 2 0
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "type" "alu,alu1,ishift")
! 	    (and (eq_attr "pent_pair" "!np")
! 		 (eq_attr "memory" "both"))))
!   3 3)
  
! (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "type" "alu,alu1,negnot,ishift")
! 	    (and (eq_attr "pent_pair" "np")
! 		 (eq_attr "memory" "both"))))
!   3 3)
  
! ; Read/Modify or Modify/Write instructions usually take 2 cycles.
! (define_function_unit "pent_u" 1 0
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "type" "alu,ishift")
! 	    (and (eq_attr "pent_pair" "pu")
! 		 (eq_attr "memory" "load,store"))))
!   2 2)
  
! (define_function_unit "pent_uv" 2 0
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "type" "alu,ishift")
! 	    (and (eq_attr "pent_pair" "!np")
! 		 (eq_attr "memory" "load,store"))))
!   2 2)
  
! (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "type" "alu,ishift")
! 	    (and (eq_attr "pent_pair" "np")
! 		 (eq_attr "memory" "load,store"))))
!   2 2)
  
! ; Insns w/o memory operands and move instructions usually take one cycle.
! (define_function_unit "pent_u" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "pent_pair" "pu"))
!   1 1)
  
! (define_function_unit "pent_v" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "pent_pair" "pv"))
!   1 1)
  
! (define_function_unit "pent_uv" 2 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "pent_pair" "!np"))
!   1 1)
  
! (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "pent_pair" "np"))
!   1 1)
  
! ; Pairable insns only conflict with other non-pairable insns.
! (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "type" "alu,alu1,ishift")
! 	    (and (eq_attr "pent_pair" "!np")
! 		 (eq_attr "memory" "both"))))
!   3 3
!   [(eq_attr "pent_pair" "np")])
  
! (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "type" "alu,alu1,ishift")
! 	    (and (eq_attr "pent_pair" "!np")
! 		 (eq_attr "memory" "load,store"))))
!   2 2
!   [(eq_attr "pent_pair" "np")])
  
! (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "pent_pair" "!np"))
!   1 1
!   [(eq_attr "pent_pair" "np")])
  
! ; Floating point instructions usually blocks cycle longer when combined with
! ; integer instructions, because of the inpaired fxch instruction.
! (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fmov,fop,fsgn,fmul,fpspc,fcmov,fcmp,fistp"))
!   2 2
!   [(eq_attr "type" "!fmov,fop,fsgn,fmul,fpspc,fcmov,fcmp,fistp")])
  
! (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fcmp,fxch,fsgn"))
!   1 1)
  
! ; Addition takes 3 cycles; assume other random cruft does as well.
! ; ??? Trivial fp operations such as fabs or fchs takes only one cycle.
! (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fop,fistp"))
!   3 1)
  
! ; Multiplication takes 3 cycles and is only half pipelined.
! (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fmul"))
!   3 1)
  
! (define_function_unit "pent_mul" 1 1
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fmul"))
!   2 2)
  
! ; ??? This is correct only for fdiv and sqrt -- sin/cos take 65-100 cycles. 
! ; They can overlap with integer insns.  Only the last two cycles can overlap
! ; with other fp insns.  Only fsin/fcos can overlap with multiplies.
! ; Only last two cycles of fsin/fcos can overlap with other instructions.
! (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fdiv"))
!   39 37)
  
! (define_function_unit "pent_mul" 1 1
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fdiv"))
!   39 39)
  
! (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fpspc"))
!   70 68)
  
- (define_function_unit "pent_mul" 1 1
-   (and (eq_attr "cpu" "pentium")
-        (eq_attr "type" "fpspc"))
-   70 70)
  
  ;; Pentium Pro/PII Scheduling
  ;;
--- 365,612 ----
  	]
  	(const_string "np")))
  
! (define_automaton "pentium,pentium_fpu")
  
! ;; Pentium do have U and V pipes.  Instruction to both pipes
! ;; are alwyas issued together, much like on VLIW.
! ;;
! ;;                    predecode
! ;;                   /         \
! ;;               decodeu     decodev
! ;;             /    |           |
! ;;           fpu executeu    executev
! ;;            |     |           |
! ;;           fpu  retire     retire
! ;;            |
! ;;           fpu
! ;; We add dummy "port" pipes allocated only first cycle of
! ;; instruction to specify this behaviour.
! 
! (define_cpu_unit "pentium-portu,pentium-portv" "pentium")
! (define_cpu_unit "pentium-u,pentium-v" "pentium")
! (absence_set "pentium-portu" "pentium-u,pentium-v")
! (presence_set "pentium-portv" "pentium-portu")
! 
! ;; Floating point instructions can overlap with new issue of integer
! ;; instructions.  We model only first cycle of FP pipeline, as it is
! ;; fully pipelined.
! (define_cpu_unit "pentium-fp" "pentium_fpu")
! 
! ;; There is non-pipelined multiplier unit used for complex operations.
! (define_cpu_unit "pentium-fmul" "pentium_fpu")
! 
! ;; Pentium preserves memory ordering, so when load-execute-store
! ;; instruction is executed together with other instruction loading
! ;; data, the execution of the other instruction is delayed to very
! ;; last cycle of first instruction, when data are bypassed.
! ;; We model this by allocating "memory" unit when store is pending
! ;; and using conflicting load units together.
! 
! (define_cpu_unit "pentium-memory" "pentium")
! (define_cpu_unit "pentium-load0" "pentium")
! (define_cpu_unit "pentium-load1" "pentium")
! (absence_set "pentium-load0,pentium-load1" "pentium-memory")
! 
! (define_reservation "pentium-load" "(pentium-load0 | pentium-load1)")
! (define_reservation "pentium-np" "(pentium-u + pentium-v)")
! (define_reservation "pentium-uv" "(pentium-u | pentium-v)")
! (define_reservation "pentium-portuv" "(pentium-portu | pentium-portv)")
! (define_reservation "pentium-firstu" "(pentium-u + pentium-portu)")
! (define_reservation "pentium-firstv" "(pentium-v + pentium-portuv)")
! (define_reservation "pentium-firstuv" "(pentium-uv + pentium-portuv)")
! (define_reservation "pentium-firstuload" "(pentium-load + pentium-firstu)")
! (define_reservation "pentium-firstvload" "(pentium-load + pentium-firstv)")
! (define_reservation "pentium-firstuvload" "(pentium-load + pentium-firstuv)
! 					   | (pentium-firstv,pentium-v,
! 					      (pentium-load+pentium-firstv))")
! (define_reservation "pentium-firstuboth" "(pentium-load + pentium-firstu
! 					   + pentium-memory)")
! (define_reservation "pentium-firstvboth" "(pentium-load + pentium-firstu
! 					   + pentium-memory)")
! (define_reservation "pentium-firstuvboth" "(pentium-load + pentium-firstuv
! 					    + pentium-memory)
! 					   | (pentium-firstv,pentium-v,
! 					      (pentium-load+pentium-firstv))")
  
! ;; Few common long latency instructions
! (define_insn_reservation "pent_mul" 11
    (and (eq_attr "cpu" "pentium")
         (eq_attr "type" "imul"))
!   "pentium-np*11")
  
! (define_insn_reservation "pent_str" 12
    (and (eq_attr "cpu" "pentium")
         (eq_attr "type" "str"))
!   "pentium-np*12")
  
! ;; Integer division and some other long latency instruction block all
! ;; units, including the FP pipe.  There is no value in modeling the
! ;; latency of these instructions and not modeling the latency
! ;; decreases the size of the DFA.
! (define_insn_reservation "pent_block" 1
    (and (eq_attr "cpu" "pentium")
         (eq_attr "type" "idiv"))
!   "pentium-np+pentium-fp")
  
! (define_insn_reservation "pent_cld" 2
!   (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "cld"))
!   "pentium-np*2")
  
! ;;  Moves usually have one cycle penalty, but there are exceptions.
! (define_insn_reservation "pent_fmov" 1
    (and (eq_attr "cpu" "pentium")
         (and (eq_attr "type" "fmov")
! 	    (eq_attr "memory" "none,load")))
!   "(pentium-fp+pentium-np)")
  
! (define_insn_reservation "pent_fpmovxf" 3
    (and (eq_attr "cpu" "pentium")
         (and (eq_attr "type" "fmov")
  	    (and (eq_attr "memory" "load,store")
  		 (eq_attr "mode" "XF"))))
!   "(pentium-fp+pentium-np)*3")
  
! (define_insn_reservation "pent_fpstore" 2
    (and (eq_attr "cpu" "pentium")
         (and (eq_attr "type" "fmov")
! 	    (ior (match_operand 1 "immediate_operand" "")
! 		 (eq_attr "memory" "store"))))
!   "(pentium-fp+pentium-np)*2")
  
! (define_insn_reservation "pent_imov" 1
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "imov"))
!   "pentium-firstuv")
  
! ;; Push and pop instructions have 1 cycle latency and special
! ;; hardware bypass allows them to be paired with other push,pop
! ;; and call instructions.
! (define_bypass 0 "pent_push,pent_pop" "pent_push,pent_pop,pent_call")
! (define_insn_reservation "pent_push" 1
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "type" "push")
! 	    (eq_attr "memory" "store")))
!   "pentium-firstuv")
  
! (define_insn_reservation "pent_pop" 1
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "pop"))
!   "pentium-firstuv")
  
! ;; Call and branch instruction can execute in either pipe, but
! ;; they are only pairable when in the v pipe.
! (define_insn_reservation "pent_call" 10
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "call,callv"))
!   "pentium-firstv,pentium-v*9")
  
! (define_insn_reservation "pent_branch" 1
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "ibr"))
!   "pentium-firstv")
  
! ;; Floating point instruction dispatch in U pipe, but continue
! ;; in FP pipeline allowing other isntructions to be executed.
! (define_insn_reservation "pent_fp" 3
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fop,fistp"))
!   "(pentium-firstu+pentium-fp),nothing,nothing")
  
! ;; First two cycles of fmul are not pipelined.
! (define_insn_reservation "pent_fmul" 3
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fmul"))
!   "(pentium-firstuv+pentium-fp+pentium-fmul),pentium-fmul,nothing")
  
! ;; Long latency FP instructions overlap with integer instructions,
! ;; but only last 2 cycles with FP ones.
! (define_insn_reservation "pent_fdiv" 39
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fdiv"))
!   "(pentium-np+pentium-fp+pentium-fmul),
!    (pentium-fp+pentium-fmul)*36,pentium-fmul*2")
  
! (define_insn_reservation "pent_fpspc" 70
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fpspc"))
!   "(pentium-np+pentium-fp+pentium-fmul),
!    (pentium-fp+pentium-fmul)*67,pentium-fmul*2")
  
! ;; Integer instructions.  Load/execute/store takes 3 cycles,
! ;; load/execute 2 cycles and execute only one cycle.
! (define_insn_reservation "pent_uv_both" 3
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "pent_pair" "uv")
! 	    (eq_attr "memory" "both")))
!   "pentium-firstuvboth,pentium-uv+pentium-memory,pentium-uv")
  
! (define_insn_reservation "pent_u_both" 3
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "pent_pair" "pu")
! 	    (eq_attr "memory" "both")))
!   "pentium-firstuboth,pentium-u+pentium-memory,pentium-u")
  
! (define_insn_reservation "pent_v_both" 3
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "pent_pair" "pv")
! 	    (eq_attr "memory" "both")))
!   "pentium-firstvboth,pentium-v+pentium-memory,pentium-v")
  
! (define_insn_reservation "pent_np_both" 3
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "pent_pair" "np")
! 	    (eq_attr "memory" "both")))
!   "pentium-np,pentium-np,pentium-np")
  
! (define_insn_reservation "pent_uv_load" 2
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "pent_pair" "uv")
! 	    (eq_attr "memory" "load")))
!   "pentium-firstuvload,pentium-uv")
  
! (define_insn_reservation "pent_u_load" 2
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "pent_pair" "pu")
! 	    (eq_attr "memory" "load")))
!   "pentium-firstuload,pentium-u")
  
! (define_insn_reservation "pent_v_load" 2
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "pent_pair" "pv")
! 	    (eq_attr "memory" "load")))
!   "pentium-firstvload,pentium-v")
  
! (define_insn_reservation "pent_np_load" 2
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "pent_pair" "np")
! 	    (eq_attr "memory" "load")))
!   "pentium-np,pentium-np")
  
! (define_insn_reservation "pent_uv" 1
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "pent_pair" "uv")
! 	    (eq_attr "memory" "none")))
!   "pentium-firstuv")
  
! (define_insn_reservation "pent_u" 1
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "pent_pair" "pu")
! 	    (eq_attr "memory" "none")))
!   "pentium-firstu")
  
! (define_insn_reservation "pent_v" 1
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "pent_pair" "pv")
! 	    (eq_attr "memory" "none")))
!   "pentium-firstv")
  
! (define_insn_reservation "pent_np" 1
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "pent_pair" "np")
! 	    (eq_attr "memory" "none")))
!   "pentium-np")
  
  
  ;; Pentium Pro/PII Scheduling
  ;;



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]