This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: New x86 backend and my regstack changes...


On Thu, Jul 01, 1999 at 05:57:07PM +0200, Jan Hubicka wrote:
> Tue Jul 1 15:14:04 CEST 1999  Jan Hubicka <hubicka@freesoft.cz>
> 	* i386.md (memory attribute): New "both" value.
> 	(imm_disp attribute): New attribute.
> 	(Pentium scheduling definitions): More exact model
> 	* i386.c (memory_displacement_operand): New predicate
> 	(memory_address_info): Rename from memory_address_length,
> 	new AIB parameter.
> 	* i386.h (memory_displacement_operand): Declare.
> 	(ix86_safe_length): New function.
> 	(ix86_safe_memory): New function.
> 	(ix86_find_instruction): New function.
> 	(ix86_sched_reorder): Use it.
[...]
> ! ;; these are correct for fdiv.
> ! ;; the fsqrt takes 70 cycles, all can overlap by integer ops, only last two
> ! ;; with fp ops. Can not overlap with fmul/imul instructions.
> ! ;; sin/cos takes 65-100 cycles, only last two can overlap by integer or fp ops.
> ! ;; sin/cos can overlap with fmul/imul.
>   
>   (define_function_unit "fpu" 1 0
>     (and (eq_attr "cpu" "pentium")
>          (eq_attr "type" "fdiv,fpspc"))
> !   39 37)

I went ahead and split these apart, and made fpspc take 70 cycles.

> ! int
> ! memory_address_info (addr, sib)
>        rtx addr;
> +      int sib;

As I mentioned in the previous mail, I killed this change.

> + static enum attr_pent_pair
> + ix86_safe_length (insn)
> +      rtx insn;
> + {
> +   if (recog_memoized (insn) >= 0)
> +     return get_attr_length(insn);
> +   else
> +     return PENT_PAIR_NP;

Careful.  This should have returned an int.

> + static rtx *
> + ix86_find_instruction (rtx * e_ready, rtx * ready, enum attr_pent_pair \
> type, rtx * first)

I renamed this ix86_pent_find_pair.  Careful about the prototypes 
for K&R C.

> + 	    /* Two read modify write instructions together 
> + 	       takes two cycles longer.  */
> + 	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
> + 	      currentcycles += 4;

"two" != 4?  I changed it to 2 -- if this is wrong, you'll have
to expand on the comment.

Other than that, I fixed English grammar.  The attached result is
what I committed.


r~
1999-07-01  Jan Hubicka  <hubicka@freesoft.cz>

        * i386.md (memory attribute): New "both" value.
        (imm_disp attribute): New.
        (Pentium scheduling definitions): More exact model.
        * i386.c (ix86_safe_length): New.
        (ix86_safe_memory): New.
        (ix86_pent_find_pair): New.
        (ix86_sched_reorder): Use it.

Index: i386.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.81.2.6
diff -c -p -d -r1.81.2.6 i386.c
*** i386.c	1999/07/01 17:04:16	1.81.2.6
--- i386.c	1999/07/02 01:31:38
*************** static rtx gen_push PROTO ((rtx));
*** 217,226 ****
--- 217,230 ----
  static int memory_address_length PROTO ((rtx addr));
  static int ix86_flags_dependant PROTO ((rtx, rtx, enum attr_type));
  static int ix86_agi_dependant PROTO ((rtx, rtx, enum attr_type));
+ static int ix86_safe_length PROTO ((rtx));
+ static enum attr_memory ix86_safe_memory PROTO ((rtx));
  static enum attr_pent_pair ix86_safe_pent_pair PROTO ((rtx));
  static enum attr_ppro_uops ix86_safe_ppro_uops PROTO ((rtx));
  static void ix86_dump_ppro_packet PROTO ((FILE *));
  static void ix86_reorder_insn PROTO ((rtx *, rtx *));
+ static rtx * ix86_pent_find_pair PROTO ((rtx *, rtx *, enum attr_pent_pair,
+ 					 rtx));
  
  struct ix86_address
  {
*************** static union
*** 5222,5227 ****
--- 5226,5251 ----
    } ppro;
  } ix86_sched_data;
  
+ static int
+ ix86_safe_length (insn)
+      rtx insn;
+ {
+   /* Unrecognized insns get length 128, well over the 7-byte pair limit.  */
+   if (recog_memoized (insn) < 0)
+     return 128;
+   return get_attr_length (insn);
+ }
+ 
+ static enum attr_memory
+ ix86_safe_memory (insn)
+      rtx insn;
+ {
+   /* Only query the memory attribute when the insn is recognized.  */
+   if (recog_memoized (insn) < 0)
+     return MEMORY_UNKNOWN;
+   return get_attr_memory (insn);
+ }
+ 
  static enum attr_pent_pair
  ix86_safe_pent_pair (insn)
       rtx insn;
*************** ix86_reorder_insn (insnp, slot)
*** 5284,5289 ****
--- 5308,5367 ----
      }
  }
  
+ /* Scan the ready list from E_READY down to READY for an insn of pairability
+    TYPE that fits in 7 bytes together with FIRST, minimizing the cycles lost
+    while the CPU waits for both pipelines to finish before reading the next
+    instructions.  Return its ready-list slot, or NULL if none qualifies.  */
+ 
+ static rtx *
+ ix86_pent_find_pair (e_ready, ready, type, first)
+      rtx *e_ready;
+      rtx *ready;
+      enum attr_pent_pair type;
+      rtx first;
+ {
+   int maxlength, mincycles, cycles;
+   enum attr_pent_pair tmp;
+   enum attr_memory memory;
+   rtx *insnp, *bestinsnp = NULL;
+ 
+   maxlength = 7 - ix86_safe_length (first);
+   memory = ix86_safe_memory (first);
+   cycles = result_ready_cost (first);
+   mincycles = INT_MAX;
+ 
+   /* Stop scanning once a candidate that loses no cycles has been found.  */
+   for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
+     if ((tmp = ix86_safe_pent_pair (*insnp)) == type
+ 	&& ix86_safe_length (*insnp) <= maxlength)
+       {
+ 	enum attr_memory second_memory;
+ 	int secondcycles, currentcycles;
+ 
+ 	second_memory = ix86_safe_memory (*insnp);
+ 	secondcycles = result_ready_cost (*insnp);
+ 	currentcycles = abs (cycles - secondcycles);
+ 
+ 	if (secondcycles >= 1 && cycles >= 1)
+ 	  {
+ 	    /* Two read/modify/write instructions together take two
+ 	       cycles longer.  */
+ 	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
+ 	      currentcycles += 2;
+ 	    /* A read/modify/write instruction followed by a read/modify
+ 	       instruction takes one cycle longer.  */
+ 	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
+ 	        && tmp != PENT_PAIR_UV
+ 	        && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
+ 	      currentcycles += 1;
+ 	  }
+ 	if (currentcycles < mincycles)
+ 	  bestinsnp = insnp, mincycles = currentcycles;
+       }
+ 
+   return bestinsnp;
+ }
+ 
  /* We are about to being issuing insns for this clock cycle.  
     Override the default sort algorithm to better slot instructions.  */
  
*************** ix86_sched_reorder (dump, sched_verbose,
*** 5311,5317 ****
  	 is important to which pipe an insn is issued to.  So we have to make
  	 some minor rearrangements.  */
        {
! 	enum attr_pent_pair pair1, pair2, tmp;
  
  	pair1 = ix86_safe_pent_pair (*e_ready);
  
--- 5389,5395 ----
  	 is important to which pipe an insn is issued to.  So we have to make
  	 some minor rearrangements.  */
        {
! 	enum attr_pent_pair pair1, pair2;
  
  	pair1 = ix86_safe_pent_pair (*e_ready);
  
*************** ix86_sched_reorder (dump, sched_verbose,
*** 5324,5335 ****
  	   insn to go with.  */
  	if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
  	  {
! 	    for (insnp = e_ready - 1; insnp >= ready; --insnp)
! 	      if ((tmp = ix86_safe_pent_pair (*insnp)) == PENT_PAIR_PU)
! 		{
! 		  pair2 = tmp;
! 		  break;
! 		}
  	  }
  
  	/* If the first insn is PU or UV pairable, search for a PV
--- 5402,5411 ----
  	   insn to go with.  */
  	if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
  	  {
! 	    insnp = ix86_pent_find_pair (e_ready-1, ready,
! 					 PENT_PAIR_PU, *e_ready);
! 	    if (insnp)
! 	      pair2 = PENT_PAIR_PU;
  	  }
  
  	/* If the first insn is PU or UV pairable, search for a PV
*************** ix86_sched_reorder (dump, sched_verbose,
*** 5337,5367 ****
  	if (pair2 == PENT_PAIR_NP
  	    && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
  	  {
! 	    for (insnp = e_ready - 1; insnp >= ready; --insnp)
! 	      if ((tmp = ix86_safe_pent_pair (*insnp)) == PENT_PAIR_PV)
! 		{
! 		  pair2 = tmp;
! 		  break;
! 		}
  	  }
  
  	/* If the first insn is pairable, search for a UV
  	   insn to go with.  */
  	if (pair2 == PENT_PAIR_NP)
  	  {
! 	    for (insnp = e_ready - 1; insnp >= ready; --insnp)
! 	      if ((tmp = ix86_safe_pent_pair (*insnp)) == PENT_PAIR_UV)
! 		{
! 		  pair2 = tmp;
! 		  break;
! 		}
  	  }
  
  	if (pair2 == PENT_PAIR_NP)
  	  return;
  
  	/* Found something!  Decide if we need to swap the order.  */
! 	if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU)
  	  ix86_reorder_insn (insnp, e_ready);
  	else
  	  ix86_reorder_insn (insnp, e_ready - 1);
--- 5413,5442 ----
  	if (pair2 == PENT_PAIR_NP
  	    && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
  	  {
! 	    insnp = ix86_pent_find_pair (e_ready-1, ready,
! 					 PENT_PAIR_PV, *e_ready);
! 	    if (insnp)
! 	      pair2 = PENT_PAIR_PV;
  	  }
  
  	/* If the first insn is pairable, search for a UV
  	   insn to go with.  */
  	if (pair2 == PENT_PAIR_NP)
  	  {
! 	    insnp = ix86_pent_find_pair (e_ready-1, ready,
! 					 PENT_PAIR_UV, *e_ready);
! 	    if (insnp)
! 	      pair2 = PENT_PAIR_UV;
  	  }
  
  	if (pair2 == PENT_PAIR_NP)
  	  return;
  
  	/* Found something!  Decide if we need to swap the order.  */
! 	if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
! 	    || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
! 		&& ix86_safe_memory (*e_ready) == MEMORY_BOTH
! 		&& ix86_safe_memory (*insnp) == MEMORY_LOAD))
  	  ix86_reorder_insn (insnp, e_ready);
  	else
  	  ix86_reorder_insn (insnp, e_ready - 1);
Index: i386.md
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.78.2.5
diff -c -p -d -r1.78.2.5 i386.md
*** i386.md	1999/07/01 13:12:41	1.78.2.5
--- i386.md	1999/07/02 01:31:38
***************
*** 122,145 ****
  ;; `store' if there is a simple memory reference therein, or `unknown'
  ;; if the instruction is complex.
  
! (define_attr "memory" "none,load,store,unknown"
    (cond [(eq_attr "type" "other,multi")
  	   (const_string "unknown")
  	 (eq_attr "type" "lea,fcmov,fpspc")
  	   (const_string "none")
- 	 ;; These aren't really accurate, since these are the only
- 	 ;; simple instructions where it is possible to read and write
- 	 ;; to memory.  But as we are really interested not in the
- 	 ;; bus traffic but in the address generator, they are based
- 	 ;; on the operand.
  	 (eq_attr "type" "push")
  	   (if_then_else (match_operand 1 "memory_operand" "")
! 	     (const_string "load")
! 	     (const_string "none"))
  	 (eq_attr "type" "pop,setcc")
  	   (if_then_else (match_operand 0 "memory_operand" "")
! 	     (const_string "store")
! 	     (const_string "none"))
  	 (eq_attr "type" "icmp")
  	   (if_then_else (ior (match_operand 0 "memory_operand" "")
  			      (match_operand 1 "memory_operand" ""))
--- 122,140 ----
  ;; `store' if there is a simple memory reference therein, or `unknown'
  ;; if the instruction is complex.
  
! (define_attr "memory" "none,load,store,both,unknown"
    (cond [(eq_attr "type" "other,multi")
  	   (const_string "unknown")
  	 (eq_attr "type" "lea,fcmov,fpspc")
  	   (const_string "none")
  	 (eq_attr "type" "push")
  	   (if_then_else (match_operand 1 "memory_operand" "")
! 	     (const_string "both")
! 	     (const_string "store"))
  	 (eq_attr "type" "pop,setcc")
  	   (if_then_else (match_operand 0 "memory_operand" "")
! 	     (const_string "both")
! 	     (const_string "load"))
  	 (eq_attr "type" "icmp")
  	   (if_then_else (ior (match_operand 0 "memory_operand" "")
  			      (match_operand 1 "memory_operand" ""))
***************
*** 157,162 ****
--- 152,163 ----
  	   (if_then_else (match_operand 1 "constant_call_address_operand" "")
  	     (const_string "none")
  	     (const_string "load"))
+ 	 (and (eq_attr "type" "alu1")
+ 	      (match_operand 1 "memory_operand" ""))
+ 	   (const_string "both")
+ 	 (and (match_operand 0 "memory_operand" "")
+ 	      (match_operand 1 "memory_operand" ""))
+ 	   (const_string "both")
  	 (match_operand 0 "memory_operand" "")
  	   (const_string "store")
  	 (match_operand 1 "memory_operand" "")
***************
*** 170,175 ****
--- 171,192 ----
  	]
  	(const_string "none")))
  
+ ;; Indicates if an instruction has both an immediate and a displacement.
+ 
+ (define_attr "imm_disp" "false,true,unknown"
+   (cond [(eq_attr "type" "other,multi")
+ 	   (const_string "unknown")
+ 	 (and (eq_attr "type" "icmp,imov")
+ 	      (and (match_operand 0 "memory_displacement_operand" "")
+ 		   (match_operand 1 "immediate_operand" "")))
+ 	   (const_string "true")
+ 	 (and (eq_attr "type" "alu,ishift,imul,idiv")
+ 	      (and (match_operand 0 "memory_displacement_operand" "")
+ 		   (match_operand 2 "immediate_operand" "")))
+ 	   (const_string "true")
+ 	]
+ 	(const_string "false")))
+ 
  ;; Indicates if an FP operation has an integer source.
  
  (define_attr "fp_int_src" "false,true"
***************
*** 186,202 ****
  
  ;; Categorize how an instruction slots.
  
  (define_attr "pent_pair" "uv,pu,pv,np"
!   (cond [(eq_attr "type" "alu1,alu,imov,icmp,lea,incdec")
! 	   (const_string "uv")
  	 (eq_attr "type" "ibr")
  	   (const_string "pv")
  	 (and (eq_attr "type" "ishift")
  	      (match_operand 2 "const_int_operand" ""))
  	   (const_string "pu")
  	 (and (eq_attr "type" "pop,push")
! 	      (eq_attr "memory" "none"))
! 	   (const_string "uv")
  	 (and (eq_attr "type" "call")
  	      (match_operand 0 "constant_call_address_operand" ""))
  	   (const_string "pv")
--- 203,231 ----
  
  ;; Categorize how an instruction slots.
  
+ ;; The non-MMX Pentium slots an instruction with prefixes on U pipe only,
+ ;; while MMX Pentium can slot it on either U or V.  Model non-MMX Pentium
+ ;; rules, because it results in noticeably better code on non-MMX Pentium
+ ;; and doesn't hurt much on MMX.  (Prefixed instructions are not very
+ ;; common, so the scheduler usually has a non-prefixed insn to pair).
+ 
  (define_attr "pent_pair" "uv,pu,pv,np"
!   (cond [(eq_attr "imm_disp" "true")
! 	   (const_string "np")
! 	 (eq_attr "type" "alu1,alu,imov,icmp,lea,incdec")
! 	   (if_then_else (eq_attr "length_prefix" "1")
! 	     (const_string "pu")
! 	     (const_string "uv"))
  	 (eq_attr "type" "ibr")
  	   (const_string "pv")
  	 (and (eq_attr "type" "ishift")
  	      (match_operand 2 "const_int_operand" ""))
  	   (const_string "pu")
  	 (and (eq_attr "type" "pop,push")
! 	      (eq_attr "memory" "!both"))
! 	   (if_then_else (eq_attr "length_prefix" "1")
! 	     (const_string "pu")
! 	     (const_string "uv"))
  	 (and (eq_attr "type" "call")
  	      (match_operand 0 "constant_call_address_operand" ""))
  	   (const_string "pv")
***************
*** 217,224 ****
--- 246,314 ----
  ;;
  ;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
  
+ (define_function_unit "pent_np" 1 0
+   (and (eq_attr "cpu" "pentium")
+        (eq_attr "type" "imul"))
+   11 11)
+ 
+ (define_function_unit "pent_mul" 1 1
+   (and (eq_attr "cpu" "pentium")
+        (eq_attr "type" "imul"))
+   11 11)
+ 
+ ; ??? IDIV for SI takes 46 cycles, for HI 30, for QI 22
+ (define_function_unit "pent_np" 1 0
+   (and (eq_attr "cpu" "pentium")
+        (eq_attr "type" "idiv"))
+   46 46)
+ 
+ ; Read/Modify/Write instructions usually take 3 cycles.
+ (define_function_unit "pent_u" 1 0
+   (and (eq_attr "cpu" "pentium")
+        (and (eq_attr "type" "alu,alu1,ishift")
+ 	    (and (eq_attr "pent_pair" "pu")
+ 		 (eq_attr "memory" "both"))))
+   3 3)
+ 
+ (define_function_unit "pent_uv" 2 0
+   (and (eq_attr "cpu" "pentium")
+        (and (eq_attr "type" "alu,alu1,ishift")
+ 	    (and (eq_attr "pent_pair" "!np")
+ 		 (eq_attr "memory" "both"))))
+   3 3)
+ 
+ (define_function_unit "pent_np" 1 0
+   (and (eq_attr "cpu" "pentium")
+        (and (eq_attr "type" "alu,alu1,ishift")
+ 	    (and (eq_attr "pent_pair" "np")
+ 		 (eq_attr "memory" "both"))))
+   3 3)
+ 
+ ; Read/Modify or Modify/Write instructions usually take 2 cycles.
  (define_function_unit "pent_u" 1 0
    (and (eq_attr "cpu" "pentium")
+        (and (eq_attr "type" "alu,alu1,ishift")
+ 	    (and (eq_attr "pent_pair" "pu")
+ 		 (eq_attr "memory" "load,store"))))
+   2 2)
+ 
+ (define_function_unit "pent_uv" 2 0
+   (and (eq_attr "cpu" "pentium")
+        (and (eq_attr "type" "alu,alu1,ishift")
+ 	    (and (eq_attr "pent_pair" "!np")
+ 		 (eq_attr "memory" "load,store"))))
+   2 2)
+ 
+ (define_function_unit "pent_np" 1 0
+   (and (eq_attr "cpu" "pentium")
+        (and (eq_attr "type" "alu,alu1,ishift")
+ 	    (and (eq_attr "pent_pair" "np")
+ 		 (eq_attr "memory" "load,store"))))
+   2 2)
+ 
+ ; Insns w/o memory operands and move instructions usually take one cycle.
+ (define_function_unit "pent_u" 1 0
+   (and (eq_attr "cpu" "pentium")
         (eq_attr "pent_pair" "pu"))
    1 1)
  
***************
*** 234,294 ****
  
  (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "imul"))
!   11 11)
  
  (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "idiv"))
!   25 25)
  
  (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "pent_pair" "np"))
!   1 1)
  
- ; Pairable insns only conflict with other non-pairable insns.
  (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
         (eq_attr "pent_pair" "!np"))
    1 1
    [(eq_attr "pent_pair" "np")])
  
  (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
         (eq_attr "type" "fmov,fcmp,fxch"))
    1 1)
  
! ; Addition takes 4 cycles; assume other random cruft does as well.
  (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
         (eq_attr "type" "fop,fop1"))
!   4 1)
  
! ; Multiplication takes 4 cycles and is only half pipelined.
  (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
         (eq_attr "type" "fmul"))
!   4 2)
  
  (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fdiv,fpspc"))
!   ; ??? just a guess
!   59 59)
  
! ; Model adds and mults not in the pipeline simultaneously.
! (define_function_unit "pent_fpm" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fop,fop1"))
!   4 4
!   [(eq_attr "type" "fmul")])
  
! (define_function_unit "pent_fpm" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fmul"))
!   4 4
!   [(eq_attr "type" "fop,fop1")])
  
  ;; Pentium Pro/PII Scheduling
  ;;
--- 324,412 ----
  
  (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "pent_pair" "np"))
!   1 1)
  
+ ; Pairable insns only conflict with other non-pairable insns.
  (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "type" "alu,alu1,ishift")
! 	    (and (eq_attr "pent_pair" "!np")
! 		 (eq_attr "memory" "both"))))
!   3 3
!   [(eq_attr "pent_pair" "np")])
  
  (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
!        (and (eq_attr "type" "alu,alu1,ishift")
! 	    (and (eq_attr "pent_pair" "!np")
! 		 (eq_attr "memory" "load,store"))))
!   2 2
!   [(eq_attr "pent_pair" "np")])
  
  (define_function_unit "pent_np" 1 0
    (and (eq_attr "cpu" "pentium")
         (eq_attr "pent_pair" "!np"))
    1 1
    [(eq_attr "pent_pair" "np")])
  
+ ; Floating point instructions usually block one cycle longer when combined
+ ; with integer instructions, because of the unpaired fxch instruction.
+ (define_function_unit "pent_np" 1 0
+   (and (eq_attr "cpu" "pentium")
+        (eq_attr "type" "fmov,fop,fop1,fmul,fpspc,fcmov,fcmp"))
+   2 2
+   [(eq_attr "type" "!fmov,fop,fop1,fmul,fpspc,fcmov,fcmp")])
+ 
+ ; ??? This is correct only for reg to reg moves.  Store insns take
+ ; 3 cycles for XFmode, 2 cycles otherwise, and require the operand to
+ ; be ready one cycle earlier.  An XFmode load insn takes 3 cycles and
+ ; is not pairable.  These should be fixed, perhaps in adjust_cost.
  (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
         (eq_attr "type" "fmov,fcmp,fxch"))
    1 1)
  
! ; Addition takes 3 cycles; assume other random cruft does as well.
! ; ??? Trivial fp operations such as fabs or fchs take only one cycle.
  (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
         (eq_attr "type" "fop,fop1"))
!   3 1)
  
! ; Multiplication takes 3 cycles and is only half pipelined.
  (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
         (eq_attr "type" "fmul"))
!   3 1)
  
+ (define_function_unit "pent_mul" 1 1
+   (and (eq_attr "cpu" "pentium")
+        (eq_attr "type" "fmul"))
+   2 2)
+ 
+ ; ??? This is correct only for fdiv and sqrt -- sin/cos take 65-100 cycles. 
+ ; They can overlap with integer insns.  Only the last two cycles can overlap
+ ; with other fp insns.  None can overlap with multiplies.
  (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fdiv"))
!   39 37)
  
! (define_function_unit "pent_mul" 1 1
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fdiv"))
!   39 39)
  
! (define_function_unit "fpu" 1 0
    (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fpspc"))
!   70 68)
! 
! (define_function_unit "pent_mul" 1 1
!   (and (eq_attr "cpu" "pentium")
!        (eq_attr "type" "fpspc"))
!   70 70)
  
  ;; Pentium Pro/PII Scheduling
  ;;
***************
*** 311,317 ****
  	 (eq_attr "type" "icmov,fcmov")
  	   (const_string "few")
  	 (eq_attr "type" "imov")
! 	   (if_then_else (eq_attr "memory" "store")
  	     (const_string "few")
  	     (const_string "one"))
  	 (eq_attr "memory" "!none")
--- 429,435 ----
  	 (eq_attr "type" "icmov,fcmov")
  	   (const_string "few")
  	 (eq_attr "type" "imov")
! 	   (if_then_else (eq_attr "memory" "store,both")
  	     (const_string "few")
  	     (const_string "one"))
  	 (eq_attr "memory" "!none")
***************
*** 392,404 ****
  (define_function_unit "ppro_p2" 1 0
    (and (eq_attr "cpu" "pentiumpro")
         (ior (eq_attr "type" "pop")
! 	    (eq_attr "memory" "load")))
    3 1)
  
  (define_function_unit "ppro_p34" 1 0
    (and (eq_attr "cpu" "pentiumpro")
         (ior (eq_attr "type" "push")
! 	    (eq_attr "memory" "store")))
    1 1)
  
  (define_function_unit "fpu" 1 0
--- 510,522 ----
  (define_function_unit "ppro_p2" 1 0
    (and (eq_attr "cpu" "pentiumpro")
         (ior (eq_attr "type" "pop")
! 	    (eq_attr "memory" "load,both")))
    3 1)
  
  (define_function_unit "ppro_p34" 1 0
    (and (eq_attr "cpu" "pentiumpro")
         (ior (eq_attr "type" "push")
! 	    (eq_attr "memory" "store,both")))
    1 1)
  
  (define_function_unit "fpu" 1 0
***************
*** 7226,7231 ****
--- 7344,7350 ----
    ; fill in all the blanks.
    [(set_attr "type" "alu")
     (set_attr "memory" "none")
+    (set_attr "imm_disp" "false")
     (set_attr "length" "2")])
  
  (define_insn ""

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]