This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: more AGI stall detection improvements...



Hi
So here is updated patch. I've took care to handle ANTI and OUTPUT
dependencies correctly (according to the link value).
I've also cleaned up the ESP bypass detection code and added support
to detect implicit changes+bypass done by CALL and RETURN instructions.

Somehow removing that strange if and handling the link notes improved
scheduling noticeably (30% in some tests maybe 3% overall). I don't know
exactly why, but something was probably rotten in the if.
I also had to move if recognizing JUMP/CALL insns to non-pentium code,
because I am handling AGI stalls caused by this insns now.

Note that adjust_cost seems to be absolutely wrong for PPro/K6.
I will try to make K6 version, but I don't have any access to test PPro.

It produces noticeably better code in recursive function tests.
(the prologues/epilogues are still much slower (about twice), but
they will probably improve by John's patch for leaf function
and by moving the aligning after the pushes as suggested by Richard)

Honza

Thu Apr 15 08:29:09 CEST 1999 Jan Hubicka
	* i386.md (add?i3 and ashl?i3 patterns): Set type to lea when
	outputing lea instruction.
	* i386.c (have_esp_bypass): New function.
	(agi_dependent): More support for ESP conflict bypassing hardware.
	(reg_mentioned_in_mem): Recognize register mentioned in LEA parameter.
	(x86_adjust_cost): Don't handle non-conflicting moves separately,
	handle REG_DEP_ANTI and REG_DEP_OUTPUT notes,
	return -1 for esp bypassed instruction pairs.

*** i386.md.old	Thu Apr 15 07:12:47 1999
--- i386.md	Thu Apr 15 07:15:59 1999
***************
*** 3377,3383 ****
  
    return AS2 (add%L0,%2,%0);
  }"
!   [(set_attr "type" "binary")])
  
  ;; addsi3 is faster, so put this after.
  
--- 3377,3383 ----
  
    return AS2 (add%L0,%2,%0);
  }"
!   [(set_attr "type" "binary,binary,lea")])
  
  ;; addsi3 is faster, so put this after.
  
***************
*** 3496,3502 ****
  
    return AS2 (add%W0,%2,%0);
  }"
!   [(set_attr "type" "binary")])
  
  (define_expand "addqi3"
    [(set (match_operand:QI 0 "general_operand" "")
--- 3496,3502 ----
  
    return AS2 (add%W0,%2,%0);
  }"
!   [(set_attr "type" "binary,binary,lea")])
  
  (define_expand "addqi3"
    [(set (match_operand:QI 0 "general_operand" "")
***************
*** 3541,3547 ****
  
    return AS2 (add%B0,%2,%0);
  }"
!   [(set_attr "type" "binary")])
  
  ;Lennart Augustsson <augustss@cs.chalmers.se>
  ;says this pattern just makes slower code:
--- 3541,3547 ----
  
    return AS2 (add%B0,%2,%0);
  }"
!   [(set_attr "type" "binary,binary,lea")])
  
  ;Lennart Augustsson <augustss@cs.chalmers.se>
  ;says this pattern just makes slower code:
*************** byte_xor_operation:
*** 5142,5148 ****
  	(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,r")
  		   (match_operand:SI 2 "small_shift_operand" "M,M")))]
    "! optimize_size"
!   "* return output_ashl (insn, operands);")
  
  ;; Generic left shift pattern to catch all cases not handled by the
  ;; shift pattern above.
--- 5142,5149 ----
  	(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,r")
  		   (match_operand:SI 2 "small_shift_operand" "M,M")))]
    "! optimize_size"
!   "* return output_ashl (insn, operands);"
!   [(set_attr "type" "*,lea")])
  
  ;; Generic left shift pattern to catch all cases not handled by the
  ;; shift pattern above.
*************** byte_xor_operation:
*** 5158,5164 ****
  	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,r")
  		   (match_operand:HI 2 "small_shift_operand" "M,M")))]
    "! optimize_size"
!   "* return output_ashl (insn, operands);")
  
  (define_insn ""
    [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
--- 5159,5166 ----
  	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,r")
  		   (match_operand:HI 2 "small_shift_operand" "M,M")))]
    "! optimize_size"
!   "* return output_ashl (insn, operands);"
!   [(set_attr "type" "*,lea")])
  
  (define_insn ""
    [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
*************** byte_xor_operation:
*** 5172,5178 ****
  	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,q")
  		   (match_operand:QI 2 "small_shift_operand" "M,M")))]
    "! optimize_size"
!   "* return output_ashl (insn, operands);")
  
  ;; Generic left shift pattern to catch all cases not handled by the
  ;; shift pattern above.
--- 5174,5181 ----
  	(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,q")
  		   (match_operand:QI 2 "small_shift_operand" "M,M")))]
    "! optimize_size"
!   "* return output_ashl (insn, operands);"
!   [(set_attr "type" "*,lea")])
  
  ;; Generic left shift pattern to catch all cases not handled by the
  ;; shift pattern above.
*** i386.c.old3	Thu Apr 15 05:36:17 1999
--- i386.c	Fri Apr 16 00:02:55 1999
*************** is_fp_store (insn)
*** 5088,5093 ****
--- 5088,5127 ----
  
    return 0;
  }
+ 
+ /* Return 1 for push insn patterns.  */
+ static int
+ have_esp_bypass (insn)
+      rtx insn;
+ {
+   /* Push insn.  */
+   if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SET
+       && GET_CODE (SET_DEST (PATTERN (insn))) == MEM
+       && push_operand (SET_DEST (PATTERN (insn)),
+ 		       GET_MODE (SET_DEST (PATTERN (insn))))
+       && !STACK_REG_P (SET_SRC (PATTERN (insn))))
+     return 1;
+ 
+   /* Pop insn is parallel storing value in first expression and changing
+      stack pointer in second.  */
+ 
+   if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == PARALLEL &&
+       XVECLEN (PATTERN (insn), 0) == 2)
+     {
+       rtx set = XVECEXP (PATTERN (insn), 0, 1);
+       if (GET_CODE (set) == SET && SET_DEST (set) == stack_pointer_rtx)
+ 	return 1;
+     }
+ 
+   /* Call insn.  */
+   if (GET_CODE (insn) == CALL_INSN)
+     return 1;
+ 
+   /* Ret.  */
+   if (GET_CODE (insn) == JUMP_INSN && GET_CODE (PATTERN (insn)) == RETURN)
+     return 1;
+   return 0;
+ }
  
  /* Return 1 if DEP_INSN sets a register which INSN uses as a base
     or index to reference memory.
*************** int
*** 5097,5131 ****
  agi_dependent (insn, dep_insn)
       rtx insn, dep_insn;
  {
!   int push = 0, push_dep = 0;
    if (GET_CODE (dep_insn) == INSN
        && GET_CODE (PATTERN (dep_insn)) == SET
        && GET_CODE (SET_DEST (PATTERN (dep_insn))) == REG
        && reg_mentioned_in_mem (SET_DEST (PATTERN (dep_insn)), insn))
      return 1;
  
!   if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SET
!       && GET_CODE (SET_DEST (PATTERN (insn))) == MEM
!       && push_operand (SET_DEST (PATTERN (insn)),
!                        GET_MODE (SET_DEST (PATTERN (insn)))))
!     push = 1;
! 
!   if (GET_CODE (dep_insn) == INSN && GET_CODE (PATTERN (dep_insn)) == SET
!       && GET_CODE (SET_DEST (PATTERN (dep_insn))) == MEM
!       && push_operand (SET_DEST (PATTERN (dep_insn)),
!                        GET_MODE (SET_DEST (PATTERN (dep_insn)))))
!     push_dep = 1;
  
!   /* CPUs contain special hardware to allow two pushes.  */
!   if (push && push_dep) 
      return 0;
  
!   /* Push operation implicitly change stack pointer causing AGI stalls.  */
!   if (push_dep && reg_mentioned_in_mem (stack_pointer_rtx, insn))
      return 1;
  
!   /* Push also implicitly read stack pointer.  */
!   if (push && modified_in_p (stack_pointer_rtx, dep_insn))
      return 1;
  
    return 0;
--- 5131,5159 ----
  agi_dependent (insn, dep_insn)
       rtx insn, dep_insn;
  {
!   int bypassed_esp = 0, bypassed_esp_dep = 0;
    if (GET_CODE (dep_insn) == INSN
        && GET_CODE (PATTERN (dep_insn)) == SET
        && GET_CODE (SET_DEST (PATTERN (dep_insn))) == REG
        && reg_mentioned_in_mem (SET_DEST (PATTERN (dep_insn)), insn))
      return 1;
  
!   bypassed_esp = have_esp_bypass (insn);
!   bypassed_esp_dep = have_esp_bypass (dep_insn);
  
!   /* CPUs contain special hardware to allow two bypassed instructions.  */
!   if ((bypassed_esp && bypassed_esp_dep))
      return 0;
  
!   /* ??? Some docs says that AGI can happen here, other says it can't.  
!      I am getting better code with this enabled.  */
!   /* Push pop and ret operations implicitly change stack pointer causing
!      AGI stalls.  */
!   if (bypassed_esp_dep && reg_mentioned_in_mem (stack_pointer_rtx, insn))
      return 1;
  
!   /* Push pop and call also implicitly read stack pointer.  */
!   if (bypassed_esp && modified_in_p (stack_pointer_rtx, dep_insn))
      return 1;
  
    return 0;
*************** reg_mentioned_in_mem (reg, rtl)
*** 5163,5168 ****
--- 5191,5213 ----
        break;
      }
  
+   /* The lea have register mentined in memory parameter even in cases
+      where it don't appears to be in RTL representation.  We detect this
+      instruction by using insn attribute TYPE and expect that the pattern is
+      single set where all registers in source will be used as memory parameter.
+      It is true about all lea insn patterns in md file.  But we will have
+      to take care and keep this in the sync.  */
+ 
+   if (reload_completed && code == INSN
+       && get_attr_type (rtl) == TYPE_LEA)
+     {
+       /* Simple check to avoud desynchronization with MD file.  */
+       if (GET_CODE (PATTERN (rtl)) != SET) 
+ 	abort();
+       if (reg_mentioned_p (reg, SET_SRC (PATTERN (rtl))))
+ 	return 1;
+     }
+ 
    if (code == MEM && reg_mentioned_p (reg, rtl))
      return 1;
  
*************** x86_adjust_cost (insn, link, dep_insn, c
*** 5463,5481 ****
  {
    rtx next_inst;
  
-   if (GET_CODE (dep_insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN)
-     return 0;
- 
-   if (GET_CODE (dep_insn) == INSN
-       && GET_CODE (PATTERN (dep_insn)) == SET
-       && GET_CODE (SET_DEST (PATTERN (dep_insn))) == REG
-       && GET_CODE (insn) == INSN
-       && GET_CODE (PATTERN (insn)) == SET
-       && !reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
- 				   SET_SRC (PATTERN (insn))))
-     return 0;	/* ??? */
- 
- 
    switch (ix86_cpu)
      {
      case PROCESSOR_PENTIUM:
--- 5508,5513 ----
*************** x86_adjust_cost (insn, link, dep_insn, c
*** 5483,5488 ****
--- 5515,5524 ----
  	  && !is_fp_dest (dep_insn))
  	return 0;
  
+       /* Two instruction with special bypass are pairable.  */
+       if (have_esp_bypass (insn) && have_esp_bypass (dep_insn))
+ 	return -1;
+ 
        if (agi_dependent (insn, dep_insn))
  	return cost ? cost + 1 : 2;
  
*************** x86_adjust_cost (insn, link, dep_insn, c
*** 5493,5506 ****
  	  && GET_CODE (next_inst) == JUMP_INSN)
  	/* compare probably paired with jump */
  	return 0;
  
!       /* Stores stalls one cycle longer than other insns.  */
!       if (is_fp_insn (insn) && cost && is_fp_store (dep_insn))
  	cost++;
-       break;
  
      case PROCESSOR_K6:
      default:
        if (!is_fp_dest (dep_insn))
  	{
  	  if(!agi_dependent (insn, dep_insn))
--- 5529,5556 ----
  	  && GET_CODE (next_inst) == JUMP_INSN)
  	/* compare probably paired with jump */
  	return 0;
  
!       /* Anti dependencies are pairable.  */
!       if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
!     	return -1; 
! 
!       /* Integer Output dependency disables pairing, but don't
!          affect anything else. */
!       if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
! 	  && !is_fp_insn (insn) && !is_fp_insn (dep_insn))
! 	return 1;
! 
!       /* FP stores stalls one cycle longer than other insns.  */
!       if (is_fp_insn (insn) && is_fp_store (dep_insn)
! 	  && !REG_NOTE_KIND (link))
  	cost++;
+       break;
  
      case PROCESSOR_K6:
      default:
+       if (GET_CODE (dep_insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN)
+ 	return 0;
+ 
        if (!is_fp_dest (dep_insn))
  	{
  	  if(!agi_dependent (insn, dep_insn))
*************** x86_adjust_cost (insn, link, dep_insn, c
*** 5508,5513 ****
--- 5558,5567 ----
  	  if (TARGET_486)
  	    return 2;
  	}
+       /* Output and Anti dependencies have no cost.  */
+       else 
+ 	if (REG_NOTE_KIND (link))
+ 	  return 0;
        else
  	if (is_fp_store (insn) && is_fp_insn (dep_insn)
  	    && NEXT_INSN (insn) && NEXT_INSN (NEXT_INSN (insn))


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]