i386 ESP adjustments opt.

Jan Hubicka hubicka@atrey.karlin.mff.cuni.cz
Fri Apr 7 03:28:00 GMT 2000


> 
> > + ;; in order to allow combine_stack_adjustments pass to see nonobfstructated
> 								^^^
> Here too.
Well, I am not sure what is the correct spelling.
> 
> Um, this seems distinctly non-safe.  You've lost the tie between
> esp and ebp.  Were you intending a blockage as you used for the
> two pop cases?

Here is the solution with new push and pop patterns. I am just running
testsuite and it seems to work well.


Fri Apr  7 12:23:04 MET DST 2000  Jan Hubicka  <jh@suse.cz>

	* i386.c (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8):
	New global variables.
	(ix86_emit_epilogue_adjustment): Do not attempt to use pop for the
	adjustment.
	* i386.h (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8):
	Declare.
	(TARGET_SUB_ESP_4, TARGET_SUB_ESP_8, TARGET_ADD_ESP_4,
	 TARGET_ADD_ESP_8): New macros.
	* i386.md: Add peep2s to convert esp adjustments to push and pop
	instructions.
	(pushsi_prologue, popsi_epilogue): New patterns.

Index: egcs/gcc/config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.151
diff -c -3 -p -r1.151 i386.c
*** i386.c	2000/03/30 13:46:05	1.151
--- i386.c	2000/04/07 10:21:39
*************** const int x86_qimode_math = ~(0);
*** 218,223 ****
--- 218,227 ----
  const int x86_promote_qi_regs = 0;
  const int x86_himode_math = ~(m_PPRO);
  const int x86_promote_hi_regs = m_PPRO;
+ const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
+ const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
+ const int x86_add_esp_4 = m_ATHLON | m_K6;
+ const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
  
  #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
  
*************** static void
*** 1923,1973 ****
  ix86_emit_epilogue_esp_adjustment (tsize)
       int tsize;
  {
!   /* Intel's docs say that for 4 or 8 bytes of stack frame one should
!      use `pop' and not `add'.  */
!   int use_pop = tsize == 4;
!   rtx edx = 0, ecx;
! 
!   /* Use two pops only for the Pentium processors.  */
!   if (tsize == 8 && !TARGET_386 && !TARGET_486)
!     {
!       rtx retval = current_function_return_rtx;
! 
!       edx = gen_rtx_REG (SImode, 1);
! 
!       /* This case is a bit more complex.  Since we cannot pop into
!          %ecx twice we need a second register.  But this is only
!          available if the return value is not of DImode in which
!          case the %edx register is not available.  */
!       use_pop = (retval == NULL
! 		 || !reg_overlap_mentioned_p (edx, retval));
!     }
! 
!   if (use_pop)
!     {
!       ecx = gen_rtx_REG (SImode, 2);
! 
!       /* We have to prevent the two pops here from being scheduled.
!          GCC otherwise would try in some situation to put other
!          instructions in between them which has a bad effect.  */
!       emit_insn (gen_blockage ());
!       emit_insn (gen_popsi1 (ecx));
!       if (tsize == 8)
! 	emit_insn (gen_popsi1 (edx));
!     }
    else
!     {
!       /* If a frame pointer is present, we must be sure to tie the sp
! 	 to the fp so that we don't mis-schedule.  */
!       if (frame_pointer_needed)
!         emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
! 						  stack_pointer_rtx,
! 						  GEN_INT (tsize),
! 						  hard_frame_pointer_rtx));
!       else
!         emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
! 			       GEN_INT (tsize)));
!     }
  }
  
  /* Emit code to restore saved registers using MOV insns.  First register
--- 1927,1942 ----
  ix86_emit_epilogue_esp_adjustment (tsize)
       int tsize;
  {
!   /* If a frame pointer is present, we must be sure to tie the sp
!      to the fp so that we don't mis-schedule.  */
!   if (frame_pointer_needed)
!     emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
! 					      stack_pointer_rtx,
! 					      GEN_INT (tsize),
! 					      hard_frame_pointer_rtx));
    else
!     emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
! 			   GEN_INT (tsize)));
  }
  
  /* Emit code to restore saved registers using MOV insns.  First register
Index: egcs/gcc/config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.109
diff -c -3 -p -r1.109 i386.h
*** i386.h	2000/04/03 23:25:06	1.109
--- i386.h	2000/04/07 10:21:40
*************** extern const int x86_read_modify, x86_sp
*** 174,179 ****
--- 174,180 ----
  extern const int x86_promote_QImode, x86_single_stringop;
  extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs;
  extern const int x86_promote_hi_regs;
+ extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
  
  #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
  #define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK)
*************** extern const int x86_promote_hi_regs;
*** 201,206 ****
--- 202,211 ----
  #define TARGET_HIMODE_MATH (x86_himode_math & CPUMASK)
  #define TARGET_PROMOTE_QI_REGS (x86_promote_qi_regs & CPUMASK)
  #define TARGET_PROMOTE_HI_REGS (x86_promote_hi_regs & CPUMASK)
+ #define TARGET_ADD_ESP_4 (x86_add_esp_4 & CPUMASK)
+ #define TARGET_ADD_ESP_8 (x86_add_esp_8 & CPUMASK)
+ #define TARGET_SUB_ESP_4 (x86_sub_esp_4 & CPUMASK)
+ #define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK)
  
  #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
  
Index: egcs/gcc/config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.148
diff -c -3 -p -r1.148 i386.md
*** i386.md	2000/04/03 23:25:07	1.148
--- i386.md	2000/04/07 10:21:45
***************
*** 1307,1312 ****
--- 1307,1330 ----
    "push{l}\\t%1"
    [(set_attr "type" "push")])
  
+ (define_insn "*pushsi2_prologue"
+   [(set (match_operand:SI 0 "push_operand" "=<")
+ 	(match_operand:SI 1 "general_no_elim_operand" "ri*m"))
+    (set (reg:SI 6) (reg:SI 6))]
+   ""
+   "push{l}\\t%1"
+   [(set_attr "type" "push")])
+ 
+ (define_insn "*popsi1_epilogue"
+   [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
+ 	(mem:SI (reg:SI 7)))
+    (set (reg:SI 7)
+ 	(plus:SI (reg:SI 7) (const_int 4)))
+    (set (reg:SI 6) (reg:SI 6))]
+   ""
+   "pop{l}\\t%0"
+   [(set_attr "type" "pop")])
+ 
  (define_insn "popsi1"
    [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
  	(mem:SI (reg:SI 7)))
***************
*** 9411,9416 ****
--- 9429,9568 ----
    [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
  	      (clobber (reg:CC 17))])]
    "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+ 
+ ;; The ESP adjustments can be done by the push and pop instructions.  The
+ ;; resulting code is shorter, since push is only 1 byte, while add imm, %esp
+ ;; is 3 bytes.  On many CPUs it is also faster, since special hardware to
+ ;; avoid esp dependencies is present.
+ 
+ ;; While some of these conversions may be done using splitters, we use peepholes
+ ;; in order to allow the combine_stack_adjustments pass to see non-obfuscated RTL.
+ 
+ ;; Convert prologue esp subtractions to push.
+ ;; We need a register to push.  In order to keep verify_flow_info happy we have
+ ;; two choices:
+ ;; - use a scratch register and clobber it in order to avoid dependencies
+ ;; - use an already live register
+ ;; We can't use the second way right now, since there is no reliable way to
+ ;; verify that a given register is live.  The first choice will also most
+ ;; likely result in fewer dependencies.  At the place of the esp adjustments
+ ;; it is very likely that call-clobbered registers are dead.  We may want to
+ ;; use the base pointer as an alternative when no register is available later.
+ 
+ (define_peephole2
+   [(match_scratch:SI 0 "r")
+    (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4)))
+ 	      (set (reg:SI 6) (reg:SI 6))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size || !TARGET_SUB_ESP_4"
+   [(clobber (match_dup 0))
+    (parallel [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))
+ 	      (set (reg:SI 6) (reg:SI 6))])])
+ 
+ (define_peephole2
+   [(match_scratch:SI 0 "r")
+    (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+ 	      (set (reg:SI 6) (reg:SI 6))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size || !TARGET_SUB_ESP_8"
+   [(clobber (match_dup 0))
+    (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))
+    (parallel [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))
+ 	      (set (reg:SI 6) (reg:SI 6))])])
+ 
+ ;; Convert esp subtractions to push.
+ (define_peephole2
+   [(match_scratch:SI 0 "r")
+    (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4)))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size || !TARGET_SUB_ESP_4"
+   [(clobber (match_dup 0))
+    (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))])
+ 
+ (define_peephole2
+   [(match_scratch:SI 0 "r")
+    (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size || !TARGET_SUB_ESP_8"
+   [(clobber (match_dup 0))
+    (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))
+    (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))])
+ 
+ ;; Convert epilogue deallocator to pop.
+ (define_peephole2
+   [(match_scratch:SI 0 "r")
+    (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
+ 	      (set (reg:SI 6) (reg:SI 6))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size || !TARGET_ADD_ESP_4"
+   [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ 	      (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
+ 	      (set (reg:SI 6) (reg:SI 6))])]
+   "")
+ 
+ ;; The two-pops case is tricky, since pop causes a dependency on the
+ ;; destination register.  We use two registers if available.
+ (define_peephole2
+   [(match_scratch:SI 0 "r")
+    (match_scratch:SI 1 "r")
+    (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8)))
+ 	      (set (reg:SI 6) (reg:SI 6))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size || !TARGET_ADD_ESP_8"
+   [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ 	      (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
+ 	      (set (reg:SI 6) (reg:SI 6))])
+    (parallel [(set (match_dup 1) (mem:SI (reg:SI 7)))
+ 	      (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
+   "")
+ 
+ (define_peephole2
+   [(match_scratch:SI 0 "r")
+    (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8)))
+ 	      (set (reg:SI 6) (reg:SI 6))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size"
+   [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ 	      (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
+ 	      (set (reg:SI 6) (reg:SI 6))])
+    (parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ 	      (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
+   "gen_blockage();")
+ 
+ ;; Convert esp additions to pop.
+ (define_peephole2
+   [(match_scratch:SI 0 "r")
+    (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
+ 	      (clobber (reg:CC 17))])]
+   ""
+   [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ 	      (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
+   "")
+ 
+ ;; The two-pops case is tricky, since pop causes a dependency on the
+ ;; destination register.  We use two registers if available.
+ (define_peephole2
+   [(match_scratch:SI 0 "r")
+    (match_scratch:SI 1 "r")
+    (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8)))
+ 	      (clobber (reg:CC 17))])]
+   ""
+   [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ 	      (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])
+    (parallel [(set (match_dup 1) (mem:SI (reg:SI 7)))
+ 	      (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
+   "")
+ 
+ (define_peephole2
+   [(match_scratch:SI 0 "r")
+    (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8)))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size"
+   [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ 	      (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])
+    (parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ 	      (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
+   "")
  
  ;; Call-value patterns last so that the wildcard operand does not
  ;; disrupt insn-recog's switch tables.


More information about the Gcc-patches mailing list