i386 epilogue code tweak

Richard Henderson rth@cygnus.com
Thu Feb 3 14:52:00 GMT 2000


On Thu, Feb 03, 2000 at 07:13:33PM +0100, Jan Hubicka wrote:
> I've changed epilogue expander in case only one register is popped. Now the
> register is popped by move insn and then esp is restored by movl %ebp, %esp.
> This is shorter than previous code involving lea instructions.
> This allows to use lea for all functions that save maximally one register.
> Now switching to SAVE_REGS_FIRST mode seems to be win overall even on the
> K6.
> 
> The patch also attempts to make epilogue expander easier to understand.

Hrm.. I don't think it did a very good job of making the expander
easier to understand.  Also, I think the lea used to remove the
frame needs some kind of special dependency help.

What do you think of this version?



r~

	* i386.md (epilogue_deallocate_stack): Include an offset; emit
	lea for a nonzero offset.
 	* i386.c (SAVED_REGS_FIRST): Remove.
 	(ix86_initial_elimination_offset): Handle only SAVED_REGS_FIRST mode.
 	(ix86_compute_frame_size): Likewise.
 	(ix86_expand_epilogue): Likewise; use mov instead of pop to restore
 	a register when profitable, emit leave when profitable.
	(ix86_emit_restore_regs): Remove.
	(ix86_attr_length_default): Handle epilogue_deallocate_stack
	as a TYPE_LEA insn.

Index: i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.132
diff -c -p -d -r1.132 i386.c
*** i386.c	2000/02/02 07:50:20	1.132
--- i386.c	2000/02/03 22:42:10
*************** Boston, MA 02111-1307, USA. */
*** 41,56 ****
  #include "basic-block.h"
  #include "ggc.h"
  
- /* True when we want to do pushes before allocating stack to get better
-    scheduling.
- 
-    Saving registers first is win in the most cases except for LEAVE
-    instruction.  Macro is 0 iff we will use LEAVE.  */
- 
- #define SAVED_REGS_FIRST \
-   (!frame_pointer_needed || (!TARGET_USE_LEAVE && !optimize_size))
- 
- 
  #ifdef EXTRA_CONSTRAINT
  /* If EXTRA_CONSTRAINT is defined, then the 'S'
     constraint in REG_CLASS_FROM_LETTER will no longer work, and various
--- 41,46 ----
*************** static HOST_WIDE_INT ix86_compute_frame_
*** 411,417 ****
  						     int *, int *, int *));
  static int ix86_nsaved_regs PARAMS((void));
  static void ix86_emit_save_regs PARAMS((void));
- static void ix86_emit_restore_regs PARAMS((void));
  static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
  
  struct ix86_address
--- 401,406 ----
*************** ix86_initial_elimination_offset (from, t
*** 1705,1720 ****
  
       saved frame pointer if frame_pointer_needed
  						<- HARD_FRAME_POINTER
!      [saved regs if SAVED_REGS_FIRST]
  
       [padding1]   \
  		   |				<- FRAME_POINTER
       [frame]	   > tsize
  		   |
       [padding2]   /
- 
-      [saved regs if !SAVED_REGS_FIRST]
-      						<- STACK_POINTER
      */
  
    if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
--- 1694,1706 ----
  
       saved frame pointer if frame_pointer_needed
  						<- HARD_FRAME_POINTER
!      [saved regs]
  
       [padding1]   \
  		   |				<- FRAME_POINTER
       [frame]	   > tsize
  		   |
       [padding2]   /
      */
  
    if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
*************** ix86_initial_elimination_offset (from, t
*** 1725,1732 ****
  	   && to == HARD_FRAME_POINTER_REGNUM)
      {
        ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *)0);
!       if (SAVED_REGS_FIRST)
! 	padding1 += nregs * UNITS_PER_WORD;
        return -padding1;
      }
    else
--- 1711,1717 ----
  	   && to == HARD_FRAME_POINTER_REGNUM)
      {
        ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *)0);
!       padding1 += nregs * UNITS_PER_WORD;
        return -padding1;
      }
    else
*************** ix86_initial_elimination_offset (from, t
*** 1743,1752 ****
  	return tsize + nregs * UNITS_PER_WORD + frame_size;
        else if (from != FRAME_POINTER_REGNUM)
  	abort ();
-       else if (SAVED_REGS_FIRST)
- 	return tsize - padding1;
        else
! 	return tsize + nregs * UNITS_PER_WORD - padding1;
      }
  }
  
--- 1728,1735 ----
  	return tsize + nregs * UNITS_PER_WORD + frame_size;
        else if (from != FRAME_POINTER_REGNUM)
  	abort ();
        else
! 	return tsize - padding1;
      }
  }
  
*************** ix86_compute_frame_size (size, nregs_on_
*** 1788,1800 ****
      if (stack_alignment_needed < 4)
        stack_alignment_needed = 4;
  
!     if (stack_alignment_needed > preferred_alignment)
        abort ();
  
!     if (SAVED_REGS_FIRST)
!       offset += nregs * UNITS_PER_WORD;
!     else
!       total_size += nregs * UNITS_PER_WORD;
  
      total_size += offset;
  
--- 1771,1780 ----
      if (stack_alignment_needed < 4)
        stack_alignment_needed = 4;
  
!     if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
        abort ();
  
!     offset += nregs * UNITS_PER_WORD;
  
      total_size += offset;
  
*************** ix86_compute_frame_size (size, nregs_on_
*** 1807,1815 ****
        }
  
      /* Align stack boundary. */
!     if (!current_function_is_leaf)
!       padding2 = ((total_size + preferred_alignment - 1)
! 		  & -preferred_alignment) - total_size;
    }
  #endif
  
--- 1787,1794 ----
        }
  
      /* Align stack boundary. */
!     padding2 = ((total_size + preferred_alignment - 1)
! 		& -preferred_alignment) - total_size;
    }
  #endif
  
*************** ix86_expand_prologue ()
*** 1868,1875 ****
        RTX_FRAME_RELATED_P (insn) = 1;
      }
  
!   if (SAVED_REGS_FIRST)
!     ix86_emit_save_regs ();
  
    if (tsize == 0)
      ;
--- 1847,1853 ----
        RTX_FRAME_RELATED_P (insn) = 1;
      }
  
!   ix86_emit_save_regs ();
  
    if (tsize == 0)
      ;
*************** ix86_expand_prologue ()
*** 1903,1911 ****
  			     CALL_INSN_FUNCTION_USAGE (insn));
      }
  
-   if (!SAVED_REGS_FIRST)
-     ix86_emit_save_regs ();
- 
  #ifdef SUBTARGET_PROLOGUE
    SUBTARGET_PROLOGUE;
  #endif  
--- 1881,1886 ----
*************** ix86_expand_prologue ()
*** 1920,1944 ****
      emit_insn (gen_blockage ());
  }
  
- /* Emit code to pop all registers from stack.  */
- 
- static void
- ix86_emit_restore_regs ()
- {
-   int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
- 				  || current_function_uses_const_pool);
-   int limit = (frame_pointer_needed
- 	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
-   int regno;
- 
-   for (regno = 0; regno < limit; regno++)
-     if ((regs_ever_live[regno] && !call_used_regs[regno])
- 	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
-       {
- 	emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
-       }
- }
- 
  /* Emit code to add TSIZE to esp value.  Use POP instruction when
     profitable.  */
  
--- 1895,1900 ----
*************** ix86_emit_epilogue_esp_adjustment (tsize
*** 1991,2050 ****
  void
  ix86_expand_epilogue ()
  {
-   int regno;
    int nregs;
!   int limit;
    int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
  				  || current_function_uses_const_pool);
    int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
    HOST_WIDE_INT offset;
!   HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs, (int *)0,
! 						 (int *)0);
  
-   /* SP is often unreliable so we may have to go off the frame pointer. */
  
!   offset = -(tsize + nregs * UNITS_PER_WORD);
  
!   if (SAVED_REGS_FIRST)
      {
!       if (!sp_valid)
!         {
! 	  if (nregs)
! 	    emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
! 				    gen_rtx_PLUS (SImode, hard_frame_pointer_rtx,
! 						  GEN_INT (- nregs * UNITS_PER_WORD))));
! 	  else
! 	    emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx,
! 						   hard_frame_pointer_rtx));
! 	}
!       else if (tsize)
! 	ix86_emit_epilogue_esp_adjustment (tsize);
!       ix86_emit_restore_regs ();
      }
  
    /* If we're only restoring one register and sp is not valid then
       using a move instruction to restore the register since it's
!      less work than reloading sp and popping the register.  Otherwise,
!      restore sp (if necessary) and pop the registers. */
! 
!   else if (nregs > 1 || sp_valid)
      {
!       if (!sp_valid)
! 	{
! 	  rtx addr_offset;
! 	  addr_offset = adj_offsettable_operand (AT_BP (QImode), offset);
! 	  addr_offset = XEXP (addr_offset, 0);
! 
! 	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, addr_offset));
! 	}
  
!       ix86_emit_restore_regs ();
!     }
!   else
!     {
!       limit = (frame_pointer_needed
! 	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
!       for (regno = 0; regno < limit; regno++)
  	if ((regs_ever_live[regno] && ! call_used_regs[regno])
  	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
  	  {
--- 1947,1982 ----
  void
  ix86_expand_epilogue ()
  {
    int nregs;
!   int regno;
! 
    int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
  				  || current_function_uses_const_pool);
    int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
    HOST_WIDE_INT offset;
!   HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
! 						 (int *)0, (int *)0);
  
  
!   /* Calculate start of saved registers relative to ebp.  */
!   offset = -nregs * UNITS_PER_WORD;
  
! #ifdef FUNCTION_BLOCK_PROFILER_EXIT
!   if (profile_block_flag == 2)
      {
!       FUNCTION_BLOCK_PROFILER_EXIT;
      }
+ #endif
  
    /* If we're only restoring one register and sp is not valid then
       using a move instruction to restore the register since it's
!      less work than reloading sp and popping the register.  */
!   if (!sp_valid && nregs <= 1)
      {
!       if (!frame_pointer_needed)
! 	abort();
  
!       for (regno = 0; regno < HARD_FRAME_POINTER_REGNUM; regno++)
  	if ((regs_ever_live[regno] && ! call_used_regs[regno])
  	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
  	  {
*************** ix86_expand_epilogue ()
*** 2052,2081 ****
  			    adj_offsettable_operand (AT_BP (Pmode), offset));
  	    offset += 4;
  	  }
-     }
  
-   if (frame_pointer_needed)
-     {
        /* If not an i386, mov & pop is faster than "leave". */
        if (TARGET_USE_LEAVE || optimize_size)
  	emit_insn (gen_leave ());
        else
  	{
! 	  if (!SAVED_REGS_FIRST)
! 	    emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx,
! 						   hard_frame_pointer_rtx));
  	  emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
  	}
      }
!   else if (!SAVED_REGS_FIRST && tsize)
!     ix86_emit_epilogue_esp_adjustment (tsize);
! 
! #ifdef FUNCTION_BLOCK_PROFILER_EXIT
!   if (profile_block_flag == 2)
      {
!       FUNCTION_BLOCK_PROFILER_EXIT;
      }
- #endif
  
    if (current_function_pops_args && current_function_args_size)
      {
--- 1984,2021 ----
  			    adj_offsettable_operand (AT_BP (Pmode), offset));
  	    offset += 4;
  	  }
  
        /* If not an i386, mov & pop is faster than "leave". */
        if (TARGET_USE_LEAVE || optimize_size)
  	emit_insn (gen_leave ());
        else
  	{
! 	  emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx,
! 						    hard_frame_pointer_rtx,
! 						    const0_rtx));
  	  emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
  	}
      }
!   else
      {
!       /* First step is to deallocate the stack frame so that we can
! 	 pop the registers.  */
!       if (!sp_valid)
! 	{
! 	  if (!frame_pointer_needed)
! 	    abort ();
!           emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx,
! 						    hard_frame_pointer_rtx,
! 						    GEN_INT (offset)));
! 	}
!       else if (tsize)
! 	ix86_emit_epilogue_esp_adjustment (tsize);
! 
!       for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
! 	if ((regs_ever_live[regno] && !call_used_regs[regno])
! 	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
! 	  emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
      }
  
    if (current_function_pops_args && current_function_args_size)
      {
*************** ix86_attr_length_default (insn)
*** 5853,5866 ****
        {
          /* Irritatingly, single_set doesn't work with REG_UNUSED present,
  	   as we'll get from running life_analysis during reg-stack when
! 	   not optimizing.  */
          rtx set = PATTERN (insn);
          if (GET_CODE (set) == SET)
  	  ;
  	else if (GET_CODE (set) == PARALLEL
  		 && XVECLEN (set, 0) == 2
! 		 && GET_CODE (XVECEXP (set, 0, 0)) == SET
! 		 && GET_CODE (XVECEXP (set, 0, 1)) == CLOBBER)
  	  set = XVECEXP (set, 0, 0);
  	else
  	  abort ();
--- 5793,5807 ----
        {
          /* Irritatingly, single_set doesn't work with REG_UNUSED present,
  	   as we'll get from running life_analysis during reg-stack when
! 	   not optimizing.  Not that it matters anyway, now that
! 	   epilogue_deallocate_stack uses lea, and is by design not
! 	   single_set. */
          rtx set = PATTERN (insn);
          if (GET_CODE (set) == SET)
  	  ;
  	else if (GET_CODE (set) == PARALLEL
  		 && XVECLEN (set, 0) == 2
! 		 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
  	  set = XVECEXP (set, 0, 0);
  	else
  	  abort ();
Index: i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.133
diff -c -p -d -r1.133 i386.md
*** i386.md	2000/02/03 14:10:02	1.133
--- i386.md	2000/02/03 22:42:11
***************
*** 8831,8841 ****
  
  (define_insn "epilogue_deallocate_stack"
    [(set (match_operand:SI 0 "register_operand" "=r")
! 	(match_operand:SI 1 "register_operand" "+r"))
     (set (match_dup 1) (match_dup 1))]
    ""
!   "mov{l}\\t{%1, %0|%0, %1}"
!   [(set_attr "type" "imov")])
  
  (define_insn "allocate_stack_worker"
    [(unspec:SI [(match_operand:SI 0 "register_operand" "a")] 3)
--- 8831,8854 ----
  
  (define_insn "epilogue_deallocate_stack"
    [(set (match_operand:SI 0 "register_operand" "=r")
! 	(plus:SI (match_operand:SI 1 "register_operand" "+r")
! 	         (match_operand:SI 2 "immediate_operand" "i")))
     (set (match_dup 1) (match_dup 1))]
    ""
!   "*
! {
!   if (get_attr_type (insn) == TYPE_IMOV)
!     return \"mov{l}\\t{%1, %0|%0, %1}\";
!   else
!     {
!       operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
!       return \"lea{l}\\t{%a2, %0|%0, %a2}\";
!     }
! }"
!   [(set (attr "type")
! 	(if_then_else (match_operand:SI 2 "const0_operand" "")
! 	  (const_string "imov")
! 	  (const_string "lea")))])
  
  (define_insn "allocate_stack_worker"
    [(unspec:SI [(match_operand:SI 0 "register_operand" "a")] 3)


More information about the Gcc-patches mailing list