This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

patch to improve i386 epilogue


This change allows gcc to skip reloading the stack pointer in the
i386 function epilogue when it is already known to contain the proper
value.  This optimization is currently implemented only for leaf functions.

Notes:

  1) On a related subject -- it may be possible to replace the loop
     in final.c (leaf_function_p), which looks for a CALL_INSN, with:

       if ( function_call_count )
         return 0;

ChangeLog:

Thu Sep 17 03:49:28 EDT 1998  John Wehle  (john@feith.com)

	* rtl.def (INLINE_HEADER): Add new field for call_count.
	* rtl.h (FUNCTION_CALL_COUNT): New macro.
	(gen_inline_header_rtx): Update prototype.
	* emit-rtl.c (gen_inline_header_rtx): Add call_count.
	* integrate.c (initialize_for_inline): Pass in function_call_count.
	(copy_rtx_and_substitute): Update function_call_count.
	(output_inline_function): Set function_call_count from
	inline header.
	* i386.c (ix86_epilogue): Optimize the restoring
	of the stack pointer for leaf functions.

Enjoy!

-- John Wehle
------------------8<------------------------8<------------------------
*** gcc/rtl.def.ORIGINAL	Thu Sep 17 22:59:27 1998
--- gcc/rtl.def	Fri Sep 18 00:54:22 1998
*************** DEF_RTL_EXPR(NOTE, "note", "iuusn", 'x')
*** 396,402 ****
     it contains helps to build the mapping function between the rtx's of
     the function to be inlined and the current function being expanded.  */
  
! DEF_RTL_EXPR(INLINE_HEADER, "inline_header", "iuuuiiiiiieeiiEeEssE", 'x')
  
  /* ----------------------------------------------------------------------
     Top level constituents of INSN, JUMP_INSN and CALL_INSN.
--- 396,402 ----
     it contains helps to build the mapping function between the rtx's of
     the function to be inlined and the current function being expanded.  */
  
! DEF_RTL_EXPR(INLINE_HEADER, "inline_header", "iuuuiiiiiieeiiEeEssEi", 'x')
  
  /* ----------------------------------------------------------------------
     Top level constituents of INSN, JUMP_INSN and CALL_INSN.
*** gcc/rtl.h.ORIGINAL	Thu Sep 17 22:59:36 1998
--- gcc/rtl.h	Fri Sep 18 00:54:23 1998
*************** extern char *note_insn_name[];
*** 643,648 ****
--- 643,649 ----
      function.
     INLINE_REGNO_REG_RTX, INLINE_REGNO_POINTER_FLAG, and
      INLINE_REGNO_POINTER_ALIGN are pointers to the corresponding arrays.
+    FUNCTION_CALL_COUNT is the number of function calls made by this function.
  
     We want this to lay down like an INSN.  The PREV_INSN field
     is always NULL.  The NEXT_INSN field always points to the
*************** extern char *note_insn_name[];
*** 666,671 ****
--- 667,673 ----
  #define INLINE_REGNO_POINTER_FLAG(RTX) ((RTX)->fld[17].rtstr)
  #define INLINE_REGNO_POINTER_ALIGN(RTX) ((RTX)->fld[18].rtstr)
  #define PARMREG_STACK_LOC(RTX) ((RTX)->fld[19].rtvec)
+ #define FUNCTION_CALL_COUNT(RTX) ((RTX)->fld[20].rtint)
  
  /* In FUNCTION_FLAGS we save some variables computed when emitting the code
     for the function and which must be `or'ed into the current flag values when
*************** extern rtx gen_label_rtx		PROTO((void));
*** 855,861 ****
  extern rtx gen_inline_header_rtx	PROTO((rtx, rtx, int, int, int, int,
  					       int, int, rtx, rtx, int, int,
  					       rtvec, rtx,
! 					       rtvec, char *, char *, rtvec));
  extern rtx gen_lowpart_common		PROTO((enum machine_mode, rtx));
  extern rtx gen_lowpart			PROTO((enum machine_mode, rtx));
  extern rtx gen_lowpart_if_possible	PROTO((enum machine_mode, rtx));
--- 857,864 ----
  extern rtx gen_inline_header_rtx	PROTO((rtx, rtx, int, int, int, int,
  					       int, int, rtx, rtx, int, int,
  					       rtvec, rtx,
! 					       rtvec, char *, char *, rtvec,
! 					       int));
  extern rtx gen_lowpart_common		PROTO((enum machine_mode, rtx));
  extern rtx gen_lowpart			PROTO((enum machine_mode, rtx));
  extern rtx gen_lowpart_if_possible	PROTO((enum machine_mode, rtx));
*** gcc/emit-rtl.c.ORIGINAL	Thu Sep 17 22:58:57 1998
--- gcc/emit-rtl.c	Fri Sep 18 00:54:23 1998
*************** gen_inline_header_rtx (first_insn, first
*** 1529,1535 ****
  		       pops_args, stack_slots, forced_labels, function_flags,
  		       outgoing_args_size, original_arg_vector,
  		       original_decl_initial, regno_rtx, regno_flag,
! 		       regno_align, parm_reg_stack_loc)
       rtx first_insn, first_parm_insn;
       int first_labelno, last_labelno, max_parm_regnum, max_regnum, args_size;
       int pops_args;
--- 1529,1535 ----
  		       pops_args, stack_slots, forced_labels, function_flags,
  		       outgoing_args_size, original_arg_vector,
  		       original_decl_initial, regno_rtx, regno_flag,
! 		       regno_align, parm_reg_stack_loc, call_count)
       rtx first_insn, first_parm_insn;
       int first_labelno, last_labelno, max_parm_regnum, max_regnum, args_size;
       int pops_args;
*************** gen_inline_header_rtx (first_insn, first
*** 1543,1548 ****
--- 1543,1549 ----
       char *regno_flag;
       char *regno_align;
       rtvec parm_reg_stack_loc;
+      int call_count;
  {
    rtx header = gen_rtx_INLINE_HEADER (VOIDmode,
  				      cur_insn_uid++, NULL_RTX,
*************** gen_inline_header_rtx (first_insn, first
*** 1554,1560 ****
  				      original_arg_vector,
  				      original_decl_initial,
  				      regno_rtx, regno_flag, regno_align,
! 				      parm_reg_stack_loc);
    return header;
  }
  
--- 1555,1562 ----
  				      original_arg_vector,
  				      original_decl_initial,
  				      regno_rtx, regno_flag, regno_align,
! 				      parm_reg_stack_loc,
! 				      call_count);
    return header;
  }
  
*** gcc/integrate.c.ORIGINAL	Thu Sep 17 22:59:23 1998
--- gcc/integrate.c	Fri Sep 18 00:54:23 1998
*************** initialize_for_inline (fndecl, min_label
*** 399,405 ****
  				arg_vector, (rtx) DECL_INITIAL (fndecl),
  				(rtvec) regno_reg_rtx, regno_pointer_flag,
  				regno_pointer_align,
! 				(rtvec) parm_reg_stack_loc);
  }
  
  /* Subroutine for `save_for_inline{copying,nocopy}'.  Finishes up the
--- 399,406 ----
  				arg_vector, (rtx) DECL_INITIAL (fndecl),
  				(rtvec) regno_reg_rtx, regno_pointer_flag,
  				regno_pointer_align,
! 				(rtvec) parm_reg_stack_loc,
! 				function_call_count);
  }
  
  /* Subroutine for `save_for_inline{copying,nocopy}'.  Finishes up the
*************** copy_rtx_and_substitute (orig, map)
*** 2650,2655 ****
--- 2651,2658 ----
        break;
  
      case CALL:
+       function_call_count++;
+ 
        /* This is given special treatment because the first
  	 operand of a CALL is a (MEM ...) which may get
  	 forced into a register for cse.  This is undesirable
*************** output_inline_function (fndecl)
*** 3416,3421 ****
--- 3419,3426 ----
  
    current_function_outgoing_args_size = OUTGOING_ARGS_SIZE (head);
    current_function_pops_args = POPS_ARGS (head);
+ 
+   function_call_count = FUNCTION_CALL_COUNT (head);
  
    /* This is the only thing the expand_function_end call that uses to be here
       actually does and that call can cause problems.  */
*** gcc/config/i386/i386.c.ORIGINAL	Thu Sep 17 23:01:00 1998
--- gcc/config/i386/i386.c	Fri Sep 18 00:54:23 1998
*************** ix86_epilogue (do_rtl)
*** 2294,2299 ****
--- 2294,2301 ----
    rtx xops[3];
    int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
  				  || current_function_uses_const_pool);
+   int sp_valid = !frame_pointer_needed || (function_call_count == 0
+ 					   && !current_function_calls_alloca);
    long tsize = get_frame_size ();
  
    /* Compute the number of registers to pop */
*************** ix86_epilogue (do_rtl)
*** 2307,2318 ****
  	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
        nregs++;
  
!   /* sp is often  unreliable so we must go off the frame pointer.
! 
!      In reality, we may not care if sp is unreliable, because we can restore
!      the register relative to the frame pointer.  In theory, since each move
!      is the same speed as a pop, and we don't need the leal, this is faster.
!      For now restore multiple registers the old way. */
  
    offset = - tsize - (nregs * UNITS_PER_WORD);
  
--- 2309,2315 ----
  	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
        nregs++;
  
!   /* sp is often unreliable so we may have to go off the frame pointer. */
  
    offset = - tsize - (nregs * UNITS_PER_WORD);
  
*************** ix86_epilogue (do_rtl)
*** 2329,2337 ****
    if (flag_pic || profile_flag || profile_block_flag)
      emit_insn (gen_blockage ());
  
!   if (nregs > 1 || ! frame_pointer_needed)
      {
!       if (frame_pointer_needed)
  	{
  	  xops[0] = adj_offsettable_operand (AT_BP (QImode), offset);
  	  if (do_rtl)
--- 2326,2339 ----
    if (flag_pic || profile_flag || profile_block_flag)
      emit_insn (gen_blockage ());
  
!   /* If we're only restoring one register and sp is not valid then
!      use a move instruction to restore the register since it's
!      less work than reloading sp and popping the register.  Otherwise,
!      restore sp (if necessary) and pop the registers. */
! 
!   if (nregs > 1 || sp_valid)
      {
!       if ( !sp_valid )
  	{
  	  xops[0] = adj_offsettable_operand (AT_BP (QImode), offset);
  	  if (do_rtl)
-------------------------------------------------------------------------
|   Feith Systems  |   Voice: 1-215-646-8000  |  Email: john@feith.com  |
|    John Wehle    |     Fax: 1-215-540-5495  |                         |
-------------------------------------------------------------------------



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]