This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
patch to improve i386 epilogue (version 2)
- To: egcs-patches at cygnus dot com
- Subject: patch to improve i386 epilogue (version 2)
- From: John Wehle <john at feith dot com>
- Date: Sat, 19 Sep 1998 03:07:23 -0400 (EDT)
[ The change to jump.c is the only difference from the previous
version of this patch. ]
This change allows gcc to skip the loading of the stack pointer
when it is already known to contain the proper value. This
optimization is currently only implemented for leaf functions.
Notes:
1) On a related subject -- it may be possible to replace the loop
in final.c (leaf_function_p) which looks for CALL_INSN with:
if ( function_call_count )
return 0;
ChangeLog:
Sat Sep 19 02:52:25 EDT 1998 John Wehle (john@feith.com)
* rtl.def (INLINE_HEADER): Add new field for call_count.
* rtl.h (FUNCTION_CALL_COUNT): New macro.
(gen_inline_header_rtx): Update prototype.
* emit-rtl.c (gen_inline_header_rtx): Add call_count.
* integrate.c (initialize_for_inline): Pass in function_call_count.
(copy_rtx_and_substitute): Update function_call_count.
(output_inline_function): Set function_call_count from
inline header.
* jump.c (delete_insn): Update function_call_count.
* i386.c (ix86_epilogue): Optimize the restoring
of the stack pointer for leaf functions.
Enjoy!
-- John Wehle
------------------8<------------------------8<------------------------
*** gcc/rtl.def.ORIGINAL Thu Sep 17 22:59:27 1998
--- gcc/rtl.def Fri Sep 18 00:54:22 1998
*************** DEF_RTL_EXPR(NOTE, "note", "iuusn", 'x')
*** 396,402 ****
it contains helps to build the mapping function between the rtx's of
the function to be inlined and the current function being expanded. */
! DEF_RTL_EXPR(INLINE_HEADER, "inline_header", "iuuuiiiiiieeiiEeEssE", 'x')
/* ----------------------------------------------------------------------
Top level constituents of INSN, JUMP_INSN and CALL_INSN.
--- 396,402 ----
it contains helps to build the mapping function between the rtx's of
the function to be inlined and the current function being expanded. */
! DEF_RTL_EXPR(INLINE_HEADER, "inline_header", "iuuuiiiiiieeiiEeEssEi", 'x')
/* ----------------------------------------------------------------------
Top level constituents of INSN, JUMP_INSN and CALL_INSN.
*** gcc/rtl.h.ORIGINAL Thu Sep 17 22:59:36 1998
--- gcc/rtl.h Fri Sep 18 00:54:23 1998
*************** extern char *note_insn_name[];
*** 643,648 ****
--- 643,649 ----
function.
INLINE_REGNO_REG_RTX, INLINE_REGNO_POINTER_FLAG, and
INLINE_REGNO_POINTER_ALIGN are pointers to the corresponding arrays.
+ FUNCTION_CALL_COUNT is the number of function calls made by this function.
We want this to lay down like an INSN. The PREV_INSN field
is always NULL. The NEXT_INSN field always points to the
*************** extern char *note_insn_name[];
*** 666,671 ****
--- 667,673 ----
#define INLINE_REGNO_POINTER_FLAG(RTX) ((RTX)->fld[17].rtstr)
#define INLINE_REGNO_POINTER_ALIGN(RTX) ((RTX)->fld[18].rtstr)
#define PARMREG_STACK_LOC(RTX) ((RTX)->fld[19].rtvec)
+ #define FUNCTION_CALL_COUNT(RTX) ((RTX)->fld[20].rtint)
/* In FUNCTION_FLAGS we save some variables computed when emitting the code
for the function and which must be `or'ed into the current flag values when
*************** extern rtx gen_label_rtx PROTO((void));
*** 855,861 ****
extern rtx gen_inline_header_rtx PROTO((rtx, rtx, int, int, int, int,
int, int, rtx, rtx, int, int,
rtvec, rtx,
! rtvec, char *, char *, rtvec));
extern rtx gen_lowpart_common PROTO((enum machine_mode, rtx));
extern rtx gen_lowpart PROTO((enum machine_mode, rtx));
extern rtx gen_lowpart_if_possible PROTO((enum machine_mode, rtx));
--- 857,864 ----
extern rtx gen_inline_header_rtx PROTO((rtx, rtx, int, int, int, int,
int, int, rtx, rtx, int, int,
rtvec, rtx,
! rtvec, char *, char *, rtvec,
! int));
extern rtx gen_lowpart_common PROTO((enum machine_mode, rtx));
extern rtx gen_lowpart PROTO((enum machine_mode, rtx));
extern rtx gen_lowpart_if_possible PROTO((enum machine_mode, rtx));
*** gcc/emit-rtl.c.ORIGINAL Thu Sep 17 22:58:57 1998
--- gcc/emit-rtl.c Fri Sep 18 00:54:23 1998
*************** gen_inline_header_rtx (first_insn, first
*** 1529,1535 ****
pops_args, stack_slots, forced_labels, function_flags,
outgoing_args_size, original_arg_vector,
original_decl_initial, regno_rtx, regno_flag,
! regno_align, parm_reg_stack_loc)
rtx first_insn, first_parm_insn;
int first_labelno, last_labelno, max_parm_regnum, max_regnum, args_size;
int pops_args;
--- 1529,1535 ----
pops_args, stack_slots, forced_labels, function_flags,
outgoing_args_size, original_arg_vector,
original_decl_initial, regno_rtx, regno_flag,
! regno_align, parm_reg_stack_loc, call_count)
rtx first_insn, first_parm_insn;
int first_labelno, last_labelno, max_parm_regnum, max_regnum, args_size;
int pops_args;
*************** gen_inline_header_rtx (first_insn, first
*** 1543,1548 ****
--- 1543,1549 ----
char *regno_flag;
char *regno_align;
rtvec parm_reg_stack_loc;
+ int call_count;
{
rtx header = gen_rtx_INLINE_HEADER (VOIDmode,
cur_insn_uid++, NULL_RTX,
*************** gen_inline_header_rtx (first_insn, first
*** 1554,1560 ****
original_arg_vector,
original_decl_initial,
regno_rtx, regno_flag, regno_align,
! parm_reg_stack_loc);
return header;
}
--- 1555,1562 ----
original_arg_vector,
original_decl_initial,
regno_rtx, regno_flag, regno_align,
! parm_reg_stack_loc,
! call_count);
return header;
}
*** gcc/integrate.c.ORIGINAL Thu Sep 17 22:59:23 1998
--- gcc/integrate.c Fri Sep 18 00:54:23 1998
*************** initialize_for_inline (fndecl, min_label
*** 399,405 ****
arg_vector, (rtx) DECL_INITIAL (fndecl),
(rtvec) regno_reg_rtx, regno_pointer_flag,
regno_pointer_align,
! (rtvec) parm_reg_stack_loc);
}
/* Subroutine for `save_for_inline{copying,nocopy}'. Finishes up the
--- 399,406 ----
arg_vector, (rtx) DECL_INITIAL (fndecl),
(rtvec) regno_reg_rtx, regno_pointer_flag,
regno_pointer_align,
! (rtvec) parm_reg_stack_loc,
! function_call_count);
}
/* Subroutine for `save_for_inline{copying,nocopy}'. Finishes up the
*************** copy_rtx_and_substitute (orig, map)
*** 2650,2655 ****
--- 2651,2658 ----
break;
case CALL:
+ function_call_count++;
+
/* This is given special treatment because the first
operand of a CALL is a (MEM ...) which may get
forced into a register for cse. This is undesirable
*************** output_inline_function (fndecl)
*** 3416,3421 ****
--- 3419,3426 ----
current_function_outgoing_args_size = OUTGOING_ARGS_SIZE (head);
current_function_pops_args = POPS_ARGS (head);
+
+ function_call_count = FUNCTION_CALL_COUNT (head);
/* This is the only thing the expand_function_end call that uses to be here
actually does and that call can cause problems. */
*** gcc/jump.c.ORIGINAL Fri Sep 4 11:14:28 1998
--- gcc/jump.c Sat Sep 19 01:38:31 1998
*************** delete_insn (insn)
*** 3866,3871 ****
--- 3866,3880 ----
}
}
+ /* If INSN was a call, update function_call_count. */
+
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ if ( !function_call_count )
+ abort ();
+ function_call_count--;
+ }
+
return next;
}
*** gcc/config/i386/i386.c.ORIGINAL Thu Sep 17 23:01:00 1998
--- gcc/config/i386/i386.c Fri Sep 18 00:54:23 1998
*************** ix86_epilogue (do_rtl)
*** 2294,2299 ****
--- 2294,2301 ----
rtx xops[3];
int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
|| current_function_uses_const_pool);
+ int sp_valid = !frame_pointer_needed || (function_call_count == 0
+ && !current_function_calls_alloca);
long tsize = get_frame_size ();
/* Compute the number of registers to pop */
*************** ix86_epilogue (do_rtl)
*** 2307,2318 ****
|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
nregs++;
! /* sp is often unreliable so we must go off the frame pointer.
!
! In reality, we may not care if sp is unreliable, because we can restore
! the register relative to the frame pointer. In theory, since each move
! is the same speed as a pop, and we don't need the leal, this is faster.
! For now restore multiple registers the old way. */
offset = - tsize - (nregs * UNITS_PER_WORD);
--- 2309,2315 ----
|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
nregs++;
! /* sp is often unreliable so we may have to go off the frame pointer. */
offset = - tsize - (nregs * UNITS_PER_WORD);
*************** ix86_epilogue (do_rtl)
*** 2329,2337 ****
if (flag_pic || profile_flag || profile_block_flag)
emit_insn (gen_blockage ());
! if (nregs > 1 || ! frame_pointer_needed)
{
! if (frame_pointer_needed)
{
xops[0] = adj_offsettable_operand (AT_BP (QImode), offset);
if (do_rtl)
--- 2326,2339 ----
if (flag_pic || profile_flag || profile_block_flag)
emit_insn (gen_blockage ());
! /* If we're only restoring one register and sp is not valid, then
! use a move instruction to restore the register, since that is
! less work than reloading sp and popping the register. Otherwise,
! restore sp (if necessary) and pop the registers. */
!
! if (nregs > 1 || sp_valid)
{
! if ( !sp_valid )
{
xops[0] = adj_offsettable_operand (AT_BP (QImode), offset);
if (do_rtl)
-------------------------------------------------------------------------
| Feith Systems | Voice: 1-215-646-8000 | Email: john@feith.com |
| John Wehle | Fax: 1-215-540-5495 | |
-------------------------------------------------------------------------