This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: i386 epilogue code tweek
- To: Jan Hubicka <hubicka at atrey dot karlin dot mff dot cuni dot cz>
- Subject: Re: i386 epilogue code tweek
- From: Richard Henderson <rth at cygnus dot com>
- Date: Fri, 4 Feb 2000 13:42:59 -0800
- Cc: gcc-patches at gcc dot gnu dot org
- References: <20000203191333.G21465@atrey.karlin.mff.cuni.cz> <20000203145158.A16484@cygnus.com>
I made a couple more tweaks -- commonizing prologue_allocate_stack
and epilogue_deallocate_stack, fixing ix86_emit_epilogue_esp_adjustment
to emit an ebp-safe increment -- and committed.
r~
* i386.c (SAVED_REGS_FIRST): Remove.
(ix86_initial_elimination_offset): Handle only SAVED_REGS_FIRST mode.
(ix86_compute_frame_size): Likewise.
(ix86_expand_prologue): Likewise. Use pro_epilogue_adjust_stack.
(ix86_emit_restore_regs): Remove.
(ix86_emit_epilogue_esp_adjustment): Use pro_epilogue_adjust_stack
when a frame pointer is in use.
(ix86_expand_epilogue): Handle only SAVED_REGS_FIRST mode. Use mov
instead of pop to restore a register when profitable; emit leave
when profitable.
(ix86_attr_length_default): Handle pro_epilogue_adjust_stack
as a TYPE_LEA insn.
(ix86_adjust_cost): Handle pro_epilogue_adjust_stack as TYPE_ALU.
* i386.md (prologue_allocate_stack): Remove.
(epilogue_deallocate_stack): Remove.
(pro_epilogue_adjust_stack): New.
Index: i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.132
diff -c -p -d -r1.132 i386.c
*** i386.c 2000/02/02 07:50:20 1.132
--- i386.c 2000/02/04 21:34:40
*************** Boston, MA 02111-1307, USA. */
*** 41,56 ****
#include "basic-block.h"
#include "ggc.h"
- /* True when we want to do pushes before allocating stack to get better
- scheduling.
-
- Saving registers first is win in the most cases except for LEAVE
- instruction. Macro is 0 iff we will use LEAVE. */
-
- #define SAVED_REGS_FIRST \
- (!frame_pointer_needed || (!TARGET_USE_LEAVE && !optimize_size))
-
-
#ifdef EXTRA_CONSTRAINT
/* If EXTRA_CONSTRAINT is defined, then the 'S'
constraint in REG_CLASS_FROM_LETTER will no longer work, and various
--- 41,46 ----
*************** static HOST_WIDE_INT ix86_compute_frame_
*** 411,417 ****
int *, int *, int *));
static int ix86_nsaved_regs PARAMS((void));
static void ix86_emit_save_regs PARAMS((void));
- static void ix86_emit_restore_regs PARAMS((void));
static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
struct ix86_address
--- 401,406 ----
*************** ix86_initial_elimination_offset (from, t
*** 1705,1720 ****
saved frame pointer if frame_pointer_needed
<- HARD_FRAME_POINTER
! [saved regs if SAVED_REGS_FIRST]
[padding1] \
| <- FRAME_POINTER
[frame] > tsize
|
[padding2] /
-
- [saved regs if !SAVED_REGS_FIRST]
- <- STACK_POINTER
*/
if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
--- 1694,1706 ----
saved frame pointer if frame_pointer_needed
<- HARD_FRAME_POINTER
! [saved regs]
[padding1] \
| <- FRAME_POINTER
[frame] > tsize
|
[padding2] /
*/
if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
*************** ix86_initial_elimination_offset (from, t
*** 1725,1732 ****
&& to == HARD_FRAME_POINTER_REGNUM)
{
ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *)0);
! if (SAVED_REGS_FIRST)
! padding1 += nregs * UNITS_PER_WORD;
return -padding1;
}
else
--- 1711,1717 ----
&& to == HARD_FRAME_POINTER_REGNUM)
{
ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *)0);
! padding1 += nregs * UNITS_PER_WORD;
return -padding1;
}
else
*************** ix86_initial_elimination_offset (from, t
*** 1743,1752 ****
return tsize + nregs * UNITS_PER_WORD + frame_size;
else if (from != FRAME_POINTER_REGNUM)
abort ();
- else if (SAVED_REGS_FIRST)
- return tsize - padding1;
else
! return tsize + nregs * UNITS_PER_WORD - padding1;
}
}
--- 1728,1735 ----
return tsize + nregs * UNITS_PER_WORD + frame_size;
else if (from != FRAME_POINTER_REGNUM)
abort ();
else
! return tsize - padding1;
}
}
*************** ix86_compute_frame_size (size, nregs_on_
*** 1788,1800 ****
if (stack_alignment_needed < 4)
stack_alignment_needed = 4;
! if (stack_alignment_needed > preferred_alignment)
abort ();
! if (SAVED_REGS_FIRST)
! offset += nregs * UNITS_PER_WORD;
! else
! total_size += nregs * UNITS_PER_WORD;
total_size += offset;
--- 1771,1780 ----
if (stack_alignment_needed < 4)
stack_alignment_needed = 4;
! if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
abort ();
! offset += nregs * UNITS_PER_WORD;
total_size += offset;
*************** ix86_compute_frame_size (size, nregs_on_
*** 1807,1815 ****
}
/* Align stack boundary. */
! if (!current_function_is_leaf)
! padding2 = ((total_size + preferred_alignment - 1)
! & -preferred_alignment) - total_size;
}
#endif
--- 1787,1794 ----
}
/* Align stack boundary. */
! padding2 = ((total_size + preferred_alignment - 1)
! & -preferred_alignment) - total_size;
}
#endif
*************** ix86_expand_prologue ()
*** 1868,1885 ****
RTX_FRAME_RELATED_P (insn) = 1;
}
! if (SAVED_REGS_FIRST)
! ix86_emit_save_regs ();
if (tsize == 0)
;
else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
{
if (frame_pointer_needed)
! insn = emit_insn (gen_prologue_allocate_stack (stack_pointer_rtx,
! stack_pointer_rtx,
! GEN_INT (-tsize),
! hard_frame_pointer_rtx));
else
insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (-tsize)));
--- 1847,1862 ----
RTX_FRAME_RELATED_P (insn) = 1;
}
! ix86_emit_save_regs ();
if (tsize == 0)
;
else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
{
if (frame_pointer_needed)
! insn = emit_insn (gen_pro_epilogue_adjust_stack
! (stack_pointer_rtx, stack_pointer_rtx,
! GEN_INT (-tsize), hard_frame_pointer_rtx));
else
insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (-tsize)));
*************** ix86_expand_prologue ()
*** 1903,1911 ****
CALL_INSN_FUNCTION_USAGE (insn));
}
- if (!SAVED_REGS_FIRST)
- ix86_emit_save_regs ();
-
#ifdef SUBTARGET_PROLOGUE
SUBTARGET_PROLOGUE;
#endif
--- 1880,1885 ----
*************** ix86_expand_prologue ()
*** 1920,1944 ****
emit_insn (gen_blockage ());
}
- /* Emit code to pop all registers from stack. */
-
- static void
- ix86_emit_restore_regs ()
- {
- int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
- || current_function_uses_const_pool);
- int limit = (frame_pointer_needed
- ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
- int regno;
-
- for (regno = 0; regno < limit; regno++)
- if ((regs_ever_live[regno] && !call_used_regs[regno])
- || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
- {
- emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
- }
- }
-
/* Emit code to add TSIZE to esp value. Use POP instruction when
profitable. */
--- 1894,1899 ----
*************** ix86_emit_epilogue_esp_adjustment (tsize
*** 1980,1988 ****
}
else
{
! /* If there is no frame pointer, we must still release the frame. */
! emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
! GEN_INT (tsize)));
}
}
--- 1935,1950 ----
}
else
{
! /* If a frame pointer is present, we must be sure to tie the sp
! to the fp so that we don't mis-schedule. */
! if (frame_pointer_needed)
! emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
! stack_pointer_rtx,
! GEN_INT (tsize),
! hard_frame_pointer_rtx));
! else
! emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
! GEN_INT (tsize)));
}
}
*************** ix86_emit_epilogue_esp_adjustment (tsize
*** 1991,2050 ****
void
ix86_expand_epilogue ()
{
- int regno;
int nregs;
! int limit;
int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
|| current_function_uses_const_pool);
int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
HOST_WIDE_INT offset;
! HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs, (int *)0,
! (int *)0);
- /* SP is often unreliable so we may have to go off the frame pointer. */
! offset = -(tsize + nregs * UNITS_PER_WORD);
! if (SAVED_REGS_FIRST)
{
! if (!sp_valid)
! {
! if (nregs)
! emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
! gen_rtx_PLUS (SImode, hard_frame_pointer_rtx,
! GEN_INT (- nregs * UNITS_PER_WORD))));
! else
! emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx,
! hard_frame_pointer_rtx));
! }
! else if (tsize)
! ix86_emit_epilogue_esp_adjustment (tsize);
! ix86_emit_restore_regs ();
}
/* If we're only restoring one register and sp is not valid then
using a move instruction to restore the register since it's
! less work than reloading sp and popping the register. Otherwise,
! restore sp (if necessary) and pop the registers. */
!
! else if (nregs > 1 || sp_valid)
{
! if (!sp_valid)
! {
! rtx addr_offset;
! addr_offset = adj_offsettable_operand (AT_BP (QImode), offset);
! addr_offset = XEXP (addr_offset, 0);
!
! emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, addr_offset));
! }
! ix86_emit_restore_regs ();
! }
! else
! {
! limit = (frame_pointer_needed
! ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
! for (regno = 0; regno < limit; regno++)
if ((regs_ever_live[regno] && ! call_used_regs[regno])
|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
{
--- 1953,1988 ----
void
ix86_expand_epilogue ()
{
int nregs;
! int regno;
!
int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
|| current_function_uses_const_pool);
int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
HOST_WIDE_INT offset;
! HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
! (int *)0, (int *)0);
! /* Calculate start of saved registers relative to ebp. */
! offset = -nregs * UNITS_PER_WORD;
! #ifdef FUNCTION_BLOCK_PROFILER_EXIT
! if (profile_block_flag == 2)
{
! FUNCTION_BLOCK_PROFILER_EXIT;
}
+ #endif
/* If we're only restoring one register and sp is not valid then
using a move instruction to restore the register since it's
! less work than reloading sp and popping the register. */
! if (!sp_valid && nregs <= 1)
{
! if (!frame_pointer_needed)
! abort();
! for (regno = 0; regno < HARD_FRAME_POINTER_REGNUM; regno++)
if ((regs_ever_live[regno] && ! call_used_regs[regno])
|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
{
*************** ix86_expand_epilogue ()
*** 2052,2081 ****
adj_offsettable_operand (AT_BP (Pmode), offset));
offset += 4;
}
- }
- if (frame_pointer_needed)
- {
/* If not an i386, mov & pop is faster than "leave". */
if (TARGET_USE_LEAVE || optimize_size)
emit_insn (gen_leave ());
else
{
! if (!SAVED_REGS_FIRST)
! emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx,
! hard_frame_pointer_rtx));
emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
}
}
! else if (!SAVED_REGS_FIRST && tsize)
! ix86_emit_epilogue_esp_adjustment (tsize);
!
! #ifdef FUNCTION_BLOCK_PROFILER_EXIT
! if (profile_block_flag == 2)
{
! FUNCTION_BLOCK_PROFILER_EXIT;
}
- #endif
if (current_function_pops_args && current_function_args_size)
{
--- 1990,2029 ----
adj_offsettable_operand (AT_BP (Pmode), offset));
offset += 4;
}
/* If not an i386, mov & pop is faster than "leave". */
if (TARGET_USE_LEAVE || optimize_size)
emit_insn (gen_leave ());
else
{
! emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
! hard_frame_pointer_rtx,
! const0_rtx,
! hard_frame_pointer_rtx));
emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
}
}
! else
{
! /* First step is to deallocate the stack frame so that we can
! pop the registers. */
! if (!sp_valid)
! {
! if (!frame_pointer_needed)
! abort ();
! emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
! hard_frame_pointer_rtx,
! GEN_INT (offset),
! hard_frame_pointer_rtx));
! }
! else if (tsize)
! ix86_emit_epilogue_esp_adjustment (tsize);
!
! for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
! if ((regs_ever_live[regno] && !call_used_regs[regno])
! || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
! emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
}
if (current_function_pops_args && current_function_args_size)
{
*************** ix86_attr_length_default (insn)
*** 5853,5866 ****
{
/* Irritatingly, single_set doesn't work with REG_UNUSED present,
as we'll get from running life_analysis during reg-stack when
! not optimizing. */
rtx set = PATTERN (insn);
if (GET_CODE (set) == SET)
;
else if (GET_CODE (set) == PARALLEL
! && XVECLEN (set, 0) == 2
! && GET_CODE (XVECEXP (set, 0, 0)) == SET
! && GET_CODE (XVECEXP (set, 0, 1)) == CLOBBER)
set = XVECEXP (set, 0, 0);
else
abort ();
--- 5801,5814 ----
{
/* Irritatingly, single_set doesn't work with REG_UNUSED present,
as we'll get from running life_analysis during reg-stack when
! not optimizing. Not that it matters anyway, now that
! pro_epilogue_adjust_stack uses lea, and is by design not
! single_set. */
rtx set = PATTERN (insn);
if (GET_CODE (set) == SET)
;
else if (GET_CODE (set) == PARALLEL
! && GET_CODE (XVECEXP (set, 0, 0)) == SET)
set = XVECEXP (set, 0, 0);
else
abort ();
*************** ix86_adjust_cost (insn, link, dep_insn,
*** 6011,6027 ****
if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
return cost;
! /* Prologue and epilogue allocators have false dependency on ebp.
! This results in one cycle extra stall on Pentium prologue scheduling, so
! handle this important case manually. */
! if ((dep_insn_code_number == CODE_FOR_prologue_allocate_stack
! || dep_insn_code_number == CODE_FOR_epilogue_deallocate_stack)
&& !reg_mentioned_p (stack_pointer_rtx, insn))
return 0;
-
- insn_type = get_attr_type (insn);
- dep_insn_type = get_attr_type (dep_insn);
switch (ix86_cpu)
{
--- 5959,5974 ----
if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
return cost;
! insn_type = get_attr_type (insn);
! dep_insn_type = get_attr_type (dep_insn);
! /* Prologue and epilogue allocators can have a false dependency on ebp.
! This results in one cycle extra stall on Pentium prologue scheduling,
! so handle this important case manually. */
! if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
! && dep_insn_type == TYPE_ALU
&& !reg_mentioned_p (stack_pointer_rtx, insn))
return 0;
switch (ix86_cpu)
{
Index: i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.133
diff -c -p -d -r1.133 i386.md
*** i386.md 2000/02/03 14:10:02 1.133
--- i386.md 2000/02/04 21:34:41
***************
*** 8807,8841 ****
;;
;; in proper program order.
! (define_insn "prologue_allocate_stack"
! [(set (match_operand:SI 0 "register_operand" "=r")
! (plus:SI (match_operand:SI 1 "register_operand" "0")
! (match_operand:SI 2 "nonmemory_operand" "ri")))
! (set (match_operand:SI 3 "register_operand" "=r")
(match_dup 3))
(clobber (reg:CC 17))]
""
"*
{
! if (GET_CODE (operands[2]) == CONST_INT
! && (INTVAL (operands[2]) == 128
! || (INTVAL (operands[2]) < 0
! && INTVAL (operands[2]) != -128)))
{
! operands[2] = GEN_INT (-INTVAL (operands[2]));
! return \"sub{l}\\t{%2, %0|%0, %2}\";
}
- return \"add{l}\\t{%2, %0|%0, %2}\";
}"
! [(set_attr "type" "alu")])
!
! (define_insn "epilogue_deallocate_stack"
! [(set (match_operand:SI 0 "register_operand" "=r")
! (match_operand:SI 1 "register_operand" "+r"))
! (set (match_dup 1) (match_dup 1))]
! ""
! "mov{l}\\t{%1, %0|%0, %1}"
! [(set_attr "type" "imov")])
(define_insn "allocate_stack_worker"
[(unspec:SI [(match_operand:SI 0 "register_operand" "a")] 3)
--- 8807,8853 ----
;;
;; in proper program order.
! (define_insn "pro_epilogue_adjust_stack"
! [(set (match_operand:SI 0 "register_operand" "=r,r")
! (plus:SI (match_operand:SI 1 "register_operand" "0,r")
! (match_operand:SI 2 "immediate_operand" "i,i")))
! (set (match_operand:SI 3 "register_operand" "+r,r")
(match_dup 3))
(clobber (reg:CC 17))]
""
"*
{
! switch (get_attr_type (insn))
{
! case TYPE_IMOV:
! return \"mov{l}\\t{%1, %0|%0, %1}\";
!
! case TYPE_ALU:
! if (GET_CODE (operands[2]) == CONST_INT
! && (INTVAL (operands[2]) == 128
! || (INTVAL (operands[2]) < 0
! && INTVAL (operands[2]) != -128)))
! {
! operands[2] = GEN_INT (-INTVAL (operands[2]));
! return \"sub{l}\\t{%2, %0|%0, %2}\";
! }
! return \"add{l}\\t{%2, %0|%0, %2}\";
!
! case TYPE_LEA:
! operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
! return \"lea{l}\\t{%a2, %0|%0, %a2}\";
!
! default:
! abort ();
}
}"
! [(set (attr "type")
! (cond [(eq_attr "alternative" "0")
! (const_string "alu")
! (match_operand:SI 2 "const0_operand" "")
! (const_string "imov")
! ]
! (const_string "lea")))])
(define_insn "allocate_stack_worker"
[(unspec:SI [(match_operand:SI 0 "register_operand" "a")] 3)