This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH 4/4] Split-stack arg pointer init refinement
- From: Alan Modra <amodra at gmail dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Mon, 18 May 2015 12:25:05 +0930
- Subject: [PATCH 4/4] Split-stack arg pointer init refinement
- Authentication-results: sourceware.org; auth=none
This small refinement to the -fsplit-stack prologue arg pointer
initialization improves code generation. Compare the -O2
gcc/testsuite/gcc.dg/split-3.c code for down() below.
before                  after
mflr 0                  mflr 0
std 31,-8(1)            std 31,-8(1)
std 0,16(1)             mr 12,1
stdu 1,-10144(1)        std 0,16(1)
addi 12,1,10144         stdu 1,-10144(1)
bge 7,.L7               bge 7,.L7
mr 12,29                mr 12,29
.L7:                    .L7:
* config/rs6000/rs6000.c (rs6000_emit_allocate_stack): Return
stack adjusting insn. Formatting.
(rs6000_emit_prologue): Track stack adjusting insn, and use of
r12. If possible, emit first -fsplit-stack arg pointer insn
before stack adjust. Don't use r12 to save cr if split-stack.
diff -urpN gcc-split-stack1/gcc/config/rs6000/rs6000.c gcc-split-stack2/gcc/config/rs6000/rs6000.c
--- gcc-split-stack1/gcc/config/rs6000/rs6000.c 2015-05-18 10:17:11.341628090 +0930
+++ gcc-split-stack2/gcc/config/rs6000/rs6000.c 2015-05-18 10:16:58.758131165 +0930
@@ -22608,7 +22608,7 @@ rs6000_emit_stack_tie (rtx fp, bool hard
If COPY_REG, make sure a copy of the old frame is left there.
The generated code may use hard register 0 as a temporary. */
-static void
+static rtx_insn *
rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
{
rtx_insn *insn;
@@ -22621,7 +22621,7 @@ rs6000_emit_allocate_stack (HOST_WIDE_IN
{
warning (0, "stack frame too large");
emit_insn (gen_trap ());
- return;
+ return 0;
}
if (crtl->limit_stack)
@@ -22672,9 +22672,9 @@ rs6000_emit_allocate_stack (HOST_WIDE_IN
insn = emit_insn (TARGET_32BIT
? gen_movsi_update_stack (stack_reg, stack_reg,
- todec, stack_reg)
+ todec, stack_reg)
: gen_movdi_di_update_stack (stack_reg, stack_reg,
- todec, stack_reg));
+ todec, stack_reg));
/* Since we didn't use gen_frame_mem to generate the MEM, grab
it now and set the alias set/attributes. The above gen_*_update
calls will generate a PARALLEL with the MEM set being the first
@@ -22692,6 +22692,7 @@ rs6000_emit_allocate_stack (HOST_WIDE_IN
add_reg_note (insn, REG_FRAME_RELATED_EXPR,
gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
GEN_INT (-size))));
+ return insn;
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
@@ -23496,6 +23497,10 @@ rs6000_emit_prologue (void)
/* Offset to top of frame for frame_reg and sp respectively. */
HOST_WIDE_INT frame_off = 0;
HOST_WIDE_INT sp_off = 0;
+ /* sp_adjust is the stack adjusting instruction, tracked so that the
+ insn setting up the split-stack arg pointer can be emitted just
+ prior to it, when r12 is not used here for other purposes. */
+ rtx_insn *sp_adjust = 0;
#ifdef ENABLE_CHECKING
/* Track and check usage of r0, r11, r12. */
@@ -23714,7 +23719,10 @@ rs6000_emit_prologue (void)
ptr_off = info->altivec_save_offset + info->altivec_size;
frame_off = -ptr_off;
}
- rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
+ sp_adjust = rs6000_emit_allocate_stack (info->total_size,
+ ptr_reg, ptr_off);
+ if (REGNO (frame_reg_rtx) == 12)
+ sp_adjust = 0;
sp_off = info->total_size;
if (frame_reg_rtx != sp_reg_rtx)
rs6000_emit_stack_tie (frame_reg_rtx, false);
@@ -23755,7 +23763,8 @@ rs6000_emit_prologue (void)
if (!WORLD_SAVE_P (info)
&& info->cr_save_p
&& REGNO (frame_reg_rtx) != cr_save_regno
- && !(using_static_chain_p && cr_save_regno == 11))
+ && !(using_static_chain_p && cr_save_regno == 11)
+ && !(flag_split_stack && cr_save_regno == 12 && sp_adjust))
{
cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
START_USE (cr_save_regno);
@@ -23901,6 +23910,8 @@ rs6000_emit_prologue (void)
int end_save = info->gp_save_offset + info->gp_size;
int ptr_off;
+ if (ptr_regno == 12)
+ sp_adjust = 0;
if (!ptr_set_up)
ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
@@ -24219,7 +24230,10 @@ rs6000_emit_prologue (void)
}
else if (REGNO (frame_reg_rtx) == 1)
frame_off = info->total_size;
- rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
+ sp_adjust = rs6000_emit_allocate_stack (info->total_size,
+ ptr_reg, ptr_off);
+ if (REGNO (frame_reg_rtx) == 12)
+ sp_adjust = 0;
sp_off = info->total_size;
if (frame_reg_rtx != sp_reg_rtx)
rs6000_emit_stack_tie (frame_reg_rtx, false);
@@ -24249,6 +24263,8 @@ rs6000_emit_prologue (void)
gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
NOT_INUSE (0);
+ if (scratch_regno == 12)
+ sp_adjust = 0;
if (end_save + frame_off != 0)
{
rtx offset = GEN_INT (end_save + frame_off);
@@ -24326,7 +24342,7 @@ rs6000_emit_prologue (void)
if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& !using_static_chain_p)
save_regno = 11;
- else if (REGNO (frame_reg_rtx) == 12)
+ else if (flag_split_stack || REGNO (frame_reg_rtx) == 12)
{
save_regno = 11;
if (using_static_chain_p)
@@ -24372,6 +24388,7 @@ rs6000_emit_prologue (void)
rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
rtx tmp = gen_rtx_REG (Pmode, 12);
+ sp_adjust = 0;
insn = emit_move_insn (tmp, lr);
RTX_FRAME_RELATED_P (insn) = 1;
@@ -24434,7 +24451,13 @@ rs6000_emit_prologue (void)
__morestack was called, it left the arg pointer to the old
stack in r29. Otherwise, the arg pointer is the top of the
current frame. */
- if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
+ if (sp_adjust)
+ {
+ rtx r12 = gen_rtx_REG (Pmode, 12);
+ rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
+ emit_insn_before (set_r12, sp_adjust);
+ }
+ else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
{
rtx r12 = gen_rtx_REG (Pmode, 12);
if (frame_off == 0)
--
Alan Modra
Australia Development Lab, IBM