This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH/RFC] PR target/15130 SH: A tail call optimization
- From: Kaz Kojima <kkojima at rr dot iij4u dot or dot jp>
- To: joern dot rennecke at superh dot com
- Cc: gcc-patches at gcc dot gnu dot org, aoliva at redhat dot com
- Date: Fri, 07 May 2004 07:41:26 +0900 (JST)
- Subject: Re: [PATCH/RFC] PR target/15130 SH: A tail call optimization
- References: <20040506034057.E5D792272A@meolyon.local>
amylaar@spamcop.net (Joern Rennecke) wrote:
> So, as it stands, we have to assume that all registers in
> reg_class_contents[SIBCALL_REGS] are used, and also all the argument passing
> registers - leaving no call-clobbered register to allocate our temporary in
> for SH1..SH4.
> OTOH, prior to the final adjustment, if any general purpose register was saved,
> it is available. And the final adjustment shouldn't actually need a
> temporary, because the adjustment is of limited size.
> So it is rather unlikely that we'll be short of a temporary.
>
> so possibilities are
[snip]
> 4) Use a push/pop sequence, except when not using a frame pointer,
> you'll need to push one register, use that to calculate an address to
> save a second one at the bottom of the to-be-discarded frame, pop
> the saved value of the first reg into the second, store it at the bottom
> of the frame too, do the adjustment, and then pop both registers.
> Again, this is pretty grotty code - even worse than 3) - but it doesn't
> affect any other parts of the compiler, and it should be simpler to
> implement. I.e.:
> mov r4,@r15
> mov adjust,r4
> add r15,r4
> mov r5,@-r4
> mov @r15,r5
> mov r5,@-r4
> mov r4,r15
> mov @r15+,r4
> mov @r15+,r5
>
> Considering that it is very rare that we have a large frame, but save no
> general purpose registers, I think we should go with 3) or 4).
Sounds reasonable. How about the appended patch?
For sibcall epilogues, it scans the live registers for a temporary and
falls back to (4) when that scan fails in the SH1-4 case.
For SHmedia, it first looks for a temporary among the usable registers
and then among the live registers. That should be enough for
SHmedia.
It has been regression-tested on mainline for sh4-linux and sh64-linux. The
bootstrap on 3.4.0 isn't complete yet, but the compilers have been built successfully.
Regards,
kaz
--
* config/sh/sh-protos.h (sh_expand_epilogue): Change prototype.
* config/sh/sh.c (output_stack_adjust): Take the sibcall epilogue
into account. Compute the correct number of general registers
for the return value. Generate a special push/pop sequence when
failing to get a temporary register for non SHmedia epilogue.
(sh_expand_epilogue): Add an argument to show whether it's for
sibcall or not. Set the 3rd argument of output_stack_adjust to
-1 if needed.
(sh_need_epilogue): Call sh_expand_epilogue with 0.
* config/sh/sh.md (sibcall_epilogue): Call sh_expand_epilogue
with 1.
(epilogue): Call sh_expand_epilogue with 0.
diff -u3prN ORIG/gcc/gcc/config/sh/sh-protos.h LOCAL/gcc/gcc/config/sh/sh-protos.h
--- ORIG/gcc/gcc/config/sh/sh-protos.h Tue Jan 13 22:31:51 2004
+++ LOCAL/gcc/gcc/config/sh/sh-protos.h Thu May 6 13:53:17 2004
@@ -109,7 +109,7 @@ extern int sh_handle_pragma (int (*)(voi
extern struct rtx_def *get_fpscr_rtx (void);
extern int sh_media_register_for_return (void);
extern void sh_expand_prologue (void);
-extern void sh_expand_epilogue (void);
+extern void sh_expand_epilogue (bool);
extern int sh_need_epilogue (void);
extern void sh_set_return_address (rtx, rtx);
extern int initial_elimination_offset (int, int);
diff -u3prN ORIG/gcc/gcc/config/sh/sh.c LOCAL/gcc/gcc/config/sh/sh.c
--- ORIG/gcc/gcc/config/sh/sh.c Tue Apr 6 13:53:26 2004
+++ LOCAL/gcc/gcc/config/sh/sh.c Thu May 6 17:31:09 2004
@@ -4631,8 +4631,9 @@ static int extra_push;
/* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
- for an epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET
- of all the registers that are about to be restored, and hence dead. */
+ for an epilogue, and a negative value means that it's for a sibcall. If
+ LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of all the
+ registers that are about to be restored, and hence dead. */
static void
output_stack_adjust (int size, rtx reg, int epilogue_p,
@@ -4667,17 +4668,27 @@ output_stack_adjust (int size, rtx reg,
/* If TEMP is invalid, we could temporarily save a general
register to MACL. However, there is currently no need
to handle this case, so just abort when we see it. */
- if (current_function_interrupt
+ if (epilogue_p < 0
+ || current_function_interrupt
|| ! call_used_regs[temp] || fixed_regs[temp])
temp = -1;
- if (temp < 0 && ! current_function_interrupt)
+ if (temp < 0 && ! current_function_interrupt
+ && (TARGET_SHMEDIA || epilogue_p >= 0))
{
HARD_REG_SET temps;
COPY_HARD_REG_SET (temps, call_used_reg_set);
AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
if (epilogue_p)
{
- for (i = 0; i < HARD_REGNO_NREGS (FIRST_RET_REG, DImode); i++)
+ int nreg = 0;
+ if (current_function_return_rtx)
+ {
+ enum machine_mode mode;
+ mode = GET_MODE (current_function_return_rtx);
+ if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
+ nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
+ }
+ for (i = 0; i < nreg; i++)
CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
if (current_function_calls_eh_return)
{
@@ -4685,8 +4696,11 @@ output_stack_adjust (int size, rtx reg,
for (i = 0; i <= 3; i++)
CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
}
+ if (TARGET_SHMEDIA && epilogue_p < 0)
+ for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
+ CLEAR_HARD_REG_BIT (temps, i);
}
- else
+ if (epilogue_p <= 0)
{
for (i = FIRST_PARM_REG;
i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
@@ -4699,7 +4713,35 @@ output_stack_adjust (int size, rtx reg,
if (temp < 0 && live_regs_mask)
temp = scavenge_reg (live_regs_mask);
if (temp < 0)
- abort ();
+ {
+ /* If we reached here, the most likely case is the (sibcall)
+ epilogue for non SHmedia. Put a special push/pop sequence
+ for such a case as the last resort. This looks lengthy but
+ would not be a problem because it seems to be very rare. */
+ if (! TARGET_SHMEDIA && epilogue_p)
+ {
+ rtx adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
+ rtx tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
+ rtx mem;
+
+ emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
+ emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
+ emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
+ mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
+ emit_move_insn (mem, tmp_reg);
+ emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
+ mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
+ emit_move_insn (mem, tmp_reg);
+ emit_move_insn (reg, adj_reg);
+ mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
+ emit_move_insn (adj_reg, mem);
+ mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
+ emit_move_insn (tmp_reg, mem);
+ return;
+ }
+ else
+ abort ();
+ }
const_reg = gen_rtx_REG (GET_MODE (reg), temp);
/* If SIZE is negative, subtract the positive value.
@@ -5539,7 +5581,7 @@ sh_expand_prologue (void)
}
void
-sh_expand_epilogue (void)
+sh_expand_epilogue (bool sibcall_p)
{
HARD_REG_SET live_regs_mask;
int d, i;
@@ -5548,6 +5590,7 @@ sh_expand_epilogue (void)
int save_flags = target_flags;
int frame_size, save_size;
int fpscr_deferred = 0;
+ int e = sibcall_p ? -1 : 1;
d = calc_live_regs (&live_regs_mask);
@@ -5582,7 +5625,7 @@ sh_expand_epilogue (void)
if (frame_pointer_needed)
{
- output_stack_adjust (frame_size, frame_pointer_rtx, 1, &live_regs_mask);
+ output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
/* We must avoid moving the stack pointer adjustment past code
which reads from the local frame, else an interrupt could
@@ -5598,7 +5641,7 @@ sh_expand_epilogue (void)
occur after the SP adjustment and clobber data in the local
frame. */
emit_insn (gen_blockage ());
- output_stack_adjust (frame_size, stack_pointer_rtx, 1, &live_regs_mask);
+ output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
}
if (SHMEDIA_REGS_STACK_ADJUST ())
@@ -5771,7 +5814,7 @@ sh_expand_epilogue (void)
output_stack_adjust (extra_push + current_function_pretend_args_size
+ save_size + d_rounding
+ current_function_args_info.stack_regs * 8,
- stack_pointer_rtx, 1, NULL);
+ stack_pointer_rtx, e, NULL);
if (current_function_calls_eh_return)
emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
@@ -5799,7 +5842,7 @@ sh_need_epilogue (void)
rtx epilogue;
start_sequence ();
- sh_expand_epilogue ();
+ sh_expand_epilogue (0);
epilogue = get_insns ();
end_sequence ();
sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
diff -u3prN ORIG/gcc/gcc/config/sh/sh.md LOCAL/gcc/gcc/config/sh/sh.md
--- ORIG/gcc/gcc/config/sh/sh.md Tue Apr 6 13:53:26 2004
+++ LOCAL/gcc/gcc/config/sh/sh.md Thu May 6 13:53:45 2004
@@ -6467,7 +6467,7 @@
""
"
{
- sh_expand_epilogue ();
+ sh_expand_epilogue (1);
if (TARGET_SHCOMPACT)
{
rtx insn, set;
@@ -7348,7 +7348,7 @@ mov.l\\t1f,r0\\n\\
""
"
{
- sh_expand_epilogue ();
+ sh_expand_epilogue (0);
emit_jump_insn (gen_return ());
DONE;
}")