This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH/RFC] PR target/15130 SH: A tail call optimization
- From: Kaz Kojima <kkojima at rr dot iij4u dot or dot jp>
- To: amylaar at spamcop dot net
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Wed, 12 May 2004 21:49:02 +0900 (JST)
- Subject: Re: [PATCH/RFC] PR target/15130 SH: A tail call optimization
- References: <20040512123437.AA809227B3@meolyon.local>
amylaar@spamcop.net (Joern Rennecke) wrote:
>> and replacing 7 with temp in output_stack_adjust calls in
>> sh_expand_epilogue together with the push/pop sequence using
>> r4/r5 in output_stack_adjust for temp < 0 might be ok for 3.3.4.
>> Thoughts?
>
> This seems to make sense, except that for the last output_stack_adjust call -
> the one after restoring the registers - you have to pass -1 in the sibcall_p
> case.
Ah, I've missed it. How about the patch below?
I'm running make bootstrap on 3.3 with it.
Regards,
kaz
--
* config/sh/sh-protos.h (sh_expand_epilogue): Change prototype.
* config/sh/sh.c (output_stack_adjust): Generate a special
push/pop sequence when failing to get a temporary register for
non SHmedia epilogue.
(sh_expand_epilogue): Add an argument to show whether it's for
sibcall or not. Take the sibcall epilogue into account.
(sh_need_epilogue): Call sh_expand_epilogue with 0.
* config/sh/sh.md (sibcall_epilogue): Call sh_expand_epilogue
with 1.
(epilogue): Call sh_expand_epilogue with 0.
diff -uprN ORIG/gcc-3.3/gcc/config/sh/sh-protos.h LOCAL/gcc-3.3/gcc/config/sh/sh-protos.h
--- ORIG/gcc-3.3/gcc/config/sh/sh-protos.h 2004-01-13 02:03:24.000000000 +0900
+++ LOCAL/gcc-3.3/gcc/config/sh/sh-protos.h 2004-05-11 10:55:52.000000000 +0900
@@ -118,7 +118,7 @@ extern struct rtx_def *get_fpscr_rtx PAR
extern void output_file_start PARAMS ((FILE *));
extern int sh_media_register_for_return PARAMS ((void));
extern void sh_expand_prologue PARAMS ((void));
-extern void sh_expand_epilogue PARAMS ((void));
+extern void sh_expand_epilogue PARAMS ((bool));
extern int sh_need_epilogue PARAMS ((void));
extern int initial_elimination_offset PARAMS ((int, int));
extern int fldi_ok PARAMS ((void));
diff -uprN ORIG/gcc-3.3/gcc/config/sh/sh.c LOCAL/gcc-3.3/gcc/config/sh/sh.c
--- ORIG/gcc-3.3/gcc/config/sh/sh.c 2004-01-15 03:11:36.000000000 +0900
+++ LOCAL/gcc-3.3/gcc/config/sh/sh.c 2004-05-12 20:35:40.000000000 +0900
@@ -4277,7 +4277,55 @@ output_stack_adjust (size, reg, temp, em
register to MACL. However, there is currently no need
to handle this case, so just abort when we see it. */
if (temp < 0)
- abort ();
+ {
+ /* If we reached here, the most likely case is the (sibcall)
+ epilogue for non SHmedia. Put a special push/pop sequence
+ for such case as the last resort. This looks lengthy but
+ would not be problem because it seems to be very rare. */
+ if (! TARGET_SHMEDIA && (emit_fn != frame_insn))
+ {
+ rtx adj_reg, tmp_reg, mem;
+
+ /* ??? There is still the slight possibility that r4 or r5
+ have been reserved as fixed registers or assigned as
+ global registers, and they change during an interrupt.
+ There are possible ways to handle this:
+ - If we are adjusting the frame pointer (r14), we can do
+ with a single temp register and an ordinary push / pop
+ on the stack.
+ - Grab any call-used or call-saved registers (i.e. not
+ fixed or globals) for the temps we need. We might
+ also grab r14 if we are adjusting the stack pointer.
+ If we can't find enough available registers, issue
+ a diagnostic and abort - the user must have reserved
+ way too many registers.
+ But since all this is rather unlikely to happen and
+ would require extra testing, we just abort if r4 / r5
+ are not available. */
+ if (fixed_regs[4] || fixed_regs[5]
+ || global_regs[4] || global_regs[5])
+ abort ();
+
+ adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
+ tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
+ emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
+ emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
+ emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
+ mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
+ emit_move_insn (mem, tmp_reg);
+ emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
+ mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
+ emit_move_insn (mem, tmp_reg);
+ emit_move_insn (reg, adj_reg);
+ mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
+ emit_move_insn (adj_reg, mem);
+ mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
+ emit_move_insn (tmp_reg, mem);
+ return;
+ }
+ else
+ abort ();
+ }
const_reg = gen_rtx_REG (GET_MODE (reg), temp);
/* If SIZE is negative, subtract the positive value.
@@ -4843,7 +4891,7 @@ sh_expand_prologue ()
}
void
-sh_expand_epilogue ()
+sh_expand_epilogue (bool sibcall_p)
{
HOST_WIDE_INT live_regs_mask[(FIRST_PSEUDO_REGISTER + 31) / 32];
int d, i;
@@ -4851,9 +4899,22 @@ sh_expand_epilogue ()
int save_flags = target_flags;
int frame_size;
+ int temp;
calc_live_regs (&d, live_regs_mask);
+ if (! sibcall_p)
+ temp = 7;
+ else if (TARGET_SHMEDIA)
+ temp = 1;
+ else
+ {
+ for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
+ if (TEST_HARD_REG_BIT (live_regs_mask, i))
+ break;
+ temp = (i <= LAST_GENERAL_REG) ? i : -1;
+ }
+
if (TARGET_SH5 && d % (STACK_BOUNDARY / BITS_PER_UNIT))
d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
- d % (STACK_BOUNDARY / BITS_PER_UNIT));
@@ -4862,7 +4923,7 @@ sh_expand_epilogue ()
if (frame_pointer_needed)
{
- output_stack_adjust (frame_size, frame_pointer_rtx, 7, emit_insn);
+ output_stack_adjust (frame_size, frame_pointer_rtx, temp, emit_insn);
/* We must avoid moving the stack pointer adjustment past code
which reads from the local frame, else an interrupt could
@@ -4878,7 +4939,7 @@ sh_expand_epilogue ()
occur after the SP adjustment and clobber data in the local
frame. */
emit_insn (gen_blockage ());
- output_stack_adjust (frame_size, stack_pointer_rtx, 7, emit_insn);
+ output_stack_adjust (frame_size, stack_pointer_rtx, temp, emit_insn);
}
if (SHMEDIA_REGS_STACK_ADJUST ())
@@ -5054,7 +5115,7 @@ sh_expand_epilogue ()
output_stack_adjust (extra_push + current_function_pretend_args_size
+ d + d_rounding
+ current_function_args_info.stack_regs * 8,
- stack_pointer_rtx, 7, emit_insn);
+ stack_pointer_rtx, (sibcall_p ? -1 : temp), emit_insn);
/* Switch back to the normal stack if necessary. */
if (sp_switch)
@@ -5078,7 +5139,7 @@ sh_need_epilogue ()
rtx epilogue;
start_sequence ();
- sh_expand_epilogue ();
+ sh_expand_epilogue (0);
epilogue = get_insns ();
end_sequence ();
sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
diff -uprN ORIG/gcc-3.3/gcc/config/sh/sh.md LOCAL/gcc-3.3/gcc/config/sh/sh.md
--- ORIG/gcc-3.3/gcc/config/sh/sh.md 2004-01-13 02:03:25.000000000 +0900
+++ LOCAL/gcc-3.3/gcc/config/sh/sh.md 2004-05-11 10:55:52.000000000 +0900
@@ -6414,7 +6414,7 @@
""
"
{
- sh_expand_epilogue ();
+ sh_expand_epilogue (1);
if (TARGET_SHCOMPACT)
{
rtx insn, set;
@@ -7158,7 +7158,7 @@
""
"
{
- sh_expand_epilogue ();
+ sh_expand_epilogue (0);
emit_jump_insn (gen_return ());
DONE;
}")