This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH/RFC] PR target/15130 SH: A tail call optimization


amylaar@spamcop.net (Joern Rennecke) wrote:
> So, as it stands, we have to assume that all registers in
> reg_class_contents[SIBCALL_REGS] are used, and also all the argument passing
> registers - leaving no call-clobbered register to allocate our temporary in
> for SH1..SH4.
> OTOH, prior to the final adjustment, if any general purpose register was saved,
> it is available.  And the final adjustment shouldn't actually need a
> temporary, because the adjustment is of limited size.
> So it is rather unlikely that we'll be short of a temporary.
> 
> so possibilities are
[snip]
> 4) Use a push/pop sequence, except when not using a frame pointer,
>    you'll need to push one register, use that to calculate an address to
>    save a second one at the bottom of the to-be-discarded frame, pop
>    the saved value of the first reg into the second, store it at the bottom
>    of the frame too, do the adjustment, and then pop both registers.
>    Again, this is pretty grotty code - even worse than 3) - but it doesn't
>    affect any other parts of the compiler, and it should be simpler to
>    implement.  I.e.:
>    mov r4,@r15
>    mov adjust,r4
>    add r15,r4
>    mov r5,@-r4
>    mov @r15,r5
>    mov r5,@-r4
>    mov r4,r15
>    mov @r15+,r4
>    mov @r15+,r5
> 
> Considering that it is very rare that we have a large frame, but save no
> general purpose registers, I think we should go with 3) or 4) .

Sounds reasonable.  How about the appended patch?
For sibcall epilogues on SH1-4, it scans the live registers for a
temporary and falls back to the push/pop sequence of (4) when that
scan fails.
For SHmedia, it first tries to find a candidate for the temporary
among the usable registers and then among the live registers.  That
should be enough for SHmedia.
It has been regression-tested on mainline for sh4-linux and sh64-linux.
The bootstrap on 3.4.0 isn't complete yet, but the compilers have been
built successfully.

Regards,
	kaz
--
	* config/sh/sh-protos.h (sh_expand_epilogue): Change prototype.
	* config/sh/sh.c (output_stack_adjust): Take the sibcall epilogue
	into account.  Compute the correct number of general registers
	for the return value.  Generate a special push/pop sequence when
	failing to get a temporary register for a non-SHmedia epilogue.
	(sh_expand_epilogue): Add an argument to show whether it's for
	sibcall or not.  Set the 3rd argument of output_stack_adjust to
	-1 if needed.
	(sh_need_epilogue): Call sh_expand_epilogue with 0.
	* config/sh/sh.md (sibcall_epilogue): Call sh_expand_epilogue
	with 1.
	(epilogue): Call sh_expand_epilogue with 0.

diff -u3prN ORIG/gcc/gcc/config/sh/sh-protos.h LOCAL/gcc/gcc/config/sh/sh-protos.h
--- ORIG/gcc/gcc/config/sh/sh-protos.h	Tue Jan 13 22:31:51 2004
+++ LOCAL/gcc/gcc/config/sh/sh-protos.h	Thu May  6 13:53:17 2004
@@ -109,7 +109,7 @@ extern int sh_handle_pragma (int (*)(voi
 extern struct rtx_def *get_fpscr_rtx (void);
 extern int sh_media_register_for_return (void);
 extern void sh_expand_prologue (void);
-extern void sh_expand_epilogue (void);
+extern void sh_expand_epilogue (bool);
 extern int sh_need_epilogue (void);
 extern void sh_set_return_address (rtx, rtx);
 extern int initial_elimination_offset (int, int);
diff -u3prN ORIG/gcc/gcc/config/sh/sh.c LOCAL/gcc/gcc/config/sh/sh.c
--- ORIG/gcc/gcc/config/sh/sh.c	Tue Apr  6 13:53:26 2004
+++ LOCAL/gcc/gcc/config/sh/sh.c	Thu May  6 17:31:09 2004
@@ -4631,8 +4631,9 @@ static int extra_push;
 
 /* Adjust the stack by SIZE bytes.  REG holds the rtl of the register to be
    adjusted.  If epilogue_p is zero, this is for a prologue; otherwise, it's
-   for an epilogue.  If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET
-   of all the registers that are about to be restored, and hence dead.  */
+   for an epilogue and the minus value means that it's for sibcall.  If
+   LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of all the
+   registers that are about to be restored, and hence dead.  */
 
 static void
 output_stack_adjust (int size, rtx reg, int epilogue_p,
@@ -4667,17 +4668,27 @@ output_stack_adjust (int size, rtx reg, 
 	  /* If TEMP is invalid, we could temporarily save a general
 	     register to MACL.  However, there is currently no need
 	     to handle this case, so just abort when we see it.  */
-	  if (current_function_interrupt
+	  if (epilogue_p < 0
+	      || current_function_interrupt
 	      || ! call_used_regs[temp] || fixed_regs[temp])
 	    temp = -1;
-	  if (temp < 0 && ! current_function_interrupt)
+	  if (temp < 0 && ! current_function_interrupt
+	      && (TARGET_SHMEDIA || epilogue_p >= 0))
 	    {
 	      HARD_REG_SET temps;
 	      COPY_HARD_REG_SET (temps, call_used_reg_set);
 	      AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
 	      if (epilogue_p)
 		{
-		  for (i = 0; i < HARD_REGNO_NREGS (FIRST_RET_REG, DImode); i++)
+		  int nreg = 0;
+		  if (current_function_return_rtx)
+		    {
+		      enum machine_mode mode;
+		      mode = GET_MODE (current_function_return_rtx);
+		      if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
+			nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
+		    }
+		  for (i = 0; i < nreg; i++)
 		    CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
 		  if (current_function_calls_eh_return)
 		    {
@@ -4685,8 +4696,11 @@ output_stack_adjust (int size, rtx reg, 
 		      for (i = 0; i <= 3; i++)
 			CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
 		    }
+		  if (TARGET_SHMEDIA && epilogue_p < 0)
+		    for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
+		      CLEAR_HARD_REG_BIT (temps, i);
 		}
-	      else
+	      if (epilogue_p <= 0)
 		{
 		  for (i = FIRST_PARM_REG;
 		       i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
@@ -4699,7 +4713,35 @@ output_stack_adjust (int size, rtx reg, 
 	  if (temp < 0 && live_regs_mask)
 	    temp = scavenge_reg (live_regs_mask);
 	  if (temp < 0)
-	    abort ();
+	    {
+	      /* If we reached here, the most likely case is the (sibcall)
+		 epilogue for non SHmedia.  Put a special push/pop sequence
+		 for such case as the last resort.  This looks lengthy but
+		 would not be problem because it seems to be very rare.  */
+	      if (! TARGET_SHMEDIA && epilogue_p)
+		{
+		  rtx adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
+		  rtx tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
+		  rtx mem;
+
+		  emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
+		  emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
+		  emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
+		  mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
+		  emit_move_insn (mem, tmp_reg);
+		  emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
+		  mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
+		  emit_move_insn (mem, tmp_reg);
+		  emit_move_insn (reg, adj_reg);
+		  mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
+		  emit_move_insn (adj_reg, mem);
+		  mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
+		  emit_move_insn (tmp_reg, mem);
+		  return;
+		}
+	      else
+		abort ();
+	    }
 	  const_reg = gen_rtx_REG (GET_MODE (reg), temp);
 
 	  /* If SIZE is negative, subtract the positive value.
@@ -5539,7 +5581,7 @@ sh_expand_prologue (void)
 }
 
 void
-sh_expand_epilogue (void)
+sh_expand_epilogue (bool sibcall_p)
 {
   HARD_REG_SET live_regs_mask;
   int d, i;
@@ -5548,6 +5590,7 @@ sh_expand_epilogue (void)
   int save_flags = target_flags;
   int frame_size, save_size;
   int fpscr_deferred = 0;
+  int e = sibcall_p ? -1 : 1;
 
   d = calc_live_regs (&live_regs_mask);
 
@@ -5582,7 +5625,7 @@ sh_expand_epilogue (void)
 
   if (frame_pointer_needed)
     {
-      output_stack_adjust (frame_size, frame_pointer_rtx, 1, &live_regs_mask);
+      output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
 
       /* We must avoid moving the stack pointer adjustment past code
 	 which reads from the local frame, else an interrupt could
@@ -5598,7 +5641,7 @@ sh_expand_epilogue (void)
 	 occur after the SP adjustment and clobber data in the local
 	 frame.  */
       emit_insn (gen_blockage ());
-      output_stack_adjust (frame_size, stack_pointer_rtx, 1, &live_regs_mask);
+      output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
     }
 
   if (SHMEDIA_REGS_STACK_ADJUST ())
@@ -5771,7 +5814,7 @@ sh_expand_epilogue (void)
   output_stack_adjust (extra_push + current_function_pretend_args_size
 		       + save_size + d_rounding
 		       + current_function_args_info.stack_regs * 8,
-		       stack_pointer_rtx, 1, NULL);
+		       stack_pointer_rtx, e, NULL);
 
   if (current_function_calls_eh_return)
     emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
@@ -5799,7 +5842,7 @@ sh_need_epilogue (void)
       rtx epilogue;
 
       start_sequence ();
-      sh_expand_epilogue ();
+      sh_expand_epilogue (0);
       epilogue = get_insns ();
       end_sequence ();
       sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
diff -u3prN ORIG/gcc/gcc/config/sh/sh.md LOCAL/gcc/gcc/config/sh/sh.md
--- ORIG/gcc/gcc/config/sh/sh.md	Tue Apr  6 13:53:26 2004
+++ LOCAL/gcc/gcc/config/sh/sh.md	Thu May  6 13:53:45 2004
@@ -6467,7 +6467,7 @@
   ""
   "
 {
-  sh_expand_epilogue ();
+  sh_expand_epilogue (1);
   if (TARGET_SHCOMPACT)
     {
       rtx insn, set;
@@ -7348,7 +7348,7 @@ mov.l\\t1f,r0\\n\\
   ""
   "
 {
-  sh_expand_epilogue ();
+  sh_expand_epilogue (0);
   emit_jump_insn (gen_return ());
   DONE;
 }")


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]