This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH/RFC] PR target/15130 SH: A tail call optimization


> Oops.  With that patch
> 
> http://gcc.gnu.org/ml/gcc-patches/2004-05/msg00002.html
> 
> 3.4.0 fails in bootstrapping:
> 
> stage2/xgcc -Bstage2/ -B/usr/gnu/sh4-unknown-linux-gnu/bin/ -c   -g -O2 -DIN_GCC   -W -Wall -Wwrite-strings -Wstrict-prototypes -Wmissing-prototypes -pedantic -Wno-long-long -Wold-style-definition     -DHAVE_CONFIG_H -DGENERATOR_FILE    -I. -I. -I../../LOCAL/gcc-3.4.0/gcc -I../../LOCAL/gcc-3.4.0/gcc/. -I../../LOCAL/gcc-3.4.0/gcc/../include  ../../LOCAL/gcc-3.4.0/gcc/gensupport.c -o gensupport.o
> ../../LOCAL/gcc-3.4.0/gcc/gensupport.c: In function `shift_output_template':
> ../../LOCAL/gcc-3.4.0/gcc/gensupport.c:678: internal compiler error: Segmentation fault
> 
> Removing r0 and r1 from the candidate for the temporary register in
> epilogue case fixes this failure.  It seems that there is something
> I'm missing.

It seems that another latent problem causes this failure.  An optimized
tail call uses one of SIBCALL_REGS for the jump target, and the current
code doesn't take that into account.  We didn't see the problem before
because the compiler tends to use R1/R0 for the tail call and R7 for the
temporary constant.
It would be hard to predict a safe general register in such a situation,
except on SHmedia.  The attached patch uses MACL to save the general
register, as suggested by the original comment in output_stack_adjust.
It looks like the change of the search order in scavenge_reg increases
the chance that the save to MACL is optimized away, though it may slow
scavenge_reg somewhat.
It was tested by bootstrapping 3.4.0 and running the regression tests for
3.4.0 on native sh4-unknown-linux-gnu.  With the same patch, there are no
new failures on i686-linux cross to sh4-/sh64-unknown-linux-gnu.

Regards,
	kaz
--
	* config/sh/sh.c (output_stack_adjust): Get a temporary register
	by scanning for epilogue when sibling call optimization is enabled.
	Take account of non safe registers for that case.  Save a temporary
	register to MACL if needed.  Compute the correct number of general
	registers for the return value.
	(scavenge_reg): Reverse the search order.

--- ORIG/gcc-3.4.0/gcc/config/sh/sh.c	2004-03-09 12:00:12.000000000 +0900
+++ LOCAL/gcc-3.4.0/gcc/config/sh/sh.c	2004-05-04 10:52:12.000000000 +0900
@@ -4565,13 +4565,15 @@ output_stack_adjust (int size, rtx reg, 
 	{
 	  rtx const_reg;
 	  rtx insn;
+	  rtx save_reg;
 	  int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
 	  int i;
 
 	  /* If TEMP is invalid, we could temporarily save a general
 	     register to MACL.  However, there is currently no need
 	     to handle this case, so just abort when we see it.  */
-	  if (current_function_interrupt
+	  if ((epilogue_p && flag_optimize_sibling_calls)
+	      || current_function_interrupt
 	      || ! call_used_regs[temp] || fixed_regs[temp])
 	    temp = -1;
 	  if (temp < 0 && ! current_function_interrupt)
@@ -4581,7 +4583,15 @@ output_stack_adjust (int size, rtx reg, 
 	      AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
 	      if (epilogue_p)
 		{
-		  for (i = 0; i < HARD_REGNO_NREGS (FIRST_RET_REG, DImode); i++)
+		  int nreg = 0;
+		  if (current_function_return_rtx)
+		    {
+		      enum machine_mode mode;
+		      mode = GET_MODE (current_function_return_rtx);
+		      if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
+			nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
+		    }
+		  for (i = 0; i < nreg; i++)
 		    CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
 		  if (current_function_calls_eh_return)
 		    {
@@ -4589,8 +4599,21 @@ output_stack_adjust (int size, rtx reg, 
 		      for (i = 0; i <= 3; i++)
 			CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
 		    }
+		  if (flag_optimize_sibling_calls)
+		    {
+		      /* For SHcompact, R0 may be used for another purposes
+			 and if it's the case, MACL will be used to save R0.
+			 See sibcall_epilogue in sh.md.  Using R0 here will
+			 confuse that process.  */
+		      if (TARGET_SHCOMPACT)
+			CLEAR_HARD_REG_BIT (temps, 0);
+		      /* For SHmedia, SIBCALL_REGS are target registers.  */
+		      else if (TARGET_SHMEDIA)
+			for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
+			  CLEAR_HARD_REG_BIT (temps, i);
+		    }
 		}
-	      else
+	      if (! epilogue_p || flag_optimize_sibling_calls)
 		{
 		  for (i = FIRST_PARM_REG;
 		       i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
@@ -4606,6 +4629,22 @@ output_stack_adjust (int size, rtx reg, 
 	    abort ();
 	  const_reg = gen_rtx_REG (GET_MODE (reg), temp);
 
+	  /* Optimized tail call may use one of SIBCALL_REGS which is
+	     hard to predict here.  We emit the code to save a general
+	     register to MACL and expect that they will be optimized
+	     away.  */
+	  if (! TARGET_SHMEDIA
+	      && epilogue_p && flag_optimize_sibling_calls)
+	    save_reg = gen_rtx_REG (Pmode, MACL_REG);
+	  else
+	    save_reg = NULL_RTX;
+	  if (save_reg)
+	    {
+	      rtx t = emit_move_insn (save_reg, const_reg);
+	      REG_NOTES (t) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
+						 REG_NOTES (t));
+	    }
+
 	  /* If SIZE is negative, subtract the positive value.
 	     This sometimes allows a constant pool entry to be shared
 	     between prologue and epilogue code.  */
@@ -4619,6 +4658,12 @@ output_stack_adjust (int size, rtx reg, 
 	      emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
 	      insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
 	    }
+	  if (save_reg)
+	    {
+	      rtx t = emit_move_insn (const_reg, save_reg);
+	      REG_NOTES (t) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
+						 REG_NOTES (t));
+	    }
 	  if (! epilogue_p)
 	    REG_NOTES (insn)
 	      = (gen_rtx_EXPR_LIST
@@ -9114,7 +9159,10 @@ static int
 scavenge_reg (HARD_REG_SET *s)
 {
   int r;
-  for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
+  /* Searching from the highest-numbered register reduces the possibility
+     that the resulting register is used for another purpose, such as
+     the sibling call.  */
+  for (r = LAST_GENERAL_REG; r >= FIRST_GENERAL_REG; r--)
     if (TEST_HARD_REG_BIT (*s, r))
       return r;
   return -1;


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]