This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RS6000] Stack tie fix.


Hello Alan,

Thanks a lot for following up on this one. Coincidentally, I was just
about to submit the alternate approach we had discussed about, after
David's comment at http://gcc.gnu.org/ml/gcc-patches/2011-11/msg01842.html.

We have experienced with it for a while on our gcc-4.5 series of
compilers and we are working on a transition to gcc 4.7.

This is of course a much heavier hammer so it would be nice if we can
indeed have a subtler way out :-)

Olivier

On Apr 3, 2012, at 10:40 , Alan Modra wrote:

> This patch merges the rs6000 stack_tie and frame_tie rtl, so that we
> now should use a tie insn that mentions all base regs involved in
> register saves and restores.  That should avoid any alias analysis
> problems eg. http://gcc.gnu.org/ml/gcc-patches/2011-11/msg01156.html.
> I chose to put the regs of interest on the destination of the fake
> set, rather than in the source as is currently done, because I figure
> that relying on the compiler to not reorder reads is fragile.  Here's
> an example of the new stack tie:
> 
> (set (mem:BLK (unspec:SI [(reg:SI 1) (reg:SI 12)] UNSPEC_TIE))
>     (const_int 0))
> 
> Some notes:
> - I tried putting the mem on both source and destination of the set,
>  but that runs afoul of rtl dead code elimination.
> - The rs6000_emit_epilogue places that called gen_frame_tie and
>  gen_stack_tie both used alias set 0 mems.  I believe this was
>  unnecessarily restrictive.
> - CODE_FOR_stack_tie is mentioned in rs6000.c but not
>  CODE_FOR_frame_tie.  Removing frame_tie fixes this sched bug too.
> 
> Bootstrapped and regression tested powerpc-linux with usual -O2
> BOOT_CFLAGS and also -Os, and testcases from pr44199, pr30282, pr52828,
> the url above and http://gcc.gnu.org/ml/gcc/2011-03/msg00123.html
> examined for sanity.  OK to apply?
> 
> 	PR target/52828
> 	* config/rs6000/rs6000.c (rs6000_emit_stack_tie): Rewrite with
> 	tie regs on destination of set.  Delete forward declaration.
> 	(rs6000_emit_stack_reset): Update rs6000_emit_stack_tie calls.
> 	(rs6000_emit_prologue): Likewise.
> 	(rs6000_emit_epilogue): Likewise.  Use in place of gen_frame_tie
> 	and gen_stack_tie.
> 	* config/rs6000/predicates.md (tie_operand): New.
> 	* config/rs6000/rs6000.md (restore_stack_block): Generate new
> 	stack tie rtl.
> 	(restore_stack_nonlocal): Likewise.
> 	(stack_tie): Update.
> 	(frame_tie): Delete.
> 
> Index: gcc/config/rs6000/rs6000.c
> ===================================================================
> --- gcc/config/rs6000/rs6000.c	(revision 185830)
> +++ gcc/config/rs6000/rs6000.c	(working copy)
> @@ -925,7 +925,6 @@ static const char *rs6000_invalid_within_doloop (c
> static bool rs6000_legitimate_address_p (enum machine_mode, rtx, bool);
> static bool rs6000_debug_legitimate_address_p (enum machine_mode, rtx, bool);
> static rtx rs6000_generate_compare (rtx, enum machine_mode);
> -static void rs6000_emit_stack_tie (void);
> static bool spe_func_has_64bit_regs_p (void);
> static rtx gen_frame_mem_offset (enum machine_mode, rtx, int);
> static unsigned rs6000_hash_constant (rtx);
> @@ -18517,12 +18516,28 @@ rs6000_aix_asm_output_dwarf_table_ref (char * fram
>    and the change to the stack pointer.  */
> 
> static void
> -rs6000_emit_stack_tie (void)
> +rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
> {
> -  rtx mem = gen_frame_mem (BLKmode,
> -			   gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
> +  rtx u;
> +  rtvec p;
> +  int i;
> 
> -  emit_insn (gen_stack_tie (mem));
> +  if (REGNO (fp) == STACK_POINTER_REGNUM
> +      || (hard_frame_needed
> +	  && REGNO (fp) == HARD_FRAME_POINTER_REGNUM))
> +    fp = NULL_RTX;
> +
> +  p = rtvec_alloc (1 + hard_frame_needed + (fp != NULL_RTX));
> +
> +  i = 0;
> +  RTVEC_ELT (p, i++) = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
> +  if (hard_frame_needed)
> +    RTVEC_ELT (p, i++) = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
> +  if (fp != NULL_RTX)
> +    RTVEC_ELT (p, i++) = fp;
> +  u = gen_rtx_UNSPEC (Pmode, p, UNSPEC_TIE);
> +
> +  emit_insn (gen_stack_tie (gen_frame_mem (BLKmode, u)));
> }
> 
> /* Emit the correct code for allocating stack space, as insns.
> @@ -19142,7 +19157,7 @@ rs6000_emit_stack_reset (rs6000_stack_t *info,
>       || (TARGET_SPE_ABI
> 	  && info->spe_64bit_regs_used != 0
> 	  && info->first_gp_reg_save != 32))
> -    rs6000_emit_stack_tie ();
> +    rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
> 
>   if (frame_reg_rtx != sp_reg_rtx)
>     {
> @@ -19362,7 +19377,7 @@ rs6000_emit_prologue (void)
> 	}
>       rs6000_emit_allocate_stack (info->total_size, copy_reg);
>       if (frame_reg_rtx != sp_reg_rtx)
> -	rs6000_emit_stack_tie ();
> +	rs6000_emit_stack_tie (frame_reg_rtx, false);
>     }
> 
>   /* Handle world saves specially here.  */
> @@ -19866,7 +19881,7 @@ rs6000_emit_prologue (void)
> 	sp_offset = info->total_size;
>       rs6000_emit_allocate_stack (info->total_size, copy_reg);
>       if (frame_reg_rtx != sp_reg_rtx)
> -	rs6000_emit_stack_tie ();
> +	rs6000_emit_stack_tie (frame_reg_rtx, false);
>     }
> 
>   /* Set frame pointer, if needed.  */
> @@ -20437,13 +20452,7 @@ rs6000_emit_epilogue (int sibcall)
>       /* Prevent reordering memory accesses against stack pointer restore.  */
>       else if (cfun->calls_alloca
> 	       || offset_below_red_zone_p (-info->total_size))
> -	{
> -	  rtx mem1 = gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx);
> -	  rtx mem2 = gen_rtx_MEM (BLKmode, sp_reg_rtx);
> -	  MEM_NOTRAP_P (mem1) = 1;
> -	  MEM_NOTRAP_P (mem2) = 1;
> -	  emit_insn (gen_frame_tie (mem1, mem2));
> -	}
> +	rs6000_emit_stack_tie (frame_reg_rtx, true);
> 
>       insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
> 				       GEN_INT (info->total_size)));
> @@ -20456,11 +20465,7 @@ rs6000_emit_epilogue (int sibcall)
>       /* Prevent reordering memory accesses against stack pointer restore.  */
>       if (cfun->calls_alloca
> 	  || offset_below_red_zone_p (-info->total_size))
> -	{
> -	  rtx mem = gen_rtx_MEM (BLKmode, sp_reg_rtx);
> -	  MEM_NOTRAP_P (mem) = 1;
> -	  emit_insn (gen_stack_tie (mem));
> -	}
> +	rs6000_emit_stack_tie (frame_reg_rtx, false);
>       insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
> 				       GEN_INT (info->total_size)));
>       sp_offset = 0;
> Index: gcc/config/rs6000/predicates.md
> ===================================================================
> --- gcc/config/rs6000/predicates.md	(revision 185830)
> +++ gcc/config/rs6000/predicates.md	(working copy)
> @@ -1451,3 +1451,11 @@
> 
>   return 1;
> })
> +
> +;; Return 1 if OP is a stack tie operand.
> +(define_predicate "tie_operand"
> +  (match_code "mem")
> +{
> +  return (GET_CODE (XEXP (op, 0)) == UNSPEC
> +	  && XINT (XEXP (op, 0), 1) == UNSPEC_TIE);
> +})
> Index: gcc/config/rs6000/rs6000.md
> ===================================================================
> --- gcc/config/rs6000/rs6000.md	(revision 185830)
> +++ gcc/config/rs6000/rs6000.md	(working copy)
> @@ -11989,17 +11989,20 @@
> (define_expand "restore_stack_block"
>   [(set (match_dup 2) (match_dup 3))
>    (set (match_dup 4) (match_dup 2))
> -   (set (match_dup 5) (unspec:BLK [(match_dup 5)] UNSPEC_TIE))
> +   (set (match_dup 5) (const_int 0))
>    (set (match_operand 0 "register_operand" "")
> 	(match_operand 1 "register_operand" ""))]
>   ""
>   "
> {
> +  rtx u;
> +
>   operands[1] = force_reg (Pmode, operands[1]);
>   operands[2] = gen_reg_rtx (Pmode);
>   operands[3] = gen_frame_mem (Pmode, operands[0]);
>   operands[4] = gen_frame_mem (Pmode, operands[1]);
> -  operands[5] = gen_frame_mem (BLKmode, operands[0]);
> +  u = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[0]), UNSPEC_TIE);
> +  operands[5] = gen_frame_mem (BLKmode, u);
> }")
> 
> (define_expand "save_stack_nonlocal"
> @@ -12022,12 +12025,13 @@
>   [(set (match_dup 2) (match_operand 1 "memory_operand" ""))
>    (set (match_dup 3) (match_dup 4))
>    (set (match_dup 5) (match_dup 2))
> -   (set (match_dup 6) (unspec:BLK [(match_dup 6)] UNSPEC_TIE))
> +   (set (match_dup 6) (const_int 0))
>    (set (match_operand 0 "register_operand" "") (match_dup 3))]
>   ""
>   "
> {
>   int units_per_word = (TARGET_32BIT) ? 4 : 8;
> +  rtx u;
> 
>   /* Restore the backchain from the first word, sp from the second.  */
>   operands[2] = gen_reg_rtx (Pmode);
> @@ -12035,7 +12039,8 @@
>   operands[1] = adjust_address_nv (operands[1], Pmode, 0);
>   operands[4] = adjust_address_nv (operands[1], Pmode, units_per_word);
>   operands[5] = gen_frame_mem (Pmode, operands[3]);
> -  operands[6] = gen_frame_mem (BLKmode, operands[0]);
> +  u = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[0]), UNSPEC_TIE);
> +  operands[6] = gen_frame_mem (BLKmode, u);
> }")
> 
> ;; TOC register handling.
> @@ -15899,25 +15904,15 @@
>   [(set_attr "type" "branch")
>    (set_attr "length" "4")])
> 
> -; These are to explain that changes to the stack pointer should
> -; not be moved over stores to stack memory.
> +; This is to explain that changes to the stack pointer should
> +; not be moved over loads from or stores to stack memory.
> (define_insn "stack_tie"
> -  [(set (match_operand:BLK 0 "memory_operand" "+m")
> -        (unspec:BLK [(match_dup 0)] UNSPEC_TIE))]
> +  [(set (match_operand:BLK 0 "tie_operand" "")
> +	(const_int 0))]
>   ""
>   ""
>   [(set_attr "length" "0")])
> 
> -; Like stack_tie, but depend on both fp and sp based memory.
> -(define_insn "frame_tie"
> -  [(set (match_operand:BLK 0 "memory_operand" "+m")
> -	(unspec:BLK [(match_dup 0)
> -		     (match_operand:BLK 1 "memory_operand" "m")] UNSPEC_TIE))]
> -  ""
> -  ""
> -  [(set_attr "length" "0")])
> -
> -
> (define_expand "epilogue"
>   [(use (const_int 0))]
>   ""
> 
> -- 
> Alan Modra
> Australia Development Lab, IBM
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]