This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] x86: Don't use get_frame_size to finalize stack frame


On 12/15/18, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Fri, Dec 14, 2018 at 04:04:02PM -0800, H.J. Lu wrote:
>> On Fri, Dec 14, 2018 at 3:24 PM Jeff Law <law@redhat.com> wrote:
>> >
>> > On 12/14/18 4:01 PM, H.J. Lu wrote:
>> > > On Thu, Dec 13, 2018 at 11:11 PM Uros Bizjak <ubizjak@gmail.com>
>> > > wrote:
>> > >> On Thu, Dec 13, 2018 at 6:36 PM H.J. Lu <hongjiu.lu@intel.com>
>> > >> wrote:
>> > >>> get_frame_size () returns used stack slots during compilation,
>> > >>> which
>> > >>> may be optimized out later.  Since
>> > >>> ix86_find_max_used_stack_alignment
>> > >>> is called by ix86_finalize_stack_frame_flags to check if stack
>> > >>> frame
>> > >>> is required, there is no need to call get_frame_size () which may
>> > >>> give
>> > >>> inaccurate final stack frame size.
>> > >>>
>> > >>> Tested on AVX512 machine configured with
>> > >>>
>> > >>> --with-arch=native --with-cpu=native
>> > >>>
>> > >>> OK for trunk?
>> > >>>
>> > >>>
>> > >>> H.J.
>> > >>> ---
>> > >>> gcc/
>> > >>>
>> > >>>         PR target/88483
>> > >>>         * config/i386/i386.c (ix86_finalize_stack_frame_flags):
>> > >>> Don't
>> > >>>         use get_frame_size ().
>> > >>>
>> > >>> gcc/testsuite/
>> > >>>
>> > >>>         PR target/88483
>> > >>>         * gcc.target/i386/stackalign/pr88483.c: New test.
>> > >> LGTM, but you know this part of the compiler better than I.
>> > >>
>> > >> Thanks,
>> > >> Uros.
>> > >>
>> > >>> ---
>> > >>>  gcc/config/i386/i386.c                          |  1 -
>> > >>>  .../gcc.target/i386/stackalign/pr88483.c        | 17
>> > >>> +++++++++++++++++
>> > >>>  2 files changed, 17 insertions(+), 1 deletion(-)
>> > >>>  create mode 100644
>> > >>> gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>> > >>>
>> > >>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> > >>> index caa701fe242..edc8f4f092e 100644
>> > >>> --- a/gcc/config/i386/i386.c
>> > >>> +++ b/gcc/config/i386/i386.c
>> > >>> @@ -12876,7 +12876,6 @@ ix86_finalize_stack_frame_flags (void)
>> > >>>            && flag_exceptions
>> > >>>            && cfun->can_throw_non_call_exceptions)
>> > >>>        && !ix86_frame_pointer_required ()
>> > >>> -      && get_frame_size () == 0
>> > >>>        && ix86_nsaved_sseregs () == 0
>> > >>>        && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
>> > >>>      {
>> > >>> diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>> > >>> b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>> > >>> new file mode 100644
>> > >>> index 00000000000..5aec8fd4cf6
>> > >>> --- /dev/null
>> > >>> +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>> > >>> @@ -0,0 +1,17 @@
>> > >>> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
>> > >>> +/* { dg-options "-O2 -mavx2" } */
>> > >>> +
>> > >>> +struct B
>> > >>> +{
>> > >>> +  char a[12];
>> > >>> +  int b;
>> > >>> +};
>> > >>> +
>> > >>> +struct B
>> > >>> +f2 (void)
>> > >>> +{
>> > >>> +  struct B x = {};
>> > >>> +  return x;
>> > >>> +}
>> > >>> +
>> > >>> +/* { dg-final { scan-assembler-not
>> > >>> "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
>> > >>> --
>> > >>> 2.19.2
>> > >>>
>> > > My fix triggered a latent bug in ix86_find_max_used_stack_alignment.
>> > > Here is the fix.  OK for trunk?
>> > >
>> > > Thanks.
>> > >
>> > > -- H.J.
>> > >
>> > >
>> > > 0001-x86-Properly-check-stack-reference.patch
>> > >
>> > > From 83f0b37f287ed198a3b50e2be6b0f7f5c154020e Mon Sep 17 00:00:00
>> > > 2001
>> > > From: "H.J. Lu" <hjl.tools@gmail.com>
>> > > Date: Fri, 14 Dec 2018 12:21:02 -0800
>> > > Subject: [PATCH] x86: Properly check stack reference
>> > >
>> > > A latent bug in ix86_find_max_used_stack_alignment was uncovered by
>> > > the
>> > > fix for PR target/88483, which caused:
>> > >
>> > > FAIL: gcc.target/i386/incoming-8.c scan-assembler andl[\\t
>> > > ]*\\$-16,[\\t ]*%esp
>> > >
>> > > on i386.  ix86_find_max_used_stack_alignment failed to notice stack
>> > > reference via non-stack/frame registers and missed stack alignment
>> > > requirement.  We should track all registers which may reference stack
>> > > by checking registers set from stack referencing registers.
>> > >
>> > > Tested on i686 and x86-64 with
>> > >
>> > > --with-arch=native --with-cpu=native
>> > >
>> > > on AVX512 machine.  Tested on i686 and x86-64 without
>> > >
>> > > --with-arch=native --with-cpu=native
>> > >
>> > > on x86-64 machine.
>> > >
>> > >       PR target/88483
>> > >       * config/i386/i386.c (ix86_stack_referenced_p): New function.
>> > >       (ix86_find_max_used_stack_alignment): Call
>> > > ix86_stack_referenced_p
>> > >       to check if stack is referenced.
>> > > ---
>> > >  gcc/config/i386/i386.c | 43
>> > > ++++++++++++++++++++++++++++++++++++++----
>> > >  1 file changed, 39 insertions(+), 4 deletions(-)
>> > >
>> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> > > index 4599ca2a7d5..bf93ec3722f 100644
>> > > --- a/gcc/config/i386/i386.c
>> > > +++ b/gcc/config/i386/i386.c
>> > > @@ -12777,6 +12777,18 @@ output_probe_stack_range (rtx reg, rtx end)
>> > >    return "";
>> > >  }
>> > >
>> > > +/* Return true if OP references stack frame though one of registers
>> > > +   in STACK_REF_REGS.  */
>> > > +
>> > > +static bool
>> > > +ix86_stack_referenced_p (const_rtx op, rtx *stack_ref_regs)
>> > > +{
>> > > +  for (int i = 0; i < LAST_REX_INT_REG; i++)
>> > > +    if (stack_ref_regs[i] && reg_mentioned_p (stack_ref_regs[i],
>> > > op))
>> > > +      return true;
>> > > +  return false;
>> > > +}
>> > > +
>> > >  /* Return true if stack frame is required.  Update STACK_ALIGNMENT
>> > >     to the largest alignment, in bits, of stack slot used if stack
>> > >     frame is required and CHECK_STACK_SLOT is true.  */
>> > > @@ -12801,6 +12813,12 @@ ix86_find_max_used_stack_alignment (unsigned
>> > > int &stack_alignment,
>> > >
>> > >    bool require_stack_frame = false;
>> > >
>> > > +  /* Array of hard registers which reference stack frame.  */
>> > > +  rtx stack_ref_regs[LAST_REX_INT_REG];
>> > > +  memset (stack_ref_regs, 0, sizeof (stack_ref_regs));
>> > > +  stack_ref_regs[STACK_POINTER_REGNUM] = stack_pointer_rtx;
>> > > +  stack_ref_regs[FRAME_POINTER_REGNUM] = frame_pointer_rtx;
>> > > +
>> > >    FOR_EACH_BB_FN (bb, cfun)
>> > >      {
>> > >        rtx_insn *insn;
>> > > @@ -12811,16 +12829,33 @@ ix86_find_max_used_stack_alignment (unsigned
>> > > int &stack_alignment,
>> > >         {
>> > >           require_stack_frame = true;
>> > >
>> > > +         rtx body = PATTERN (insn);
>> > > +         if (GET_CODE (body) == SET)
>> > > +           {
>> > > +             /* If a register is set from a stack referencing
>> > > +                register, it is a stack referencing register.  */
>> > > +             rtx dest = SET_DEST (body);
>> > > +             if (REG_P (dest) && !MEM_P (SET_SRC (body)))
>> > > +               {
>> > > +                 int regno = REGNO (dest);
>> > > +                 gcc_assert (regno < FIRST_PSEUDO_REGISTER);
>> > > +                 if (GENERAL_REGNO_P (regno))
>> > > +                   {
>> > > +                     add_to_hard_reg_set (&set_up_by_prologue,
>> > > Pmode,
>> > > +                                          regno);
>> > > +                     stack_ref_regs[regno] = dest;
>> > > +                   }
>> > > +               }
>> > > +           }
>> > > +
>> > >           if (check_stack_slot)
>> > >             {
>> > >               /* Find the maximum stack alignment.  */
>> > >               subrtx_iterator::array_type array;
>> > >               FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
>> > >                 if (MEM_P (*iter)
>> > > -                   && (reg_mentioned_p (stack_pointer_rtx,
>> > > -                                        *iter)
>> > > -                       || reg_mentioned_p (frame_pointer_rtx,
>> > > -                                           *iter)))
>> > > +                   && ix86_stack_referenced_p (*iter,
>> > > +                                               stack_ref_regs))
>> > >                   {
>> > >                     unsigned int alignment = MEM_ALIGN (*iter);
>> > >                     if (alignment > stack_alignment)
>> > This just does a linear scan of the blocks and insns within the block
>> > building up STACK_REF_REGS as we go.
>> >
>> > The problem is there's no guarantee that we're visiting the blocks in
>> > execution order.  So we might see an insn that indirectly references
>> > the
>> > stack reg *before* we see the insn which had the copy from the stack
>> > pointer.
>> >
>> > Or am I missing something?
>> >
>>
>> You are right.  We must be conservative in this case   Here is the
>> updated patch.  If there may be indirect stack references, stop and
>> restore the original stack alignment.
>>
>> I am testing it now.
>>
>
> This is updated patch.  Tested on i686 and x86-64 with
>
> --with-arch=native --with-cpu=native
>
> on AVX512 machine.  Tested on i686 and x86-64 without
>
> --with-arch=native --with-cpu=native
>
> on x86-64 machine.
>
> OK for trunk?

No. Please revert the original patch (it was not a regression), and
let's revisit this for gcc-10.

Uros.

> Thanks.
>
>
> H.J.
> ----
> A latent bug in ix86_find_max_used_stack_alignment was uncovered by the
> fix for PR target/88483, which caused:
>
> FAIL: gcc.target/i386/incoming-8.c scan-assembler andl[\\t ]*\\$-16,[\\t
> ]*%esp
>
> on i386.  ix86_find_max_used_stack_alignment failed to notice stack
> reference via non-stack/frame registers and missed stack alignment
> requirement.  There's no guarantee that we're visiting the blocks in
> execution order.  If there may be indirect stack references, stop and
> restore the original stack alignment.  Update stack alignment only if
> we check stack alignment.
>
> 	PR target/88483
> 	* config/i386/i386.c (ix86_stack_referenced_p): New function.
> 	(ix86_find_max_used_stack_alignment): If there may be indirect
> 	stack references, stop and restore the original stack alignment.
> ---
>  gcc/config/i386/i386.c | 76 ++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 74 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index b6dea0c061d..e28a8d50069 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -12777,6 +12777,57 @@ output_probe_stack_range (rtx reg, rtx end)
>    return "";
>  }
>
> +/* Return true if destination register of SET_BODY may reference stack.
> */
> +
> +static bool
> +ix86_stack_referenced_p (const_rtx set_body)
> +{
> +  rtx set_dest, set_src;
> +  int i;
> +
> +  switch (GET_CODE (set_body))
> +    {
> +    case SET:
> +      /* If a register is set from a stack stack address, it may be
> +	 used to reference stack indirectly.  */
> +      set_dest = SET_DEST (set_body);
> +      if (SUBREG_P (set_dest))
> +	set_dest = SUBREG_REG (set_dest);
> +      if (!REG_P (set_dest) || !GENERAL_REG_P (set_dest))
> +	return false;
> +      set_src = SET_SRC (set_body);
> +      if (address_operand (set_src, VOIDmode))
> +	{
> +	  struct ix86_address parts;
> +	  if (!ix86_decompose_address (set_src, &parts))
> +	    return false;
> +	  if (parts.base)
> +	    {
> +	      if (SUBREG_P (parts.base))
> +		parts.base = SUBREG_REG (parts.base);
> +	      return (parts.base == stack_pointer_rtx
> +		      || parts.base == frame_pointer_rtx);
> +	    }
> +	  if (parts.index)
> +	    {
> +	      if (SUBREG_P (parts.index))
> +		parts.index = SUBREG_REG (parts.index);
> +	      return (parts.index == stack_pointer_rtx
> +		      || parts.index == frame_pointer_rtx);
> +	    }
> +	}
> +      break;
> +    case PARALLEL:
> +      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
> +	if (ix86_stack_referenced_p (XVECEXP (set_body, 0, i)))
> +	  return true;
> +      /* FALLTHROUGH */
> +    default:
> +      break;
> +    }
> +  return false;
> +}
> +
>  /* Return true if stack frame is required.  Update STACK_ALIGNMENT
>     to the largest alignment, in bits, of stack slot used if stack
>     frame is required and CHECK_STACK_SLOT is true.  */
> @@ -12795,8 +12846,13 @@ ix86_find_max_used_stack_alignment (unsigned int
> &stack_alignment,
>    add_to_hard_reg_set (&set_up_by_prologue, Pmode,
>  		       HARD_FRAME_POINTER_REGNUM);
>
> -  /* The preferred stack alignment is the minimum stack alignment.  */
> -  if (stack_alignment > crtl->preferred_stack_boundary)
> +  /* Save the original stack alignment.  */
> +  unsigned int original_stack_alignment = stack_alignment;
> +
> +  /* The preferred stack alignment is the minimum stack alignment.
> +     Update stack alignment only if we check stack alignment.  */
> +  if (check_stack_slot
> +      && stack_alignment > crtl->preferred_stack_boundary)
>      stack_alignment = crtl->preferred_stack_boundary;
>
>    bool require_stack_frame = false;
> @@ -12811,6 +12867,16 @@ ix86_find_max_used_stack_alignment (unsigned int
> &stack_alignment,
>  	  {
>  	    require_stack_frame = true;
>
> +	    if (ix86_stack_referenced_p (PATTERN (insn)))
> +	      {
> +		/* There's no guarantee that we're visiting the
> +		   blocks in execution order.  If there may be
> +		   indirect stack references, stop and restore
> +		   the original stack alignment.  */
> +		stack_alignment = original_stack_alignment;
> +		goto done;
> +	      }
> +
>  	    if (check_stack_slot)
>  	      {
>  		/* Find the maximum stack alignment.  */
> @@ -12827,9 +12893,15 @@ ix86_find_max_used_stack_alignment (unsigned int
> &stack_alignment,
>  			stack_alignment = alignment;
>  		    }
>  	      }
> +	    else
> +	      {
> +		/* We are done.  */
> +		goto done;
> +	      }
>  	  }
>      }
>
> +done:
>    return require_stack_frame;
>  }
>
> --
> 2.19.2
>
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]