This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] x86: Don't use get_frame_size to finalize stack frame
On 12/15/18, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Fri, Dec 14, 2018 at 04:04:02PM -0800, H.J. Lu wrote:
>> On Fri, Dec 14, 2018 at 3:24 PM Jeff Law <law@redhat.com> wrote:
>> >
>> > On 12/14/18 4:01 PM, H.J. Lu wrote:
>> > > On Thu, Dec 13, 2018 at 11:11 PM Uros Bizjak <ubizjak@gmail.com>
>> > > wrote:
>> > >> On Thu, Dec 13, 2018 at 6:36 PM H.J. Lu <hongjiu.lu@intel.com>
>> > >> wrote:
>> > >>> get_frame_size () returns used stack slots during compilation,
>> > >>> which
>> > >>> may be optimized out later. Since
>> > >>> ix86_find_max_used_stack_alignment
>> > >>> is called by ix86_finalize_stack_frame_flags to check if stack
>> > >>> frame
>> > >>> is required, there is no need to call get_frame_size () which may
>> > >>> give
>> > >>> inaccurate final stack frame size.
>> > >>>
>> > >>> Tested on AVX512 machine configured with
>> > >>>
>> > >>> --with-arch=native --with-cpu=native
>> > >>>
>> > >>> OK for trunk?
>> > >>>
>> > >>>
>> > >>> H.J.
>> > >>> ---
>> > >>> gcc/
>> > >>>
>> > >>> PR target/88483
>> > >>> * config/i386/i386.c (ix86_finalize_stack_frame_flags):
>> > >>> Don't
>> > >>> use get_frame_size ().
>> > >>>
>> > >>> gcc/testsuite/
>> > >>>
>> > >>> PR target/88483
>> > >>> * gcc.target/i386/stackalign/pr88483.c: New test.
>> > >> LGTM, but you know this part of the compiler better than I.
>> > >>
>> > >> Thanks,
>> > >> Uros.
>> > >>
>> > >>> ---
>> > >>> gcc/config/i386/i386.c | 1 -
>> > >>> .../gcc.target/i386/stackalign/pr88483.c | 17
>> > >>> +++++++++++++++++
>> > >>> 2 files changed, 17 insertions(+), 1 deletion(-)
>> > >>> create mode 100644
>> > >>> gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>> > >>>
>> > >>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> > >>> index caa701fe242..edc8f4f092e 100644
>> > >>> --- a/gcc/config/i386/i386.c
>> > >>> +++ b/gcc/config/i386/i386.c
>> > >>> @@ -12876,7 +12876,6 @@ ix86_finalize_stack_frame_flags (void)
>> > >>> && flag_exceptions
>> > >>> && cfun->can_throw_non_call_exceptions)
>> > >>> && !ix86_frame_pointer_required ()
>> > >>> - && get_frame_size () == 0
>> > >>> && ix86_nsaved_sseregs () == 0
>> > >>> && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
>> > >>> {
>> > >>> diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>> > >>> b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>> > >>> new file mode 100644
>> > >>> index 00000000000..5aec8fd4cf6
>> > >>> --- /dev/null
>> > >>> +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483.c
>> > >>> @@ -0,0 +1,17 @@
>> > >>> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
>> > >>> +/* { dg-options "-O2 -mavx2" } */
>> > >>> +
>> > >>> +struct B
>> > >>> +{
>> > >>> + char a[12];
>> > >>> + int b;
>> > >>> +};
>> > >>> +
>> > >>> +struct B
>> > >>> +f2 (void)
>> > >>> +{
>> > >>> + struct B x = {};
>> > >>> + return x;
>> > >>> +}
>> > >>> +
>> > >>> +/* { dg-final { scan-assembler-not
>> > >>> "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
>> > >>> --
>> > >>> 2.19.2
>> > >>>
>> > > My fix triggered a latent bug in ix86_find_max_used_stack_alignment.
>> > > Here is the fix. OK for trunk?
>> > >
>> > > Thanks.
>> > >
>> > > -- H.J.
>> > >
>> > >
>> > > 0001-x86-Properly-check-stack-reference.patch
>> > >
>> > > From 83f0b37f287ed198a3b50e2be6b0f7f5c154020e Mon Sep 17 00:00:00
>> > > 2001
>> > > From: "H.J. Lu" <hjl.tools@gmail.com>
>> > > Date: Fri, 14 Dec 2018 12:21:02 -0800
>> > > Subject: [PATCH] x86: Properly check stack reference
>> > >
>> > > A latent bug in ix86_find_max_used_stack_alignment was uncovered by
>> > > the
>> > > fix for PR target/88483, which caused:
>> > >
>> > > FAIL: gcc.target/i386/incoming-8.c scan-assembler andl[\\t
>> > > ]*\\$-16,[\\t ]*%esp
>> > >
>> > > on i386. ix86_find_max_used_stack_alignment failed to notice stack
>> > > reference via non-stack/frame registers and missed stack alignment
>> > > requirement. We should track all registers which may reference stack
>> > > by checking registers set from stack referencing registers.
>> > >
>> > > Tested on i686 and x86-64 with
>> > >
>> > > --with-arch=native --with-cpu=native
>> > >
>> > > on AVX512 machine. Tested on i686 and x86-64 without
>> > >
>> > > --with-arch=native --with-cpu=native
>> > >
>> > > on x86-64 machine.
>> > >
>> > > PR target/88483
>> > > * config/i386/i386.c (ix86_stack_referenced_p): New function.
>> > > (ix86_find_max_used_stack_alignment): Call
>> > > ix86_stack_referenced_p
>> > > to check if stack is referenced.
>> > > ---
>> > > gcc/config/i386/i386.c | 43
>> > > ++++++++++++++++++++++++++++++++++++++----
>> > > 1 file changed, 39 insertions(+), 4 deletions(-)
>> > >
>> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> > > index 4599ca2a7d5..bf93ec3722f 100644
>> > > --- a/gcc/config/i386/i386.c
>> > > +++ b/gcc/config/i386/i386.c
>> > > @@ -12777,6 +12777,18 @@ output_probe_stack_range (rtx reg, rtx end)
>> > > return "";
>> > > }
>> > >
>> > > +/* Return true if OP references stack frame though one of registers
>> > > + in STACK_REF_REGS. */
>> > > +
>> > > +static bool
>> > > +ix86_stack_referenced_p (const_rtx op, rtx *stack_ref_regs)
>> > > +{
>> > > + for (int i = 0; i < LAST_REX_INT_REG; i++)
>> > > + if (stack_ref_regs[i] && reg_mentioned_p (stack_ref_regs[i],
>> > > op))
>> > > + return true;
>> > > + return false;
>> > > +}
>> > > +
>> > > /* Return true if stack frame is required. Update STACK_ALIGNMENT
>> > > to the largest alignment, in bits, of stack slot used if stack
>> > > frame is required and CHECK_STACK_SLOT is true. */
>> > > @@ -12801,6 +12813,12 @@ ix86_find_max_used_stack_alignment (unsigned
>> > > int &stack_alignment,
>> > >
>> > > bool require_stack_frame = false;
>> > >
>> > > + /* Array of hard registers which reference stack frame. */
>> > > + rtx stack_ref_regs[LAST_REX_INT_REG];
>> > > + memset (stack_ref_regs, 0, sizeof (stack_ref_regs));
>> > > + stack_ref_regs[STACK_POINTER_REGNUM] = stack_pointer_rtx;
>> > > + stack_ref_regs[FRAME_POINTER_REGNUM] = frame_pointer_rtx;
>> > > +
>> > > FOR_EACH_BB_FN (bb, cfun)
>> > > {
>> > > rtx_insn *insn;
>> > > @@ -12811,16 +12829,33 @@ ix86_find_max_used_stack_alignment (unsigned
>> > > int &stack_alignment,
>> > > {
>> > > require_stack_frame = true;
>> > >
>> > > + rtx body = PATTERN (insn);
>> > > + if (GET_CODE (body) == SET)
>> > > + {
>> > > + /* If a register is set from a stack referencing
>> > > + register, it is a stack referencing register. */
>> > > + rtx dest = SET_DEST (body);
>> > > + if (REG_P (dest) && !MEM_P (SET_SRC (body)))
>> > > + {
>> > > + int regno = REGNO (dest);
>> > > + gcc_assert (regno < FIRST_PSEUDO_REGISTER);
>> > > + if (GENERAL_REGNO_P (regno))
>> > > + {
>> > > + add_to_hard_reg_set (&set_up_by_prologue,
>> > > Pmode,
>> > > + regno);
>> > > + stack_ref_regs[regno] = dest;
>> > > + }
>> > > + }
>> > > + }
>> > > +
>> > > if (check_stack_slot)
>> > > {
>> > > /* Find the maximum stack alignment. */
>> > > subrtx_iterator::array_type array;
>> > > FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
>> > > if (MEM_P (*iter)
>> > > - && (reg_mentioned_p (stack_pointer_rtx,
>> > > - *iter)
>> > > - || reg_mentioned_p (frame_pointer_rtx,
>> > > - *iter)))
>> > > + && ix86_stack_referenced_p (*iter,
>> > > + stack_ref_regs))
>> > > {
>> > > unsigned int alignment = MEM_ALIGN (*iter);
>> > > if (alignment > stack_alignment)
>> > This just does a linear scan of the blocks and insns within the block
>> > building up STACK_REF_REGS as we go.
>> >
>> > The problem is there's no guarantee that we're visiting the blocks in
>> > execution order. So we might see an insn that indirectly references
>> > the
>> > stack reg *before* we see the insn which had the copy from the stack
>> > pointer.
>> >
>> > Or am I missing something?
>> >
>>
>> You are right. We must be conservative in this case Here is the
>> updated patch. If there may be indirect stack references, stop and
>> restore the original stack alignment.
>>
>> I am testing it now.
>>
>
> This is updated patch. Tested on i686 and x86-64 with
>
> --with-arch=native --with-cpu=native
>
> on AVX512 machine. Tested on i686 and x86-64 without
>
> --with-arch=native --with-cpu=native
>
> on x86-64 machine.
>
> OK for trunk?
No. Please revert the original patch (it was not a regression), and
let's revisit this for gcc-10.
Uros.
> Thanks.
>
>
> H.J.
> ----
> A latent bug in ix86_find_max_used_stack_alignment was uncovered by the
> fix for PR target/88483, which caused:
>
> FAIL: gcc.target/i386/incoming-8.c scan-assembler andl[\\t ]*\\$-16,[\\t
> ]*%esp
>
> on i386. ix86_find_max_used_stack_alignment failed to notice stack
> reference via non-stack/frame registers and missed stack alignment
> requirement. There's no guarantee that we're visiting the blocks in
> execution order. If there may be indirect stack references, stop and
> restore the original stack alignment. Update stack alignment only if
> we check stack alignment.
>
> PR target/88483
> * config/i386/i386.c (ix86_stack_referenced_p): New function.
> (ix86_find_max_used_stack_alignment): If there may be indirect
> stack references, stop and restore the original stack alignment.
> ---
> gcc/config/i386/i386.c | 76 ++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 74 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index b6dea0c061d..e28a8d50069 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -12777,6 +12777,57 @@ output_probe_stack_range (rtx reg, rtx end)
> return "";
> }
>
> +/* Return true if destination register of SET_BODY may reference stack.
> */
> +
> +static bool
> +ix86_stack_referenced_p (const_rtx set_body)
> +{
> + rtx set_dest, set_src;
> + int i;
> +
> + switch (GET_CODE (set_body))
> + {
> + case SET:
> + /* If a register is set from a stack stack address, it may be
> + used to reference stack indirectly. */
> + set_dest = SET_DEST (set_body);
> + if (SUBREG_P (set_dest))
> + set_dest = SUBREG_REG (set_dest);
> + if (!REG_P (set_dest) || !GENERAL_REG_P (set_dest))
> + return false;
> + set_src = SET_SRC (set_body);
> + if (address_operand (set_src, VOIDmode))
> + {
> + struct ix86_address parts;
> + if (!ix86_decompose_address (set_src, &parts))
> + return false;
> + if (parts.base)
> + {
> + if (SUBREG_P (parts.base))
> + parts.base = SUBREG_REG (parts.base);
> + return (parts.base == stack_pointer_rtx
> + || parts.base == frame_pointer_rtx);
> + }
> + if (parts.index)
> + {
> + if (SUBREG_P (parts.index))
> + parts.index = SUBREG_REG (parts.index);
> + return (parts.index == stack_pointer_rtx
> + || parts.index == frame_pointer_rtx);
> + }
> + }
> + break;
> + case PARALLEL:
> + for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
> + if (ix86_stack_referenced_p (XVECEXP (set_body, 0, i)))
> + return true;
> + /* FALLTHROUGH */
> + default:
> + break;
> + }
> + return false;
> +}
> +
> /* Return true if stack frame is required. Update STACK_ALIGNMENT
> to the largest alignment, in bits, of stack slot used if stack
> frame is required and CHECK_STACK_SLOT is true. */
> @@ -12795,8 +12846,13 @@ ix86_find_max_used_stack_alignment (unsigned int
> &stack_alignment,
> add_to_hard_reg_set (&set_up_by_prologue, Pmode,
> HARD_FRAME_POINTER_REGNUM);
>
> - /* The preferred stack alignment is the minimum stack alignment. */
> - if (stack_alignment > crtl->preferred_stack_boundary)
> + /* Save the original stack alignment. */
> + unsigned int original_stack_alignment = stack_alignment;
> +
> + /* The preferred stack alignment is the minimum stack alignment.
> + Update stack alignment only if we check stack alignment. */
> + if (check_stack_slot
> + && stack_alignment > crtl->preferred_stack_boundary)
> stack_alignment = crtl->preferred_stack_boundary;
>
> bool require_stack_frame = false;
> @@ -12811,6 +12867,16 @@ ix86_find_max_used_stack_alignment (unsigned int
> &stack_alignment,
> {
> require_stack_frame = true;
>
> + if (ix86_stack_referenced_p (PATTERN (insn)))
> + {
> + /* There's no guarantee that we're visiting the
> + blocks in execution order. If there may be
> + indirect stack references, stop and restore
> + the original stack alignment. */
> + stack_alignment = original_stack_alignment;
> + goto done;
> + }
> +
> if (check_stack_slot)
> {
> /* Find the maximum stack alignment. */
> @@ -12827,9 +12893,15 @@ ix86_find_max_used_stack_alignment (unsigned int
> &stack_alignment,
> stack_alignment = alignment;
> }
> }
> + else
> + {
> + /* We are done. */
> + goto done;
> + }
> }
> }
>
> +done:
> return require_stack_frame;
> }
>
> --
> 2.19.2
>
>