[PATCH][RFA][PR target/84128] Fix restore of scratch register used in probing loops -- V2

Uros Bizjak ubizjak@gmail.com
Thu Feb 1 07:52:00 GMT 2018


On Thu, Feb 1, 2018 at 2:44 AM, Jeff Law <law@redhat.com> wrote:
>
> This is V2 of the patch to fix 84128.
>
> What's change since V1.  The -fstack-check path for non-linux systems
> has been restored to its previous (correct) behavior.  Only the behavior
> for -fstack-check on linux systems and -fstack-clash-protection has been
> changed.
>
> release_scratch_regsiter_on_entry now has two paths.  The original path
> that is used by -fstack-check on non-linux systems (RESTORE_VIA_POP) and
> the new path used by -fstack-clash-protection and -fstack-check on linux
> systems.
>
> The original path restores the scratch register using a push.  The new
> path accepts an offset where the register was saved and uses a reg+d
> memory reference to restore it, the space allocated by the push in this
> case becomes part of the local frame and is deallocated by the epilogue.
>
> THe original path is used by ix86_emit_probe_stack_range which just
> needs to pass in the new RESTORE_VIA_POP argument as true.
>
> The new path is used by ix86_adjust_stack_and_probe_stack_clash and
> ix86_adjust_stack_and_probe where the space used by the push becomes
> part of the local frame and is deallocated by the epilogue.  It passes
> false for RESTORE_VIA_POP.
>
> Per my invesitgations we have never tried to describe the restore of
> %eax in the CFI info.   So that's a non-issue for this change.

Agreed.

> I've done testing of all three paths paths now.
> -fstack-clash-protection, -fstack-check with moving sp, and
> -fstack-check without moving sp.  The details of that testing can be
> found in the original thread (it's not automated and requires a fair
> amount of hackery and hand verification of the test results).
>
> OK for the trunk now?
>
> THanks,
> Jeff
>
>
>
>
>
>         PR target/84128
>         * config/i386/i386.c (release_scratch_register_on_entry): Add new
>         OFFSET and RELEASE_VIA_POP arguments.  Use SP+OFFSET to restore
>         the scratch if RELEASE_VIA_POP is false.
>         (ix86_adjust_stack_and_probe_stack_clash): Un-constify SIZE.
>         If we have to save a temporary register, decrement SIZE appropriately.
>         Pass new arguments to release_scratch_register_on_entry.
>         (ix86_adjust_stack_and_probe): Likewise.
>         (ix86_emit_probe_stack_range): Pass new arguments to
>         release_scratch_register_on_entry.
>
>         PR target/84128
>         * gcc.target/i386/pr84128.c: New test.

OK.

Thanks,
Uros.

> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index fef34a1..3196ac4 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -12567,22 +12567,39 @@ get_scratch_register_on_entry (struct scratch_reg *sr)
>      }
>  }
>
> -/* Release a scratch register obtained from the preceding function.  */
> +/* Release a scratch register obtained from the preceding function.
> +
> +   If RELEASE_VIA_POP is true, we just pop the register off the stack
> +   to release it.  This is what non-Linux systems use with -fstack-check.
> +
> +   Otherwise we use OFFSET to locate the saved register and the
> +   allocated stack space becomes part of the local frame and is
> +   deallocated by the epilogue.  */
>
>  static void
> -release_scratch_register_on_entry (struct scratch_reg *sr)
> +release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
> +                                  bool release_via_pop)
>  {
>    if (sr->saved)
>      {
> -      struct machine_function *m = cfun->machine;
> -      rtx x, insn = emit_insn (gen_pop (sr->reg));
> +      if (release_via_pop)
> +       {
> +         struct machine_function *m = cfun->machine;
> +         rtx x, insn = emit_insn (gen_pop (sr->reg));
>
> -      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
> -      RTX_FRAME_RELATED_P (insn) = 1;
> -      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
> -      x = gen_rtx_SET (stack_pointer_rtx, x);
> -      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
> -      m->fs.sp_offset -= UNITS_PER_WORD;
> +         /* The RX FRAME_RELATED_P mechanism doesn't know about pop.  */
> +         RTX_FRAME_RELATED_P (insn) = 1;
> +         x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
> +         x = gen_rtx_SET (stack_pointer_rtx, x);
> +         add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
> +         m->fs.sp_offset -= UNITS_PER_WORD;
> +       }
> +      else
> +       {
> +         rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
> +         x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
> +         emit_insn (x);
> +       }
>      }
>  }
>
> @@ -12597,7 +12614,7 @@ release_scratch_register_on_entry (struct scratch_reg *sr)
>     pushed on the stack.  */
>
>  static void
> -ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size,
> +ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
>                                          const bool int_registers_saved)
>  {
>    struct machine_function *m = cfun->machine;
> @@ -12713,6 +12730,12 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size,
>        struct scratch_reg sr;
>        get_scratch_register_on_entry (&sr);
>
> +      /* If we needed to save a register, then account for any space
> +        that was pushed (we are not going to pop the register when
> +        we do the restore).  */
> +      if (sr.saved)
> +       size -= UNITS_PER_WORD;
> +
>        /* Step 1: round SIZE down to a multiple of the interval.  */
>        HOST_WIDE_INT rounded_size = size & -probe_interval;
>
> @@ -12761,7 +12784,9 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size,
>                                    m->fs.cfa_reg == stack_pointer_rtx);
>        dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
>
> -      release_scratch_register_on_entry (&sr);
> +      /* This does not deallocate the space reserved for the scratch
> +        register.  That will be deallocated in the epilogue.  */
> +      release_scratch_register_on_entry (&sr, size, false);
>      }
>
>    /* Make sure nothing is scheduled before we are done.  */
> @@ -12774,7 +12799,7 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size,
>     pushed on the stack.  */
>
>  static void
> -ix86_adjust_stack_and_probe (const HOST_WIDE_INT size,
> +ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
>                              const bool int_registers_saved)
>  {
>    /* We skip the probe for the first interval + a small dope of 4 words and
> @@ -12847,6 +12872,11 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT size,
>
>        get_scratch_register_on_entry (&sr);
>
> +      /* If we needed to save a register, then account for any space
> +        that was pushed (we are not going to pop the register when
> +        we do the restore).  */
> +      if (sr.saved)
> +       size -= UNITS_PER_WORD;
>
>        /* Step 1: round SIZE to the previous multiple of the interval.  */
>
> @@ -12906,7 +12936,9 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT size,
>                                                     (get_probe_interval ()
>                                                      + dope))));
>
> -      release_scratch_register_on_entry (&sr);
> +      /* This does not deallocate the space reserved for the scratch
> +        register.  That will be deallocated in the epilogue.  */
> +      release_scratch_register_on_entry (&sr, size, false);
>      }
>
>    /* Even if the stack pointer isn't the CFA register, we need to correctly
> @@ -13055,7 +13087,7 @@ ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
>                                                        sr.reg),
>                                          rounded_size - size));
>
> -      release_scratch_register_on_entry (&sr);
> +      release_scratch_register_on_entry (&sr, size, true);
>      }
>
>    /* Make sure nothing is scheduled before we are done.  */
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr84128.c b/gcc/testsuite/gcc.target/i386/pr84128.c
> new file mode 100644
> index 0000000..a8323fd6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr84128.c
> @@ -0,0 +1,30 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -march=i686 -mtune=generic -fstack-clash-protection" } */
> +/* { dg-require-effective-target ia32 } */
> +
> +__attribute__ ((noinline, noclone, weak, regparm (3)))
> +int
> +f1 (long arg0, int (*pf) (long, void *))
> +{
> +  unsigned char buf[32768];
> +  return pf (arg0, buf);
> +}
> +
> +__attribute__ ((noinline, noclone, weak))
> +int
> +f2 (long arg0, void *ignored)
> +{
> +  if (arg0 != 17)
> +    __builtin_abort ();
> +  return 19;
> +}
> +
> +int
> +main (void)
> +{
> +  if (f1 (17, f2) != 19)
> +    __builtin_abort ();
> +  return 0;
> +}
> +
> +
>



More information about the Gcc-patches mailing list