This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH v2] Fix PR64242
- From: Wilco Dijkstra <Wilco dot Dijkstra at arm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: nd <nd at arm dot com>
- Date: Fri, 7 Dec 2018 14:52:48 +0000
- Subject: [PATCH v2] Fix PR64242
Improve the fix for PR64242. Various optimizations can change a memory reference
into a frame access. Given there are multiple virtual frame pointers which may
be replaced by multiple hard frame pointers, there are no checks for writes to the
various frame pointers. So updates to a frame pointer tend to generate incorrect
code. Improve the previous fix to also add clobbers of several frame pointers and
add a scheduling barrier. This should work in most cases until GCC supports a
generic "don't optimize across this instruction" feature.
Also improve testcase to handle alloca implementations which allocate too much,
and increase stack sizes to avoid accidentally passing the test, as reported by
Jakub in comment #10 in the PR.
Bootstrap OK. Testcase passes on AArch64 and x86-64. Inspected x86, Arm,
Thumb-1 and Thumb-2 assembler, which looks correct.
ChangeLog:
2018-12-07 Wilco Dijkstra <wdijkstr@arm.com>
gcc/
PR middle-end/64242
* builtins.c (expand_builtin_longjmp): Add frame clobbers and schedule block.
(expand_builtin_nonlocal_goto): Likewise.
testsuite/
PR middle-end/64242
* gcc.c-torture/execute/pr64242.c: Update test.
---
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 669e548706f537fa9a92c5f47f30fc3c6ee38176..2ef9c9afcc69fcb775dc6a6fff550025bdc76337 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -1138,15 +1138,20 @@ expand_builtin_longjmp (rtx buf_addr, rtx value)
emit_insn (targetm.gen_nonlocal_goto (value, lab, stack, fp));
else
{
- lab = copy_to_reg (lab);
-
emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx));
+ lab = copy_to_reg (lab);
+
/* Restore the frame pointer and stack pointer. We must use a
temporary since the setjmp buffer may be a local. */
fp = copy_to_reg (fp);
emit_stack_restore (SAVE_NONLOCAL, stack);
+
+ /* Ensure the frame pointer move is not optimized. */
+ emit_insn (gen_blockage ());
+ emit_clobber (hard_frame_pointer_rtx);
+ emit_clobber (frame_pointer_rtx);
emit_move_insn (hard_frame_pointer_rtx, fp);
emit_use (hard_frame_pointer_rtx);
@@ -1285,15 +1290,20 @@ expand_builtin_nonlocal_goto (tree exp)
emit_insn (targetm.gen_nonlocal_goto (const0_rtx, r_label, r_sp, r_fp));
else
{
- r_label = copy_to_reg (r_label);
-
emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx));
+ r_label = copy_to_reg (r_label);
+
/* Restore the frame pointer and stack pointer. We must use a
temporary since the setjmp buffer may be a local. */
r_fp = copy_to_reg (r_fp);
emit_stack_restore (SAVE_NONLOCAL, r_sp);
+
+ /* Ensure the frame pointer move is not optimized. */
+ emit_insn (gen_blockage ());
+ emit_clobber (hard_frame_pointer_rtx);
+ emit_clobber (frame_pointer_rtx);
emit_move_insn (hard_frame_pointer_rtx, r_fp);
/* USE of hard_frame_pointer_rtx added for consistency;
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr64242.c b/gcc/testsuite/gcc.c-torture/execute/pr64242.c
index 46a7b23d28d71604d141281c21fb0b77849b1b0d..e6139ede3f34d587ac53d04e286e5d75fd2ca76c 100644
--- a/gcc/testsuite/gcc.c-torture/execute/pr64242.c
+++ b/gcc/testsuite/gcc.c-torture/execute/pr64242.c
@@ -5,46 +5,31 @@ extern void abort (void);
__attribute ((noinline)) void
broken_longjmp (void *p)
{
- void *buf[5];
+ void *buf[32];
__builtin_memcpy (buf, p, 5 * sizeof (void*));
/* Corrupts stack pointer... */
__builtin_longjmp (buf, 1);
}
-__attribute ((noipa)) __UINTPTR_TYPE__
-foo (void *p)
-{
- return (__UINTPTR_TYPE__) p;
-}
-
-__attribute ((noipa)) void
-bar (void *p)
-{
- asm volatile ("" : : "r" (p));
-}
-
volatile int x = 0;
-void *volatile p;
-void *volatile q;
+char *volatile p;
+char *volatile q;
int
main ()
{
void *buf[5];
- struct __attribute__((aligned (32))) S { int a[4]; } s;
- bar (&s);
p = __builtin_alloca (x);
+ q = __builtin_alloca (x);
if (!__builtin_setjmp (buf))
broken_longjmp (buf);
+ /* Compute expected next alloca offset - some targets don't align properly
+ and allocate too much. */
+ p = q + (q - p);
+
/* Fails if stack pointer corrupted. */
- q = __builtin_alloca (x);
- if (foo (p) < foo (q))
- {
- if (foo (q) - foo (p) >= 1024)
- abort ();
- }
- else if (foo (p) - foo (q) >= 1024)
+ if (p != __builtin_alloca (x))
abort ();
return 0;